shadPS4/src/video_core/amdgpu/liverpool.h

1075 lines
33 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <atomic>
#include <bit>
#include <condition_variable>
#include <coroutine>
#include <cstddef>
#include <cstring>
#include <mutex>
#include <queue>
#include <span>
#include <thread>

#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h"
// Forward declaration: this header only stores and accepts a pointer to the
// rasterizer, so the full definition is not needed here.
namespace Vulkan {
class Rasterizer;
}
namespace AmdGpu {
// Index, in u32 words, of a Liverpool::Regs member (byte offset / sizeof(u32)).
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
// Two-level token pasting so that arguments such as __LINE__ are macro-expanded
// before being concatenated.
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define DO_CONCAT2(x, y) x##y
// Declares an unused std::array of `num_words` u32 values named pad<line number>,
// used to skip over register words that have no named member.
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
struct Liverpool {
// Queue topology constants.
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
static constexpr u32 NumQueuesPerPipe = 8u;
// Every graphics ring plus every queue of every compute pipe.
static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe);
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
// Element counts of the register arrays declared inside Regs and of UserData.
static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumShaderUserData = 16;
// Word offsets of register ranges.
// NOTE(review): not referenced in this header; presumably consumed by the
// command processing code — confirm.
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ConfigRegWordOffset = 0x2000;
static constexpr u32 ShRegWordOffset = 0x2C00;
// Number of u32 words in Regs::reg_array.
static constexpr u32 NumRegs = 0xD000;
// User data words attached to a shader program (see ShaderProgram / ComputeProgram).
using UserData = std::array<u32, NumShaderUserData>;
struct BinaryInfo {
    // Expected contents of `signature`: the ASCII bytes "OrbShdr".
    static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr
    std::array<u8, sizeof(signature_ref)> signature;
    u8 version;
    u32 pssl_or_cg : 1;
    u32 cached : 1;
    u32 type : 4;
    u32 source_type : 2;
    u32 length : 24;
    u8 chunk_usage_base_offset_in_dw;
    u8 num_input_usage_slots;
    u8 is_srt : 1;
    u8 is_srt_used_info_valid : 1;
    u8 is_extended_usage_info : 1;
    u8 reserved2 : 5;
    u8 reserved3;
    u64 shader_hash;
    u32 crc32;

    // A header is considered valid when both the hash and the CRC are non-zero
    // and the signature bytes match signature_ref.
    bool Valid() const {
        if (shader_hash == 0 || crc32 == 0) {
            return false;
        }
        return std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0;
    }
};
struct ShaderProgram {
    u32 address_lo;
    BitField<0, 8, u32> address_hi;
    union {
        BitField<0, 6, u64> num_vgprs;
        BitField<6, 4, u64> num_sgprs;
        BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
        BitField<33, 5, u64> num_user_regs;
    } settings;
    UserData user_data;

    // Rebuilds the program address: address_lo supplies bits 8 and up,
    // address_hi supplies bits 40 and up.
    template <typename T = u8*>
    const T Address() const {
        const uintptr_t high_part = uintptr_t(address_hi) << 40;
        const uintptr_t low_part = uintptr_t(address_lo) << 8;
        return reinterpret_cast<const T>(high_part | low_part);
    }

    // Returns the program as a span of dwords. The dword count comes from the
    // BinaryInfo header copied from (code[1] + 1) * 2 dwords past the start.
    std::span<const u32> Code() const {
        const u32* program = Address<u32*>();
        BinaryInfo header;
        std::memcpy(&header, program + (program[1] + 1) * 2, sizeof(header));
        const u32 dword_count = header.length / sizeof(u32);
        return std::span{program, dword_count};
    }
};
struct ComputeProgram {
    u32 dispatch_initiator;
    u32 dim_x;
    u32 dim_y;
    u32 dim_z;
    u32 start_x;
    u32 start_y;
    u32 start_z;
    struct {
        u16 full;
        u16 partial;
    } num_thread_x, num_thread_y, num_thread_z;
    INSERT_PADDING_WORDS(1);
    BitField<0, 12, u32> max_wave_id;
    u32 address_lo;
    BitField<0, 8, u32> address_hi;
    INSERT_PADDING_WORDS(4);
    union {
        BitField<0, 6, u64> num_vgprs;
        BitField<6, 4, u64> num_sgprs;
        BitField<33, 5, u64> num_user_regs;
        BitField<47, 9, u64> lds_dwords;
    } settings;
    INSERT_PADDING_WORDS(1);
    u32 resource_limits;
    INSERT_PADDING_WORDS(0x2A);
    UserData user_data;

    // Rebuilds the program address: address_lo supplies bits 8 and up,
    // address_hi supplies bits 40 and up.
    template <typename T = u8*>
    const T Address() const {
        const uintptr_t high_part = uintptr_t(address_hi) << 40;
        const uintptr_t low_part = uintptr_t(address_lo) << 8;
        return reinterpret_cast<const T>(high_part | low_part);
    }

    // Shared memory size in bytes.
    u32 SharedMemSize() const noexcept {
        // lds_dwords counts blocks of 128 dwords; a dword is 4 bytes.
        const auto lds_blocks = settings.lds_dwords.Value();
        return lds_blocks * 128 * 4;
    }

    // Returns the program as a span of dwords. The dword count comes from the
    // BinaryInfo header copied from (code[1] + 1) * 2 dwords past the start.
    std::span<const u32> Code() const {
        const u32* program = Address<u32*>();
        BinaryInfo header;
        std::memcpy(&header, program + (program[1] + 1) * 2, sizeof(header));
        const u32 dword_count = header.length / sizeof(u32);
        return std::span{program, dword_count};
    }
};
template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) {
const auto* code = sh.template Address<u32*>();
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
return bininfo;
}
// Bit layout of one pixel shader input control word (one entry of Regs::ps_inputs).
union PsInputControl {
u32 raw;
BitField<0, 5, u32> input_offset;
BitField<5, 1, u32> use_default;
BitField<8, 2, u32> default_value;
BitField<10, 1, u32> flat_shade;
};
// Values of the 4-bit position export fields in ShaderPosFormat.
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
TwoComp = 2,
FourCompCompressed = 3,
FourComp = 4,
};
// Four packed ShaderExportComp fields, one per position export (pos0..pos3).
union ShaderPosFormat {
u32 raw;
BitField<0, 4, ShaderExportComp> pos0;
BitField<4, 4, ShaderExportComp> pos1;
BitField<8, 4, ShaderExportComp> pos2;
BitField<12, 4, ShaderExportComp> pos3;
};
// Export format values; used for Regs::z_export_format and for each 4-bit
// field of ColorExportFormat.
enum class ShaderExportFormat : u32 {
Zero = 0,
R_32 = 1,
GR_32 = 2,
AR_32 = 3,
ABGR_FP16 = 4,
ABGR_UNORM16 = 5,
ABGR_SNORM16 = 6,
ABGR_UINT16 = 7,
ABGR_SINT16 = 8,
ABGR_32 = 9,
};
// Eight packed ShaderExportFormat fields (col0..col7), 4 bits each.
union ColorExportFormat {
u32 raw;
BitField<0, 4, ShaderExportFormat> col0;
BitField<4, 4, ShaderExportFormat> col1;
BitField<8, 4, ShaderExportFormat> col2;
BitField<12, 4, ShaderExportFormat> col3;
BitField<16, 4, ShaderExportFormat> col4;
BitField<20, 4, ShaderExportFormat> col5;
BitField<24, 4, ShaderExportFormat> col6;
BitField<28, 4, ShaderExportFormat> col7;
};
union VsOutputControl {
u32 raw;
BitField<0, 8, u32> clip_distance_enable;
BitField<8, 8, u32> cull_distance_enable;
BitField<16, 1, u32> use_vtx_point_size;
BitField<17, 1, u32> use_vtx_edge_flag;
BitField<18, 1, u32> use_vtx_render_target_idx;
BitField<19, 1, u32> use_vtx_viewport_idx;
BitField<20, 1, u32> use_vtx_kill_flag;
BitField<21, 1, u32> vs_out_misc_enable;
BitField<22, 1, u32> vs_out_ccdist0_enable;
BitField<23, 1, u32> vs_out_ccdist1_enable;
BitField<25, 1, u32> use_vtx_gs_cut_flag;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable.Value() >> index) & 1;
}
bool IsCullDistEnabled(u32 index) const {
return (cull_distance_enable.Value() >> index) & 1;
}
};
// Values of DepthBufferControl::z_order.
enum class ZOrder : u32 {
LateZ = 0,
EarlyZLateZ = 1,
ReZ = 2,
EarlyZReZ = 3,
};
// Values of DepthBufferControl::conservative_z_export.
enum class ConservativeDepth : u32 {
Any = 0,
LessThanZ = 1,
GreaterThanZ = 2,
};
// Bit layout of the register stored in Regs::depth_buffer_control.
union DepthBufferControl {
u32 raw;
BitField<0, 1, u32> z_export_enable;
BitField<1, 1, u32> stencil_test_val_export_enable;
BitField<2, 1, u32> stencil_op_val_export_enable;
BitField<4, 2, ZOrder> z_order;
BitField<6, 1, u32> kill_enable;
BitField<7, 1, u32> coverage_to_mask_enable;
BitField<8, 1, u32> mask_export_enable;
BitField<9, 1, u32> exec_on_hier_fail;
BitField<10, 1, u32> exec_on_noop;
BitField<11, 1, u32> alpha_to_mask_disable;
BitField<12, 1, u32> depth_before_shader;
BitField<13, 2, ConservativeDepth> conservative_z_export;
};
// Comparison function values used by the depth and stencil function fields
// of DepthControl.
enum class CompareFunc : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterEqual = 6,
Always = 7,
};
// Bit layout of the register stored in Regs::depth_control.
union DepthControl {
u32 raw;
BitField<0, 1, u32> stencil_enable;
BitField<1, 1, u32> depth_enable;
BitField<2, 1, u32> depth_write_enable;
BitField<3, 1, u32> depth_bounds_enable;
BitField<4, 3, CompareFunc> depth_func;
BitField<7, 1, u32> backface_enable;
BitField<8, 3, CompareFunc> stencil_ref_func;
BitField<20, 3, CompareFunc> stencil_bf_func;
BitField<30, 1, u32> enable_color_writes_on_depth_fail;
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
};
// Stencil operation values used by every field of StencilControl.
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
Ones = 2,
ReplaceTest = 3,
ReplaceOp = 4,
AddClamp = 5,
SubClamp = 6,
Invert = 7,
AddWrap = 8,
SubWrap = 9,
And = 10,
Or = 11,
Xor = 12,
Nand = 13,
Nor = 14,
Xnor = 15,
};
// Six packed 4-bit StencilFunc fields: fail / zpass / zfail for the front
// face, followed by the same three for the back face.
union StencilControl {
u32 raw;
BitField<0, 4, StencilFunc> stencil_fail_front;
BitField<4, 4, StencilFunc> stencil_zpass_front;
BitField<8, 4, StencilFunc> stencil_zfail_front;
BitField<12, 4, StencilFunc> stencil_fail_back;
BitField<16, 4, StencilFunc> stencil_zpass_back;
BitField<20, 4, StencilFunc> stencil_zfail_back;
};
// Four packed 8-bit stencil values; Regs holds one instance for the front
// face (stencil_ref_front) and one for the back face (stencil_ref_back).
union StencilRefMask {
u32 raw;
BitField<0, 8, u32> stencil_test_val;
BitField<8, 8, u32> stencil_mask;
BitField<16, 8, u32> stencil_write_mask;
BitField<24, 8, u32> stencil_op_val;
};
struct DepthBuffer {
enum class ZFormat : u32 {
Invald = 0,
Z16 = 1,
Z32Float = 3,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
BitField<27, 1, u32> allow_expclear;
BitField<28, 1, u32> read_size;
BitField<29, 1, u32> tile_surface_en;
BitField<31, 1, u32> zrange_precision;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
u64 Address() const {
return u64(z_read_base) << 8;
}
size_t GetSizeAligned() const {
return depth_slice.tile_max * 8;
}
};
// Values of ClipperControl::clip_space.
enum class ClipSpace : u32 {
MinusWToW = 0,
ZeroToW = 1,
};
// Values of ClipperControl::vtx_kill_or.
enum class PrimKillCond : u32 {
AllVtx = 0,
AnyVtx = 1,
};
// Bit layout of the clipper control register (Regs::clipper_control).
union ClipperControl {
    u32 raw;
    BitField<0, 6, u32> user_clip_plane_enable;
    BitField<16, 1, u32> clip_disable;
    BitField<19, 1, ClipSpace> clip_space;
    BitField<21, 1, PrimKillCond> vtx_kill_or;
    BitField<22, 1, u32> dx_rasterization_kill;
    // NOTE(review): public register headers (e.g. Mesa sid.h,
    // DX_LINEAR_ATTR_CLIP_ENA) place this flag at bit 24 — confirm bit 23.
    BitField<23, 1, u32> dx_linear_attr_clip_enable;
    BitField<26, 1, u32> zclip_near_disable;
    // The far-plane disable is the bit directly above the near-plane disable.
    // It previously aliased bit 26, so both flags always read the same value.
    BitField<27, 1, u32> zclip_far_disable;
};
// Values of PolygonControl::polygon_mode_front / polygon_mode_back.
enum class PolygonMode : u32 {
Point = 0,
Line = 1,
Fill = 2,
};
// Values of PolygonControl::provoking_vtx_last.
enum class ProvokingVtxLast : u32 {
First = 0,
Last = 1,
};
// Result of PolygonControl::CullingMode(): bit 0 is front culling, bit 1 is
// back culling.
enum class CullMode : u32 {
None = 0,
Front = 1,
Back = 2,
FrontAndBack = 3,
};
// Values of PolygonControl::front_face.
enum class FrontFace : u32 {
CounterClockwise = 0,
Clockwise = 1,
};
// Bit layout of the polygon/rasterizer mode register (Regs::polygon_control).
union PolygonControl {
    u32 raw;
    BitField<0, 1, u32> cull_front;
    BitField<1, 1, u32> cull_back;
    BitField<2, 1, FrontFace> front_face;
    BitField<3, 2, u32> enable_polygon_mode;
    BitField<5, 3, PolygonMode> polygon_mode_front;
    BitField<8, 3, PolygonMode> polygon_mode_back;
    BitField<11, 1, u32> enable_polygon_offset_front;
    BitField<12, 1, u32> enable_polygon_offset_back;
    BitField<13, 1, u32> enable_polygon_offset_para;
    // The vertex window offset enable lives at bit 16. It previously aliased
    // bit 13, so it always mirrored enable_polygon_offset_para.
    BitField<16, 1, u32> enable_window_offset;
    BitField<19, 1, ProvokingVtxLast> provoking_vtx_last;

    // Front-face polygon mode when polygon mode is enabled, Fill otherwise.
    PolygonMode PolyMode() const {
        return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill;
    }

    // Packs the two cull bits into a CullMode (bit 0 = front, bit 1 = back).
    CullMode CullingMode() const {
        return static_cast<CullMode>(cull_front | cull_back << 1);
    }
};
// Bit layout of the vertex shader output configuration (Regs::vs_output_config).
union VsOutputConfig {
    u32 raw;
    BitField<1, 5, u32> export_count_min_one;
    BitField<6, 1, u32> half_pack;

    // The field stores the export count minus one.
    u32 NumExports() const {
        const u32 stored_count = export_count_min_one.Value();
        return stored_count + 1;
    }
};
// Eight packed 4-bit component masks, one per color output.
union ColorBufferMask {
    // Bit of each color component inside a 4-bit output mask.
    enum ColorComponent : u32 {
        ComponentR = (1u << 0),
        ComponentG = (1u << 1),
        ComponentB = (1u << 2),
        ComponentA = (1u << 3),
    };
    u32 raw;
    BitField<0, 4, u32> output0_mask;
    BitField<4, 4, u32> output1_mask;
    BitField<8, 4, u32> output2_mask;
    BitField<12, 4, u32> output3_mask;
    BitField<16, 4, u32> output4_mask;
    BitField<20, 4, u32> output5_mask;
    BitField<24, 4, u32> output6_mask;
    BitField<28, 4, u32> output7_mask;

    // Returns the 4-bit mask of output `buf_id` by shifting `raw` right by
    // four bits per output.
    u32 GetMask(int buf_id) const {
        const int shift = buf_id * 4;
        return (raw >> shift) & 0xfu;
    }
};
struct IndexBufferBase {
BitField<0, 8, u32> base_addr_hi;
u32 base_addr_lo;
template <typename T = VAddr>
T Address() const {
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
// Values of IndexBufferType::index_type.
enum class IndexType : u32 {
Index16 = 0,
Index32 = 1,
};
// Values of IndexBufferType::swap_mode.
enum class IndexSwapMode : u32 {
None = 0,
Swap16 = 1,
Swap32 = 2,
SwapWord = 3,
};
// Bit layout of the index buffer type register (Regs::index_buffer_type).
union IndexBufferType {
u32 raw;
BitField<0, 2, IndexType> index_type;
BitField<2, 2, IndexSwapMode> swap_mode;
};
// Wrapper around the raw instance count register.
union VgtNumInstances {
    u32 num_instances;

    // A stored count of zero is reported as one instance.
    u32 NumInstances() const {
        if (num_instances == 0) {
            return 1;
        }
        return num_instances;
    }
};
// Scissor rectangle stored as two register words: the top-left corner as two
// signed 16-bit values, the bottom-right corner as two 15-bit fields.
struct Scissor {
    struct {
        s16 top_left_x;
        s16 top_left_y;
    };
    union {
        BitField<0, 15, u32> bottom_right_x;
        BitField<16, 15, u32> bottom_right_y;
    };

    // Horizontal extent: bottom_right_x - top_left_x.
    u32 GetWidth() const {
        const auto extent = bottom_right_x - top_left_x;
        return static_cast<u32>(extent);
    }

    // Vertical extent: bottom_right_y - top_left_y.
    u32 GetHeight() const {
        const auto extent = bottom_right_y - top_left_y;
        return static_cast<u32>(extent);
    }
};
// Per-viewport scissor rectangle (one entry of Regs::viewport_scissors),
// stored as a top-left word followed by a bottom-right word.
//
// Each coordinate is a 15-bit field: X occupies bits 0-14 and Y occupies
// bits 16-30 of its word, with the window-offset disable flag in bit 31 of
// the top-left word. The Y fields and the flag were previously declared one
// bit too low (15 and 30), which decoded Y at double its value and with the
// wrong top bit.
struct ViewportScissor {
    union {
        BitField<0, 15, s32> top_left_x;
        BitField<16, 15, s32> top_left_y;
        // Spelling ("disble") kept as-is; renaming would change the interface.
        BitField<31, 1, s32> window_offset_disble;
    };
    union {
        BitField<0, 15, s32> bottom_right_x;
        BitField<16, 15, s32> bottom_right_y;
    };

    // Horizontal extent: bottom_right_x - top_left_x.
    u32 GetWidth() const {
        return bottom_right_x - top_left_x;
    }

    // Vertical extent: bottom_right_y - top_left_y.
    u32 GetHeight() const {
        return bottom_right_y - top_left_y;
    }
};
// Depth range of one viewport (one entry of Regs::viewport_depths).
struct ViewportDepth {
float zmin;
float zmax;
};
// Scale/offset pairs for x, y and z of one viewport (one entry of Regs::viewports).
struct ViewportBounds {
float xscale;
float xoffset;
float yscale;
float yoffset;
float zscale;
float zoffset;
};
// Bit layout of the viewport transform control register (Regs::viewport_control):
// one enable bit per ViewportBounds scale/offset, plus three *_transformed flags.
union ViewportControl {
BitField<0, 1, u32> xscale_enable;
BitField<1, 1, u32> xoffset_enable;
BitField<2, 1, u32> yscale_enable;
BitField<3, 1, u32> yoffset_enable;
BitField<4, 1, u32> zscale_enable;
BitField<5, 1, u32> zoffset_enable;
BitField<8, 1, u32> xy_transformed;
BitField<9, 1, u32> z_transformed;
BitField<10, 1, u32> w_transformed;
};
// Four raw words for one user clip plane (one entry of Regs::clip_user_data).
struct ClipUserData {
u32 data_x;
u32 data_y;
u32 data_z;
u32 data_w;
};
// Blend constant color (Regs::blend_constants).
struct BlendConstants {
float red;
float green;
float blue;
float alpha;
};
// Bit layout of one per-render-target blend control word (one entry of
// Regs::blend_control).
union BlendControl {
// Values of the four *_factor fields. Note the gap: 11 and 12 are not defined.
enum class BlendFactor : u32 {
Zero = 0,
One = 1,
SrcColor = 2,
OneMinusSrcColor = 3,
SrcAlpha = 4,
OneMinusSrcAlpha = 5,
DstAlpha = 6,
OneMinusDstAlpha = 7,
DstColor = 8,
OneMinusDstColor = 9,
SrcAlphaSaturate = 10,
ConstantColor = 13,
OneMinusConstantColor = 14,
Src1Color = 15,
InvSrc1Color = 16,
Src1Alpha = 17,
InvSrc1Alpha = 18,
ConstantAlpha = 19,
OneMinusConstantAlpha = 20,
};
// Values of color_func / alpha_func.
enum class BlendFunc : u32 {
Add = 0,
Subtract = 1,
Min = 2,
Max = 3,
ReverseSubtract = 4,
};
// Color equation occupies the low half-word, alpha equation the high half-word.
BitField<0, 5, BlendFactor> color_src_factor;
BitField<5, 3, BlendFunc> color_func;
BitField<8, 5, BlendFactor> color_dst_factor;
BitField<16, 5, BlendFactor> alpha_src_factor;
BitField<21, 3, BlendFunc> alpha_func;
BitField<24, 5, BlendFactor> alpha_dst_factor;
BitField<29, 1, u32> separate_alpha_blend;
BitField<30, 1, u32> enable;
};
// Bit layout of the color control register (Regs::color_control).
union ColorControl {
// Values of the `mode` field. Note the gap: 4 is not defined.
enum class OperationMode : u32 {
Disable = 0u,
Normal = 1u,
EliminateFastClear = 2u,
Resolve = 3u,
FmaskDecompress = 5u,
};
BitField<3, 1, u32> degamma_enable;
BitField<4, 3, OperationMode> mode;
BitField<16, 8, u32> rop3;
};
// Register group describing one color render target (one entry of
// Regs::color_buffers).
struct ColorBuffer {
    enum class EndianSwap : u32 {
        None = 0,
        Swap8In16 = 1,
        Swap8In32 = 2,
        Swap8In64 = 3,
    };
    enum class SwapMode : u32 {
        Standard = 0,
        Alternate = 1,
        StandardReverse = 2,
        AlternateReverse = 3,
    };
    enum class RoundMode : u32 {
        ByHalf = 0,
        Truncate = 1,
    };
    u32 base_address;
    union {
        BitField<0, 11, u32> tile_max;
        BitField<20, 11, u32> fmask_tile_max;
    } pitch;
    union {
        BitField<0, 22, u32> tile_max;
    } slice;
    union {
        BitField<0, 11, u32> slice_start;
        BitField<13, 11, u32> slice_max;
    } view;
    union {
        BitField<0, 2, EndianSwap> endian;
        BitField<2, 5, DataFormat> format;
        BitField<7, 1, u32> linear_general;
        BitField<8, 3, NumberFormat> number_type;
        BitField<11, 2, SwapMode> comp_swap;
        BitField<13, 1, u32> fast_clear;
        BitField<14, 1, u32> compression;
        BitField<15, 1, u32> blend_clamp;
        BitField<16, 1, u32> blend_bypass;
        BitField<17, 1, u32> simple_float;
        BitField<18, 1, RoundMode> round_mode;
        BitField<19, 1, u32> cmask_is_linear;
    } info;
    union {
        BitField<0, 5, TilingMode> tile_mode_index;
        BitField<5, 5, u32> fmask_tile_mode_index;
        BitField<12, 3, u32> num_samples_log2;
        BitField<15, 2, u32> num_fragments_log2;
        BitField<17, 1, u32> force_dst_alpha_1;
    } attrib;
    INSERT_PADDING_WORDS(1);
    u32 cmask_base_address;
    union {
        BitField<0, 14, u32> tile_max;
    } cmask_slice;
    u32 fmask_base_address;
    union {
        BitField<0, 14, u32> tile_max;
    } fmask_slice;
    u32 clear_word0;
    u32 clear_word1;
    INSERT_PADDING_WORDS(2);

    // A color buffer counts as present when its format is not FormatInvalid.
    operator bool() const {
        const bool has_format = info.format != DataFormat::FormatInvalid;
        return has_format;
    }

    // Pitch derived from the stored tile count: (tile_max + 1) * 8.
    u32 Pitch() const {
        const u32 tiles_across = pitch.tile_max + 1;
        return tiles_across << 3;
    }

    // Height derived from the slice size: (slice.tile_max + 1) * 64 / Pitch().
    u32 Height() const {
        const u32 slice_elements = (slice.tile_max + 1) * 64;
        return slice_elements / Pitch();
    }

    // All three base addresses are stored shifted right by 8 bits.
    u64 Address() const {
        const u64 base = u64(base_address);
        return base << 8;
    }
    VAddr CmaskAddress() const {
        const VAddr base = VAddr(cmask_base_address);
        return base << 8;
    }
    VAddr FmaskAddress() const {
        const VAddr base = VAddr(fmask_base_address);
        return base << 8;
    }

    // Size in bytes: elements per slice * number of slices * bytes per element.
    // Asserts that the result is non-zero.
    size_t GetSizeAligned() const {
        const auto bytes_per_element = NumBits(info.format) / 8u;
        const auto elements_per_slice = (slice.tile_max + 1) * 64u;
        const auto size_in_bytes = elements_per_slice * (view.slice_max + 1) * bytes_per_element;
        ASSERT(size_in_bytes > 0);
        return size_in_bytes;
    }

    TilingMode GetTilingMode() const {
        return attrib.tile_mode_index.Value();
    }

    // Tiled unless the linear_general bit is set.
    bool IsTiled() const {
        return info.linear_general.Value() == 0;
    }

    NumberFormat NumFormat() const {
        // There is a small difference between T# and CB number types, account for it.
        if (info.number_type == AmdGpu::NumberFormat::SnormNz) {
            return AmdGpu::NumberFormat::Srgb;
        }
        return info.number_type.Value();
    }
};
// Primitive topology values stored in Regs::primitive_type. The numbering is
// not contiguous (7, 8 and 14-16 are not defined here).
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
// Word indices of selected context registers. The CbColorNBase and
// CbColorNCmask values advance by 0xF per color buffer, and each Cmask index
// is 7 words past the matching Base index.
enum ContextRegs : u32 {
DbZInfo = 0xA010,
CbColor0Base = 0xA318,
CbColor1Base = 0xA327,
CbColor2Base = 0xA336,
CbColor3Base = 0xA345,
CbColor4Base = 0xA354,
CbColor5Base = 0xA363,
CbColor6Base = 0xA372,
CbColor7Base = 0xA381,
CbColor0Cmask = 0xA31F,
CbColor1Cmask = 0xA32E,
CbColor2Cmask = 0xA33D,
CbColor3Cmask = 0xA34C,
CbColor4Cmask = 0xA35B,
CbColor5Cmask = 0xA36A,
CbColor6Cmask = 0xA379,
CbColor7Cmask = 0xA388,
};
// Polygon offset parameters (Regs::poly_offset): a depth bias value followed by
// scale/offset pairs for front and back faces.
struct PolygonOffset {
float depth_bias;
float front_scale;
float front_offset;
float back_scale;
float back_offset;
};
// A single register word holding an address shifted right by 8 bits.
struct Address {
    u32 address;

    // Expands the stored word back into a full address (address << 8).
    VAddr GetAddress() const {
        const u64 widened = u64(address);
        return widened << 8;
    }
};
// Bit layout of the depth render control register (Regs::depth_render_control).
union DepthRenderControl {
u32 raw;
BitField<0, 1, u32> depth_clear_enable;
BitField<1, 1, u32> stencil_clear_enable;
BitField<6, 1, u32> depth_compress_disable;
};
// Bit layout of the anti-aliasing configuration register (Regs::aa_config).
union AaConfig {
    BitField<0, 3, u32> msaa_num_samples;
    BitField<4, 1, u32> aa_mask_centroid_dtmn;
    BitField<13, 4, u32> max_sample_dst;
    BitField<20, 3, u32> msaa_exposed_samples;
    BitField<24, 2, u32> detail_to_exposed_mode;

    // msaa_num_samples is used as a shift amount: the result is 1 << field.
    u32 NumSamples() const {
        const u32 shift = msaa_num_samples;
        return 1 << shift;
    }
};
// Register file. The anonymous struct overlays named registers on top of
// `reg_array`; each INSERT_PADDING_WORDS skips the words between two named
// registers so that every member lands on its word index. Those indices are
// verified by the static_asserts at the end of this file, so members and
// padding sizes must not be reordered or resized independently.
union Regs {
struct {
// Shader program registers.
INSERT_PADDING_WORDS(0x2C08);
ShaderProgram ps_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram vs_program;
INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16);
ComputeProgram cs_program;
// Registers from word index 0xA000 upward.
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(4);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;
float depth_bounds_max;
u32 stencil_clear;
float depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2);
std::array<ViewportScissor, NumViewports> viewport_scissors;
std::array<ViewportDepth, NumViewports> viewport_depths;
INSERT_PADDING_WORDS(0xA105 - 0xA0D4);
BlendConstants blend_constants;
INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4);
StencilControl stencil_control;
StencilRefMask stencil_ref_front;
StencilRefMask stencil_ref_back;
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3);
std::array<BlendControl, NumColorBuffers> blend_control;
INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8);
IndexBufferBase index_base_address;
INSERT_PADDING_WORDS(1);
u32 draw_initiator;
INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
DepthControl depth_control;
INSERT_PADDING_WORDS(1);
ColorControl color_control;
DepthBufferControl depth_buffer_control;
ClipperControl clipper_control;
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 2);
u32 index_size;
u32 max_index_size;
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
PolygonOffset poly_offset;
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
AaConfig aa_config;
INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
ColorBuffer color_buffers[NumColorBuffers];
// Registers from word index 0xC242 upward.
INSERT_PADDING_WORDS(0xC242 - 0xA390);
PrimitiveType primitive_type;
INSERT_PADDING_WORDS(0xC24C - 0xC243);
u32 num_indices;
VgtNumInstances num_instances;
};
// Flat view of the same storage, indexable by register word index.
std::array<u32, NumRegs> reg_array{};
// Returns the shader program for a stage index: 0 selects vs_program,
// 4 selects ps_program, and any other index yields nullptr.
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &vs_program;
case 4:
return &ps_program;
default:
return nullptr;
}
}
};
// The register state itself, zero-initialized through reg_array.
Regs regs{};
// See the comment in the context register parsing code.
// A width/height pair that can also be read as one raw 32-bit word.
union CbDbExtent {
    struct {
        u16 width;
        u16 height;
    };
    u32 raw{0u};

    // An extent is valid once any bit of the raw word is set.
    [[nodiscard]] bool Valid() const {
        return raw != 0u;
    }
};
// One extent per color buffer, plus a single extent for the depth buffer.
std::array<CbDbExtent, NumColorBuffers> last_cb_extent{};
CbDbExtent last_db_extent{};
public:
    Liverpool();
    ~Liverpool();

    // Submission entry points: a graphics submit takes two command spans
    // (dcb, ccb); a compute submit takes a queue id and one command span (acb).
    void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
    void SubmitAsc(u32 vqid, std::span<const u32> acb);

    // Idle means the submit counter is zero.
    bool IsGpuIdle() const {
        return num_submits.load() == 0;
    }

    // Stores the rasterizer pointer; no ownership is taken here.
    void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
        this->rasterizer = rasterizer_;
    }
private:
    // Coroutine return type: a thin wrapper around the coroutine handle.
    struct Task {
        struct promise_type {
            // Builds the Task that refers back to this promise's coroutine.
            auto get_return_object() {
                Task created{};
                created.handle = std::coroutine_handle<promise_type>::from_promise(*this);
                return created;
            }
            // The coroutine starts suspended and stays suspended after finishing.
            static constexpr std::suspend_always initial_suspend() noexcept {
                return std::suspend_always{};
            }
            static constexpr std::suspend_always final_suspend() noexcept {
                return std::suspend_always{};
            }
            // Exceptions escaping the coroutine body are treated as unreachable.
            void unhandled_exception() {
                UNREACHABLE();
            }
            void return_void() {}
            // Tag type: `co_yield empty{}` suspends without carrying a value.
            struct empty {};
            std::suspend_always yield_value(empty&&) {
                return std::suspend_always{};
            }
        };
        using Handle = std::coroutine_handle<promise_type>;
        Handle handle;
    };
// Coroutines, one per command stream kind; each returns a Task.
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
Task ProcessCeUpdate(std::span<const u32> ccb);
Task ProcessCompute(std::span<const u32> acb, int vqid);
// NOTE(review): takes a std::stop_token, so presumably the entry point of
// process_thread — confirm in the implementation file.
void Process(std::stop_token stoken);
// A FIFO of pending coroutine handles together with a mutex.
// NOTE(review): m_access presumably guards `submits` — confirm at the use sites.
struct GpuQueue {
std::mutex m_access{};
std::queue<Task::Handle> submits{};
};
// One queue per graphics ring and per compute queue (see NumTotalQueues).
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
// Counters shared between the CE and DE sides, plus the constants heap.
struct ConstantEngine {
    // Zeroes all three counters.
    void Reset() {
        ce_compare_count = 0;
        ce_count = 0;
        de_count = 0;
    }

    // How far the CE counter is ahead of the DE counter. Asserts that the DE
    // counter has not overtaken it.
    [[nodiscard]] u32 Diff() const {
        ASSERT_MSG(ce_count >= de_count, "DE counter is ahead of CE");
        const u32 lead = ce_count - de_count;
        return lead;
    }

    u32 ce_compare_count{};
    u32 ce_count{};
    u32 de_count{};
    // Declared here only; the definition must live in an implementation file.
    static std::array<u8, 48_KB> constants_heap;
} cblock{};
// Set through BindRasterizer(); null until then.
Vulkan::Rasterizer* rasterizer{};
// Worker thread; std::jthread requests stop and joins on destruction.
std::jthread process_thread{};
// Submit counter; IsGpuIdle() reports idle when it is zero.
std::atomic<u32> num_submits{};
// NOTE(review): presumably used together to wake the worker on new submits —
// confirm in the implementation file.
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
};
// Compile-time layout checks: every named register in Liverpool::Regs must sit
// at its expected word index. A failure here means a member or a padding size
// inside Regs is wrong.
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
// The index macro is only needed for the checks above.
#undef GFX6_3D_REG_INDEX
} // namespace AmdGpu