Metadata support (#223)
* texture_cache: more image usage flags
* texture_cache: metadata registration
* renderer_vulkan: initial CMask support
* renderer_vulkan: skip redundant FCE and FMask decompression passes
* renderer_vulkan: redundant VO surface registration removed
* renderer_vulkan: initial HTile support
* renderer_vulkan: added support for MSAA attachments
* renderer_vulkan: skip unnecessary metadata updates
parent 059f54838a
commit 2cbbcbd371
@@ -294,15 +294,19 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     const IR::Inst* body = inst.Arg(1).InstRecursive();
     const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
         switch (image.GetType()) {
-        case AmdGpu::ImageType::Color1D:
+        case AmdGpu::ImageType::Color1D: // x
             return {body->Arg(0), body->Arg(1)};
-        case AmdGpu::ImageType::Color1DArray:
-        case AmdGpu::ImageType::Color2D:
+        case AmdGpu::ImageType::Color1DArray: // x, slice
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color2D: // x, y
             return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
-        case AmdGpu::ImageType::Color2DArray:
-        case AmdGpu::ImageType::Color3D:
+        case AmdGpu::ImageType::Color2DArray: // x, y, slice
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color3D: // x, y, z
             return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
-        case AmdGpu::ImageType::Cube:
+        case AmdGpu::ImageType::Cube: // x, y, face
             return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
         default:
             UNREACHABLE_MSG("Unknown image type {}", image.GetType());

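As an aside on the hunk above: the new comments document how many addressing components each image type consumes from the packed operand list before the trailing argument. A simplified standalone sketch (illustrative only, not recompiler code):

#include <cstdio>

enum class ImageType { Color1D, Color1DArray, Color2D, Color2DArray, Color2DMsaa, Color3D, Cube };

// Addressing components consumed before the trailing argument (slice, frag, face, ...),
// mirroring the switch in the hunk above.
constexpr int NumAddressComponents(ImageType type) {
    switch (type) {
    case ImageType::Color1D: // x
        return 1;
    case ImageType::Color1DArray: // x, slice
    case ImageType::Color2D:      // x, y
        return 2;
    case ImageType::Color2DArray: // x, y, slice
    case ImageType::Color2DMsaa:  // x, y, frag
    case ImageType::Color3D:      // x, y, z
    case ImageType::Cube:         // x, y, face
        return 3;
    }
    return 0;
}

int main() {
    std::printf("Color2DMsaa consumes %d components\n", NumAddressComponents(ImageType::Color2DMsaa));
}
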
@@ -324,6 +324,10 @@ struct Liverpool {
             BitField<0, 2, ZFormat> format;
+            BitField<2, 2, u32> num_samples;
+            BitField<13, 3, u32> tile_split;
+            BitField<27, 1, u32> allow_expclear;
+            BitField<28, 1, u32> read_size;
             BitField<29, 1, u32> tile_surface_en;
             BitField<31, 1, u32> zrange_precision;
         } z_info;
         union {
             BitField<0, 1, StencilFormat> format;

@@ -352,7 +356,7 @@ struct Liverpool {
             return u64(z_read_base) << 8;
         }

-        [[nodiscard]] size_t GetSizeAligned() const {
+        size_t GetSizeAligned() const {
             return depth_slice.tile_max * 8;
         }
     };

@@ -606,6 +610,20 @@ struct Liverpool {
         BitField<30, 1, u32> enable;
     };

+    union ColorControl {
+        enum class OperationMode : u32 {
+            Disable = 0u,
+            Normal = 1u,
+            EliminateFastClear = 2u,
+            Resolve = 3u,
+            FmaskDecompress = 5u,
+        };
+
+        BitField<3, 1, u32> degamma_enable;
+        BitField<4, 3, OperationMode> mode;
+        BitField<16, 8, u32> rop3;
+    };
+
     struct ColorBuffer {
         enum class EndianSwap : u32 {
             None = 0,

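For orientation, here is a minimal sketch of what the new mode bitfield decodes; the raw register word is a hypothetical value, and the manual shift/mask stands in for the BitField<4, 3, OperationMode> template declared above:

#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t raw = 5u << 4;              // hypothetical register word: mode bits [4..6] = 5
    const uint32_t mode = (raw >> 4) & 0b111u; // extract the OperationMode field
    std::printf("mode = %u (5 == FmaskDecompress)\n", mode);
}

This is the field that later gates the FCE and FMask-decompression early-outs in the pipeline cache hunks below.
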
@@ -688,11 +706,15 @@ struct Liverpool {
             return u64(base_address) << 8;
         }

-        u64 CmaskAddress() const {
-            return u64(cmask_base_address) << 8;
+        VAddr CmaskAddress() const {
+            return VAddr(cmask_base_address) << 8;
         }

-        [[nodiscard]] size_t GetSizeAligned() const {
+        VAddr FmaskAddress() const {
+            return VAddr(fmask_base_address) << 8;
+        }
+
+        size_t GetSizeAligned() const {
             const auto num_bytes_per_element = NumBits(info.format) / 8u;
             const auto slice_size = (slice.tile_max + 1) * 64u;
             const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element;

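Both accessors shift left by 8 because the base registers store a 256-byte-aligned address with the low eight bits dropped. A quick standalone illustration (the register value is hypothetical):

#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t cmask_base_address = 0x00FE12u;            // hypothetical register contents
    const uint64_t vaddr = uint64_t(cmask_base_address) << 8; // restore the 256-byte-aligned address
    std::printf("CMask VAddr = 0x%llx\n", static_cast<unsigned long long>(vaddr));
}
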
@@ -700,11 +722,11 @@ struct Liverpool {
             return total_size;
         }

-        [[nodiscard]] TilingMode GetTilingMode() const {
+        TilingMode GetTilingMode() const {
             return attrib.tile_mode_index;
         }

-        [[nodiscard]] bool IsTiled() const {
+        bool IsTiled() const {
             return !info.linear_general;
         }

@@ -769,6 +791,18 @@ struct Liverpool {
         BitField<1, 1, u32> stencil_clear_enable;
     };

+    union AaConfig {
+        BitField<0, 3, u32> msaa_num_samples;
+        BitField<4, 1, u32> aa_mask_centroid_dtmn;
+        BitField<13, 4, u32> max_sample_dst;
+        BitField<20, 3, u32> msaa_exposed_samples;
+        BitField<24, 2, u32> detail_to_exposed_mode;
+
+        u32 NumSamples() const {
+            return 1 << msaa_num_samples;
+        }
+    };
+
     union Regs {
         struct {
             INSERT_PADDING_WORDS(0x2C08);

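msaa_num_samples is stored as a log2 value, which is why NumSamples() expands it with a shift. A tiny standalone check:

#include <cstdio>

int main() {
    for (unsigned field = 0; field <= 3; ++field) {
        std::printf("msaa_num_samples = %u -> %u samples\n", field, 1u << field); // 1, 2, 4, 8
    }
}
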
@@ -821,7 +855,8 @@ struct Liverpool {
             u32 draw_initiator;
             INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
             DepthControl depth_control;
-            INSERT_PADDING_WORDS(2);
+            INSERT_PADDING_WORDS(1);
+            ColorControl color_control;
             DepthBufferControl depth_buffer_control;
             ClipperControl clipper_control;
             PolygonControl polygon_control;

@@ -835,7 +870,9 @@ struct Liverpool {
             u32 enable_primitive_id;
             INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
             PolygonOffset poly_offset;
-            INSERT_PADDING_WORDS(0xA318 - 0xA2DF - 5);
+            INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
+            AaConfig aa_config;
+            INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
             ColorBuffer color_buffers[NumColorBuffers];
             INSERT_PADDING_WORDS(0xC242 - 0xA390);
             PrimitiveType primitive_type;

@@ -991,6 +1028,7 @@ static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
 static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
 static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
 static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
+static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
 static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
 static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
 static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);

@@ -998,6 +1036,7 @@ static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
 static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
 static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
 static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
+static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);

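The padding arithmetic above can be sanity-checked against these static_asserts: the padding expressions imply PolygonOffset spans five words at 0xA2DF, so (0xA2F8 - 0xA2DF - 5) words land aa_config exactly at 0xA2F8, and one word of AaConfig plus (0xA318 - 0xA2F8 - 1) words of padding keep color_buffers[0] at 0xA318. A compile-time restatement:

// Compile-time check of the register-offset arithmetic used by the new padding.
static_assert(0xA2DF + 5 + (0xA2F8 - 0xA2DF - 5) == 0xA2F8); // aa_config offset
static_assert(0xA2F8 + 1 + (0xA318 - 0xA2F8 - 1) == 0xA318); // color_buffers[0] offset
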
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/assert.h"
 #include "video_core/amdgpu/pixel_format.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"

 namespace Vulkan::LiverpoolToVK {

@@ -381,6 +382,13 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
         case vk::Format::eB8G8R8A8Srgb:
             return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb;
         }
+    } else {
+        if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) {
+            return vk::Format::eR8G8B8A8Unorm;
+        }
+        if (is_vo_surface && base_format == vk::Format::eB8G8R8A8Srgb) {
+            return vk::Format::eB8G8R8A8Unorm;
+        }
     }
     return base_format;
 }

@@ -422,4 +430,69 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
     }
 }

+static constexpr float U8ToUnorm(u8 v) {
+    static constexpr auto c = 1.0f / 255.0f;
+    return float(v * c);
+}
+
+vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
+    const auto comp_swap = color_buffer.info.comp_swap.Value();
+    ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard ||
+                   comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate,
+               "Unsupported component swap mode {}", static_cast<u32>(comp_swap));
+
+    const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
+
+    const auto& c0 = color_buffer.clear_word0;
+    const auto& c1 = color_buffer.clear_word1;
+    const auto num_bits = AmdGpu::NumBits(color_buffer.info.format);
+
+    vk::ClearColorValue color{};
+    switch (color_buffer.info.number_type) {
+    case AmdGpu::NumberFormat::Snorm:
+        [[fallthrough]];
+    case AmdGpu::NumberFormat::SnormNz:
+        [[fallthrough]];
+    case AmdGpu::NumberFormat::Unorm:
+        [[fallthrough]];
+    case AmdGpu::NumberFormat::Srgb: {
+        switch (num_bits) {
+        case 32: {
+            color.float32 = std::array{
+                U8ToUnorm((c0 >> (comp_swap_alt ? 16 : 0)) & 0xff),
+                U8ToUnorm((c0 >> 8) & 0xff),
+                U8ToUnorm((c0 >> (comp_swap_alt ? 0 : 16)) & 0xff),
+                U8ToUnorm((c0 >> 24) & 0xff),
+            };
+            break;
+        }
+        default: {
+            LOG_ERROR(Render_Vulkan, "Missing clear color conversion for bits {}", num_bits);
+            break;
+        }
+        }
+        break;
+    }
+    default: {
+        LOG_ERROR(Render_Vulkan, "Missing clear color conversion for type {}",
+                  color_buffer.info.number_type.Value());
+        break;
+    }
+    }
+    return {.color = color};
+}
+
+vk::SampleCountFlagBits NumSamples(u32 num_samples) {
+    switch (num_samples) {
+    case 1:
+        return vk::SampleCountFlagBits::e1;
+    case 2:
+        return vk::SampleCountFlagBits::e2;
+    case 4:
+        return vk::SampleCountFlagBits::e4;
+    default:
+        UNREACHABLE();
+    }
+}
+
 } // namespace Vulkan::LiverpoolToVK

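To see what the common 32-bit unorm path of ColorBufferClearValue produces, here is a self-contained sketch of the byte-lane unpacking; the clear word is a hypothetical value, and Alternate component swap exchanges the R and B lanes:

#include <cstdint>
#include <cstdio>

static constexpr float U8ToUnorm(uint8_t v) {
    return v * (1.0f / 255.0f);
}

int main() {
    const uint32_t c0 = 0xAABBCCDD; // hypothetical clear_word0 contents
    for (const bool comp_swap_alt : {false, true}) {
        const float r = U8ToUnorm((c0 >> (comp_swap_alt ? 16 : 0)) & 0xff);
        const float g = U8ToUnorm((c0 >> 8) & 0xff);
        const float b = U8ToUnorm((c0 >> (comp_swap_alt ? 0 : 16)) & 0xff);
        const float a = U8ToUnorm((c0 >> 24) & 0xff);
        std::printf("%s rgba = %.3f %.3f %.3f %.3f\n",
                    comp_swap_alt ? "Alternate:" : "Standard: ", r, g, b, a);
    }
}
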
@@ -46,6 +46,10 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
 vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
                        Liverpool::DepthBuffer::StencilFormat stencil_format);

+vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer);
+
+vk::SampleCountFlagBits NumSamples(u32 num_samples);
+
 void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);

 } // namespace Vulkan::LiverpoolToVK

@@ -40,7 +40,8 @@ public:

     Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
                         VAddr cpu_address) {
-        auto& image = RegisterVideoOutSurface(attribute, cpu_address);
+        const auto info = VideoCore::ImageInfo{attribute};
+        auto& image = texture_cache.FindImage(info, cpu_address);
         return PrepareFrameInternal(image);
     }

@@ -82,7 +82,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler

 ComputePipeline::~ComputePipeline() = default;

-void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
+bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
     boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;

@@ -93,12 +93,11 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
     for (const auto& buffer : info.buffers) {
         const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
         const u32 size = vsharp.GetSize();
-        const VAddr addr = vsharp.base_address.Value();
-        texture_cache.OnCpuWrite(addr);
-        const u32 offset = staging.Copy(addr, size,
+        const VAddr address = vsharp.base_address.Value();
+        texture_cache.OnCpuWrite(address);
+        const u32 offset = staging.Copy(address, size,
                                         buffer.is_storage ? instance.StorageMinAlignment()
                                                           : instance.UniformMinAlignment());
-        // const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
         buffer_infos.emplace_back(staging.Handle(), offset, size);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,

@@ -109,6 +108,21 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
                                   : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
+
+        // Most of the time when a metadata is updated with a shader it gets cleared. It means we
+        // can skip the whole dispatch and update the tracked state instead. Also, it is not
+        // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
+        // need its full emulation anyways. For cases of metadata read a warning will be logged.
+        if (buffer.is_storage) {
+            if (texture_cache.TouchMeta(address, true)) {
+                LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+                return false;
+            }
+        } else {
+            if (texture_cache.IsMeta(address)) {
+                LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
+            }
+        }
     }

     for (const auto& image : info.images) {

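Design note on the block just added: a write through a storage buffer that lands on a tracked metadata address is assumed to be a clear, so TouchMeta(address, true) records the new state and the early return false lets the caller drop the dispatch entirely; a plain read of a metadata address only logs a warning, since properly servicing such readers (the comment cites HTile introspection and CRAA) would need full metadata emulation.
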
@@ -124,6 +138,10 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
                                   : vk::DescriptorType::eSampledImage,
             .pImageInfo = &image_infos.back(),
         });
+
+        if (texture_cache.IsMeta(tsharp.Address())) {
+            LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)");
+        }
     }
     for (const auto& sampler : info.samplers) {
         const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);

@@ -139,11 +157,13 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
         });
     }

-    if (!set_writes.empty()) {
-        const auto cmdbuf = scheduler.CommandBuffer();
-        cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
-                                    set_writes);
-    }
+    if (set_writes.empty()) {
+        return false;
+    }
+
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
+    return true;
 }

 } // namespace Vulkan

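With this change BindResources signals, through its bool result, both the metadata-skip early-out above and the case where no descriptor writes were produced; Rasterizer::DispatchDirect (see its hunk further down) checks the result and skips the compute dispatch when it is false.
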
@@ -31,7 +31,7 @@ public:
        return *pipeline;
    }

-    void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
+    bool BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                        VideoCore::TextureCache& texture_cache) const;

 private:

@@ -92,7 +92,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
     };

     const vk::PipelineMultisampleStateCreateInfo multisampling = {
-        .rasterizationSamples = vk::SampleCountFlagBits::e1,
+        .rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples),
         .sampleShadingEnable = false,
     };

@@ -327,8 +327,9 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
     for (const auto& stage : stages) {
         for (const auto& buffer : stage.buffers) {
             const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
+            const VAddr address = vsharp.base_address.Value();
             const u32 size = vsharp.GetSize();
-            const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
+            const u32 offset = staging.Copy(address, size,
                                             buffer.is_storage ? instance.StorageMinAlignment()
                                                               : instance.UniformMinAlignment());
             buffer_infos.emplace_back(staging.Handle(), offset, size);

@@ -341,6 +342,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
                                       : vk::DescriptorType::eUniformBuffer,
                 .pBufferInfo = &buffer_infos.back(),
             });
+
+            if (texture_cache.IsMeta(address)) {
+                LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
+            }
         }

         for (const auto& image : stage.images) {

@@ -357,6 +362,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
                                       : vk::DescriptorType::eSampledImage,
                 .pImageInfo = &image_infos.back(),
             });
+
+            if (texture_cache.IsMeta(tsharp.Address())) {
+                LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)");
+            }
         }
         for (const auto& sampler : stage.samplers) {
             const auto ssharp =

@@ -38,6 +38,7 @@ struct GraphicsPipelineKey {
     float depth_bias_slope_factor;
     float depth_bias_clamp;
     u32 depth_bias_enable;
+    u32 num_samples = 1;
     Liverpool::StencilControl stencil;
     Liverpool::StencilRefMask stencil_ref_front;
     Liverpool::StencilRefMask stencil_ref_back;

@@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
             .logicOp = features.logicOp,
             .samplerAnisotropy = features.samplerAnisotropy,
             .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
+            .shaderStorageImageMultisample = true,
             .shaderClipDistance = features.shaderClipDistance,
         },
     },

@@ -114,12 +114,18 @@ void PipelineCache::RefreshGraphicsKey() {
     key.cull_mode = regs.polygon_control.CullingMode();
     key.clip_space = regs.clipper_control.clip_space;
     key.front_face = regs.polygon_control.front_face;
+    key.num_samples = regs.aa_config.NumSamples();

     const auto& db = regs.depth_buffer;
     if (key.depth.depth_enable) {
         key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
         key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
     }

+    // TODO: Should be a check for `OperationMode::Disable` once we emulate HW state init packet
+    // sent by system software.
+    const auto skip_cb_binding = false;
+
     // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
     // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order.
     // We need to do some arrays compaction at this stage

@@ -129,7 +135,7 @@ void PipelineCache::RefreshGraphicsKey() {
     int remapped_cb{};
     for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
         auto const& col_buf = regs.color_buffers[cb];
-        if (!col_buf) {
+        if (!col_buf || skip_cb_binding) {
             continue;
         }
         const auto base_format =

@@ -160,6 +166,19 @@ void PipelineCache::RefreshGraphicsKey() {
 std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
     const auto& regs = liverpool->regs;

+    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
+    // actual draw hence can skip pipeline creation.
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
+        LOG_TRACE(Render_Vulkan, "FCE pass skipped");
+        return {};
+    }
+
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
+        // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
+        LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
+        return {};
+    }
+
     u32 binding{};
     std::array<Shader::IR::Program, MaxShaderStages> programs;
     std::array<const Shader::Info*, MaxShaderStages> infos{};

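The two early-outs above can be read as a single predicate over ColorControl::OperationMode. A standalone sketch, with enum values copied from the liverpool.h hunk earlier in this commit:

#include <cstdio>

enum class OperationMode : unsigned {
    Disable = 0,
    Normal = 1,
    EliminateFastClear = 2,
    Resolve = 3,
    FmaskDecompress = 5,
};

// FCE and FMask-decompression "draws" carry no real shading work once metadata state is
// tracked on the CPU, so pipeline creation can bail out early for them.
constexpr bool SkipsPipelineCreation(OperationMode mode) {
    return mode == OperationMode::EliminateFastClear || mode == OperationMode::FmaskDecompress;
}

int main() {
    std::printf("%d %d\n", SkipsPipelineCreation(OperationMode::Normal),
                SkipsPipelineCreation(OperationMode::FmaskDecompress)); // prints: 0 1
}
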
@@ -40,12 +40,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     const auto& regs = liverpool->regs;
     const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
     const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
+    if (!pipeline) {
+        return;
+    }
+
     pipeline->BindResources(memory, vertex_index_buffer, texture_cache);

     boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
         color_attachments{};
-    vk::RenderingAttachmentInfo depth_attachment{};
-    u32 num_depth_attachments{};
     for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
         const auto& col_buf = regs.color_buffers[col_buf_id];
         if (!col_buf) {

@@ -55,17 +57,26 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         const auto& hint = liverpool->last_cb_extent[col_buf_id];
         const auto& image_view = texture_cache.RenderTarget(col_buf, hint);

+        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
         color_attachments.push_back({
             .imageView = *image_view.image_view,
             .imageLayout = vk::ImageLayout::eGeneral,
-            .loadOp = vk::AttachmentLoadOp::eLoad,
+            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
             .storeOp = vk::AttachmentStoreOp::eStore,
+            .clearValue =
+                is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
         });
+        texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
     }

+    vk::RenderingAttachmentInfo depth_attachment{};
+    u32 num_depth_attachments{};
     if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
-        const bool is_clear = regs.depth_render_control.depth_clear_enable;
+        const auto htile_address = regs.depth_htile_data_base.GetAddress();
+        const bool is_clear = regs.depth_render_control.depth_clear_enable ||
+                              texture_cache.IsMetaCleared(htile_address);
         const auto& image_view =
-            texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
+            texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent);
         depth_attachment = {
             .imageView = *image_view.image_view,
             .imageLayout = vk::ImageLayout::eGeneral,

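Note how the work skipped by the pipeline cache is folded in here: when the tracked CMask or HTile state reports "cleared", the attachment is bound with vk::AttachmentLoadOp::eClear plus the decoded clear value instead of running a dedicated FCE or decompression draw, and TouchMeta(address, false) then marks the metadata as consumed so later draws load the surface normally.
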
@@ -74,6 +85,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
             .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
                                                           .stencil = regs.stencil_clear}},
         };
+        texture_cache.TouchMeta(htile_address, false);
         num_depth_attachments++;
     }

@@ -112,7 +124,14 @@ void Rasterizer::DispatchDirect() {
     const auto cmdbuf = scheduler.CommandBuffer();
     const auto& cs_program = liverpool->regs.cs_program;
     const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
-    pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    if (!pipeline) {
+        return;
+    }
+
+    const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    if (!has_resources) {
+        return;
+    }

     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
     cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);

@@ -91,7 +91,7 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
             usage |= vk::ImageUsageFlagBits::eColorAttachment;
         }
     }
-    if (info.is_tiled || info.is_storage) {
+    if (info.is_tiled || info.usage.storage) {
         usage |= vk::ImageUsageFlagBits::eStorage;
     }
     return usage;

@@ -149,10 +149,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
     size.depth = 1;
     pitch = size.width;
     guest_size_bytes = buffer.GetSizeAligned();
+    meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
+    meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
     usage.render_target = true;
 }

-ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
+ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address,
                      const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
     is_tiled = false;
     pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);

@@ -163,6 +165,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
     size.depth = 1;
     pitch = size.width;
     guest_size_bytes = buffer.GetSizeAligned();
+    meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
     usage.depth_target = true;
 }

@@ -178,6 +181,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
     resources.levels = image.NumLevels();
     resources.layers = image.NumLayers();
     guest_size_bytes = image.GetSizeAligned();
+    usage.texture = true;
 }

 UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)

@@ -248,6 +252,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
         },
         .mipLevels = static_cast<u32>(info.resources.levels),
         .arrayLayers = static_cast<u32>(info.resources.layers),
+        .samples = LiverpoolToVK::NumSamples(info.num_samples),
         .tiling = vk::ImageTiling::eOptimal,
         .usage = usage,
         .initialLayout = vk::ImageLayout::eUndefined,

@@ -30,6 +30,7 @@ enum ImageFlagBits : u32 {
     Tracked = 1 << 4,        ///< Writes and reads are being hooked from the CPU
     Registered = 1 << 6,     ///< True when the image is registered
     Picked = 1 << 7,         ///< Temporary flag to mark the image as picked
+    MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

@@ -38,7 +39,7 @@ struct ImageInfo {
     explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
     explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
                        const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
-    explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
+    explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address,
                        const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
     explicit ImageInfo(const AmdGpu::Image& image) noexcept;

@@ -49,16 +50,21 @@ struct ImageInfo {
     bool IsPacked() const;
     bool IsDepthStencil() const;

+    struct {
+        VAddr cmask_addr;
+        VAddr fmask_addr;
+        VAddr htile_addr;
+    } meta_info{};
+
     struct {
         u32 texture : 1;
         u32 storage : 1;
         u32 render_target : 1;
         u32 depth_target : 1;
         u32 vo_buffer : 1;
-    } usage; // Usage data tracked during image lifetime
+    } usage{}; // Usage data tracked during image lifetime

     bool is_tiled = false;
-    bool is_storage = false;
     vk::Format pixel_format = vk::Format::eUndefined;
     vk::ImageType type = vk::ImageType::e1D;
     SubresourceExtent resources;

@@ -131,6 +131,8 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re
         image_id = image_ids[0];
     }

+    RegisterMeta(info, image_id);
+
     Image& image = slot_images[image_id];
     if (True(image.flags & ImageFlagBits::CpuModified) &&
         (!image_ids.empty() || refresh_on_create)) {

@@ -150,7 +152,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
     // impossible to use. However, during view creation, if an image isn't used as storage we can
     // temporary remove its storage bit.
     std::optional<vk::ImageUsageFlags> usage_override;
-    if (!image.info.is_storage) {
+    if (!image.info.usage.storage) {
         usage_override = image.usage & ~vk::ImageUsageFlagBits::eStorage;
     }

@@ -161,12 +163,15 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
 }

 ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
-    Image& image = FindImage(ImageInfo{desc}, desc.Address());
+    const ImageInfo info{desc};
+    Image& image = FindImage(info, desc.Address());

     if (is_storage) {
         image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
+        image.info.usage.storage = true;
     } else {
         image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead);
+        image.info.usage.texture = true;
     }

     const ImageViewInfo view_info{desc, is_storage};

@@ -183,13 +188,16 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
                   vk::AccessFlagBits::eColorAttachmentWrite |
                       vk::AccessFlagBits::eColorAttachmentRead);

+    image.info.usage.render_target = true;
+
     ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer};
     return RegisterImageView(image, view_info);
 }

 ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
+                                     VAddr htile_address,
                                      const AmdGpu::Liverpool::CbDbExtent& hint) {
-    const ImageInfo info{buffer, hint};
+    const ImageInfo info{buffer, htile_address, hint};
     auto& image = FindImage(info, buffer.Address(), false);
     image.flags &= ~ImageFlagBits::CpuModified;

@@ -197,6 +205,8 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
                   vk::AccessFlagBits::eDepthStencilAttachmentWrite |
                       vk::AccessFlagBits::eDepthStencilAttachmentRead);

+    image.info.usage.depth_target = true;
+
     ImageViewInfo view_info;
     view_info.format = info.pixel_format;
     return RegisterImageView(image, view_info);

@@ -276,6 +286,47 @@ void TextureCache::RegisterImage(ImageId image_id) {
                   [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 }

+void TextureCache::RegisterMeta(const ImageInfo& info, ImageId image_id) {
+    Image& image = slot_images[image_id];
+
+    if (image.flags & ImageFlagBits::MetaRegistered) {
+        return;
+    }
+
+    bool registered = true;
+    // Current resource tracking implementation allows us to detect usage of meta only in the last
+    // moment, so we likely will miss its first clear. To avoid this and make first frame, where
+    // the meta is encountered, looks correct we set its state to "cleared" at registrations time.
+    if (info.usage.render_target) {
+        if (info.meta_info.cmask_addr) {
+            surface_metas.emplace(
+                info.meta_info.cmask_addr,
+                MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true});
+            image.info.meta_info.cmask_addr = info.meta_info.cmask_addr;
+        }
+
+        if (info.meta_info.fmask_addr) {
+            surface_metas.emplace(
+                info.meta_info.fmask_addr,
+                MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true});
+            image.info.meta_info.fmask_addr = info.meta_info.fmask_addr;
+        }
+    } else if (info.usage.depth_target) {
+        if (info.meta_info.htile_addr) {
+            surface_metas.emplace(
+                info.meta_info.htile_addr,
+                MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true});
+            image.info.meta_info.htile_addr = info.meta_info.htile_addr;
+        }
+    } else {
+        registered = false;
+    }
+
+    if (registered) {
+        image.flags |= ImageFlagBits::MetaRegistered;
+    }
+}
+
 void TextureCache::UnregisterImage(ImageId image_id) {
     Image& image = slot_images[image_id];
     ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),

@@ -29,6 +29,17 @@ class TextureCache {
     static constexpr u64 PageBits = 20;
     static constexpr u64 PageMask = (1ULL << PageBits) - 1;

+    struct MetaDataInfo {
+        enum class Type {
+            CMask,
+            FMask,
+            HTile,
+        };
+
+        Type type;
+        bool is_cleared;
+    };
+
 public:
     explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
     ~TextureCache();

@@ -47,6 +58,7 @@ public:
     [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
                                           const AmdGpu::Liverpool::CbDbExtent& hint);
     [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
+                                         VAddr htile_address,
                                          const AmdGpu::Liverpool::CbDbExtent& hint);

     /// Reuploads image contents.

@@ -60,6 +72,27 @@ public:
         return slot_images[id];
     }

+    bool IsMeta(VAddr address) const {
+        return surface_metas.contains(address);
+    }
+
+    bool IsMetaCleared(VAddr address) const {
+        const auto& it = surface_metas.find(address);
+        if (it != surface_metas.end()) {
+            return it.value().is_cleared;
+        }
+        return false;
+    }
+
+    bool TouchMeta(VAddr address, bool is_clear) {
+        auto it = surface_metas.find(address);
+        if (it != surface_metas.end()) {
+            it.value().is_cleared = is_clear;
+            return true;
+        }
+        return false;
+    }
+
 private:
     ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info);

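The three helpers above implement a small per-surface state machine. A self-contained sketch of the protocol, with std::map standing in for tsl::robin_map and a hypothetical surface address:

#include <cstdint>
#include <cstdio>
#include <map>

using VAddr = uint64_t;

struct MetaDataInfo {
    bool is_cleared;
};

std::map<VAddr, MetaDataInfo> surface_metas;

// Returns true only if the address is a tracked metadata surface, updating its state.
bool TouchMeta(VAddr address, bool is_clear) {
    const auto it = surface_metas.find(address);
    if (it == surface_metas.end()) {
        return false;
    }
    it->second.is_cleared = is_clear;
    return true;
}

int main() {
    const VAddr cmask = 0x1000; // hypothetical CMask address
    surface_metas.emplace(cmask, MetaDataInfo{.is_cleared = true}); // registration default
    // A compute-shader write to the surface is treated as a clear; the dispatch is elided.
    if (TouchMeta(cmask, true)) {
        std::printf("dispatch skipped, CMask marked cleared\n");
    }
    // The next draw consumes the clear via vk::AttachmentLoadOp::eClear, then resets the state.
    TouchMeta(cmask, false);
}
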
@@ -123,6 +156,9 @@ private:
     /// Register image in the page table
     void RegisterImage(ImageId image);

+    /// Register metadata surfaces attached to the image
+    void RegisterMeta(const ImageInfo& info, ImageId image);
+
     /// Unregister image from the page table
     void UnregisterImage(ImageId image);

@@ -145,6 +181,7 @@ private:
     tsl::robin_map<u64, Sampler> samplers;
     tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
     boost::icl::interval_map<VAddr, s32> cached_pages;
+    tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
     std::mutex mutex;
 #ifdef _WIN64
     void* veh_handle{};