video_core: Attempt to fix AMD regression

This commit is contained in:
IndecisiveTurtle 2024-08-29 22:01:02 +03:00
parent f00ec3ddbc
commit 0ba8eee30d
10 changed files with 18 additions and 10 deletions

View File

@ -187,6 +187,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddCapability(spv::Capability::Int64); ctx.AddCapability(spv::Capability::Int64);
if (info.has_storage_images || info.has_image_buffers) { if (info.has_storage_images || info.has_image_buffers) {
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
} }
if (info.has_texel_buffers) { if (info.has_texel_buffers) {

View File

@ -266,7 +266,8 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
const auto& buffer = ctx.texture_buffers[handle]; const auto& buffer = ctx.texture_buffers[handle];
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
Id texel = ctx.OpImageFetch(buffer.result_type, tex_buffer, coord); Id texel = buffer.is_image ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord)
: ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
if (buffer.is_integer) { if (buffer.is_integer) {
texel = ctx.OpBitcast(ctx.F32[4], texel); texel = ctx.OpBitcast(ctx.F32[4], texel);
} }

View File

@ -406,6 +406,7 @@ void EmitContext::DefineTextureBuffers() {
.image_type = image_type, .image_type = image_type,
.result_type = sampled_type[4], .result_type = sampled_type[4],
.is_integer = is_integer, .is_integer = is_integer,
.is_image = desc.is_written,
}); });
interfaces.push_back(id); interfaces.push_back(id);
} }

View File

@ -215,6 +215,7 @@ public:
Id image_type; Id image_type;
Id result_type; Id result_type;
bool is_integer; bool is_integer;
bool is_image;
}; };
u32& binding; u32& binding;

View File

@ -119,7 +119,7 @@ vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataF
dfmt == view.dfmt && nfmt == view.nfmt; dfmt == view.dfmt && nfmt == view.nfmt;
})}; })};
if (it != views.end()) { if (it != views.end()) {
return it->handle; return *it->handle;
} }
const vk::BufferUsageFlags2CreateInfoKHR usage_flags = { const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
.usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
@ -138,9 +138,10 @@ vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataF
.is_written = is_written, .is_written = is_written,
.dfmt = dfmt, .dfmt = dfmt,
.nfmt = nfmt, .nfmt = nfmt,
.handle = instance->GetDevice().createBufferView(view_ci), .handle = instance->GetDevice().createBufferViewUnique(view_ci),
}); });
return views.back().handle; LOG_INFO(Render_Vulkan, "Num buffer views {}", views.size());
return *views.back().handle;
} }
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;

View File

@ -134,7 +134,7 @@ public:
bool is_written; bool is_written;
AmdGpu::DataFormat dfmt; AmdGpu::DataFormat dfmt;
AmdGpu::NumberFormat nfmt; AmdGpu::NumberFormat nfmt;
vk::BufferView handle; vk::UniqueBufferView handle;
}; };
std::vector<BufferView> views; std::vector<BufferView> views;
}; };

View File

@ -228,11 +228,12 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
return regs.num_indices; return regs.num_indices;
} }
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written) { std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
bool is_texel_buffer) {
std::scoped_lock lk{mutex}; std::scoped_lock lk{mutex};
static constexpr u64 StreamThreshold = CACHING_PAGESIZE; static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
if (!is_written && size < StreamThreshold && !is_gpu_dirty) { if (!is_written && !is_texel_buffer && size < StreamThreshold && !is_gpu_dirty) {
// For small uniform buffers that have not been modified by gpu // For small uniform buffers that have not been modified by gpu
// use device local stream buffer to reduce renderpass breaks. // use device local stream buffer to reduce renderpass breaks.
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());

View File

@ -66,7 +66,9 @@ public:
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
/// Obtains a buffer for the specified region. /// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written); [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size,
bool is_written,
bool is_texel_buffer = false);
/// Obtains a temporary buffer for usage in texture cache. /// Obtains a temporary buffer for usage in texture cache.
[[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size); [[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);

View File

@ -239,7 +239,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
} }
const u32 alignment = instance.TexelBufferMinAlignment(); const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] = const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, desc.is_written); buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(), ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride"); "Texel buffer stride must match format stride");

View File

@ -412,7 +412,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
const u32 size = vsharp.GetSize(); const u32 size = vsharp.GetSize();
const u32 alignment = instance.TexelBufferMinAlignment(); const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] = const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written); buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written, true);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(), ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride"); "Texel buffer stride must match format stride");