review iteration
parent e790ec775f
commit dd5a25fda2
@@ -83,15 +83,6 @@ ComputePipeline::~ComputePipeline() = default;
 
 void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
-    static const u64 MinUniformAlignment = instance.UniformMinAlignment();
-
-    const auto map_staging = [&](auto src, size_t size) {
-        const auto [data, offset, _] = staging.Map(size, MinUniformAlignment);
-        std::memcpy(data, reinterpret_cast<const void*>(src), size);
-        staging.Commit(size);
-        return offset;
-    };
-
     // Bind resource buffers and textures.
     boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
@@ -103,7 +94,8 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
         const u32 size = vsharp.GetSize();
         const VAddr addr = vsharp.base_address.Value();
         texture_cache.OnCpuWrite(addr);
-        const u32 offset = map_staging(addr, size);
+        const u32 offset =
+            staging.Copy(addr, size, buffer.is_storage ? 4 : instance.UniformMinAlignment());
         // const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
         buffer_infos.emplace_back(staging.Handle(), offset, size);
         set_writes.push_back({
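The alignment passed to staging.Copy above mirrors Vulkan's buffer-offset limits: descriptor offsets into the stream buffer must be multiples of minUniformBufferOffsetAlignment for uniform buffers and minStorageBufferOffsetAlignment for storage buffers. A minimal sketch of that selection, assuming access to the device's vk::PhysicalDeviceLimits (the hard-coded 4 in the diff stands in for the storage-buffer limit):

    // Hypothetical helper, not part of the commit: derive the required descriptor
    // offset alignment from the device limits instead of hard-coding it.
    vk::DeviceSize BufferOffsetAlignment(const vk::PhysicalDeviceLimits& limits, bool is_storage) {
        return is_storage ? limits.minStorageBufferOffsetAlignment
                          : limits.minUniformBufferOffsetAlignment;
    }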
@@ -278,79 +278,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
 
 void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                      VideoCore::TextureCache& texture_cache) const {
-    static const u64 MinUniformAlignment = instance.UniformMinAlignment();
-
-    const auto map_staging = [&](auto src, size_t size) {
-        const auto [data, offset, _] = staging.Map(size, MinUniformAlignment);
-        std::memcpy(data, reinterpret_cast<const void*>(src), size);
-        staging.Commit(size);
-        return offset;
-    };
-
-    const auto cmdbuf = scheduler.CommandBuffer();
-
-    const auto& vs_info = stages[0];
-    if (!vs_info.vs_inputs.empty()) {
-        std::array<vk::Buffer, MaxVertexBufferCount> host_buffers;
-        std::array<vk::DeviceSize, MaxVertexBufferCount> host_offsets;
-        boost::container::static_vector<AmdGpu::Buffer, MaxVertexBufferCount> guest_buffers;
-
-        struct BufferRange {
-            VAddr base_address;
-            VAddr end_address;
-            u64 offset; // offset in the mapped memory
-
-            size_t GetSize() const {
-                return end_address - base_address;
-            }
-        };
-
-        // Calculate buffers memory overlaps
-        std::vector<BufferRange> ranges{};
-        for (const auto& input : vs_info.vs_inputs) {
-            const auto& buffer = guest_buffers.emplace_back(
-                vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));
-            ranges.emplace_back(buffer.base_address.Value(),
-                                buffer.base_address.Value() + buffer.GetSize());
-        }
-        std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
-            return lhv.base_address < rhv.base_address;
-        });
-
-        boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
-        for (auto range : ranges) {
-            auto& prev_range = ranges.back();
-            if (prev_range.end_address < range.base_address) {
-                ranges_merged.emplace_back(range);
-            } else {
-                ranges_merged.back().end_address =
-                    std::max(prev_range.end_address, range.end_address);
-            }
-        }
-
-        // Map buffers
-        for (auto& range : ranges_merged) {
-            range.offset = map_staging(range.base_address, range.GetSize());
-        }
-
-        // Bind vertex buffers
-        const size_t num_buffers = guest_buffers.size();
-        for (u32 i = 0; i < num_buffers; ++i) {
-            const auto& buffer = guest_buffers[i];
-            const auto& host_buffer = std::ranges::find_if(
-                ranges_merged.cbegin(), ranges_merged.cend(), [&](const BufferRange& range) {
-                    return (buffer.base_address >= range.base_address);
-                });
-            assert(host_buffer != ranges_merged.cend());
-
-            host_buffers[i] = staging.Handle();
-            host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
-        }
-
-        if (num_buffers > 0) {
-            cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
-        }
-    }
+    BindVertexBuffers(staging);
 
     // Bind resource buffers and textures.
     boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
@@ -362,7 +290,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
         for (const auto& buffer : stage.buffers) {
             const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
             const u32 size = vsharp.GetSize();
-            const u32 offset = map_staging(vsharp.base_address.Value(), size);
+            const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
+                                            buffer.is_storage ? 4 : instance.UniformMinAlignment());
             buffer_infos.emplace_back(staging.Handle(), offset, size);
             set_writes.push_back({
                 .dstSet = VK_NULL_HANDLE,
@@ -406,9 +335,76 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
     }
 
     if (!set_writes.empty()) {
         const auto cmdbuf = scheduler.CommandBuffer();
         cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
                                     set_writes);
     }
 }
+
+void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
+    const auto& vs_info = stages[0];
+    if (vs_info.vs_inputs.empty()) {
+        return;
+    }
+
+    std::array<vk::Buffer, MaxVertexBufferCount> host_buffers;
+    std::array<vk::DeviceSize, MaxVertexBufferCount> host_offsets;
+    boost::container::static_vector<AmdGpu::Buffer, MaxVertexBufferCount> guest_buffers;
+
+    struct BufferRange {
+        VAddr base_address;
+        VAddr end_address;
+        u64 offset; // offset in the mapped memory
+
+        size_t GetSize() const {
+            return end_address - base_address;
+        }
+    };
+
+    // Calculate buffers memory overlaps
+    std::vector<BufferRange> ranges{};
+    for (const auto& input : vs_info.vs_inputs) {
+        const auto& buffer = guest_buffers.emplace_back(
+            vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));
+        ranges.emplace_back(buffer.base_address.Value(),
+                            buffer.base_address.Value() + buffer.GetSize());
+    }
+    std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
+        return lhv.base_address < rhv.base_address;
+    });
+
+    boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
+    for (auto range : ranges) {
+        auto& prev_range = ranges.back();
+        if (prev_range.end_address < range.base_address) {
+            ranges_merged.emplace_back(range);
+        } else {
+            ranges_merged.back().end_address = std::max(prev_range.end_address, range.end_address);
+        }
+    }
+
+    // Map buffers
+    for (auto& range : ranges_merged) {
+        range.offset = staging.Copy(range.base_address, range.GetSize(), 4);
+    }
+
+    // Bind vertex buffers
+    const size_t num_buffers = guest_buffers.size();
+    for (u32 i = 0; i < num_buffers; ++i) {
+        const auto& buffer = guest_buffers[i];
+        const auto& host_buffer = std::ranges::find_if(
+            ranges_merged.cbegin(), ranges_merged.cend(),
+            [&](const BufferRange& range) { return (buffer.base_address >= range.base_address); });
+        assert(host_buffer != ranges_merged.cend());
+
+        host_buffers[i] = staging.Handle();
+        host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
+    }
+
+    if (num_buffers > 0) {
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
+    }
+}
+
 } // namespace Vulkan
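For reference, the merge pass in BindVertexBuffers collapses the sorted guest address ranges so each overlapping region is copied to the staging buffer only once. A self-contained sketch of that interval merge, with hypothetical names and plain integers standing in for the BufferRange/VAddr types above:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Range {
        uint64_t base; // first byte of the range
        uint64_t end;  // one past the last byte
    };

    // Sort by base address, then fold each range into the previous merged one
    // whenever they touch or overlap.
    std::vector<Range> MergeRanges(std::vector<Range> ranges) {
        std::ranges::sort(ranges, {}, &Range::base);
        std::vector<Range> merged;
        for (const Range& r : ranges) {
            if (merged.empty() || merged.back().end < r.base) {
                merged.push_back(r);
            } else {
                merged.back().end = std::max(merged.back().end, r.end);
            }
        }
        return merged;
    }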
@@ -75,6 +75,7 @@ public:
 
 private:
     void BuildDescSetLayout();
+    void BindVertexBuffers(StreamBuffer& staging) const;
 
 private:
    const Instance& instance;
@@ -231,4 +231,12 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
     }
 }
 
+u64 StreamBuffer::Copy(VAddr src, size_t size, size_t alignment /*= 0*/) {
+    static const u64 MinUniformAlignment = instance.UniformMinAlignment();
+    const auto [data, offset, _] = Map(size, MinUniformAlignment);
+    std::memcpy(data, reinterpret_cast<const void*>(src), size);
+    Commit(size);
+    return offset;
+}
+
 } // namespace Vulkan
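As committed, the body above always maps with MinUniformAlignment and does not consult the alignment parameter. A sketch of a variant that forwards the caller-provided alignment, assuming Map accepts an arbitrary alignment and treating 0 as "use the uniform minimum" (hypothetical, not what the commit does):

    u64 StreamBuffer::Copy(VAddr src, size_t size, size_t alignment /*= 0*/) {
        // Fall back to the device's minimum uniform-buffer offset alignment when the
        // caller does not request a specific alignment.
        const u64 align = alignment != 0 ? alignment : instance.UniformMinAlignment();
        const auto [data, offset, _] = Map(size, align);
        std::memcpy(data, reinterpret_cast<const void*>(src), size);
        Commit(size);
        return offset;
    }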
@@ -40,6 +40,9 @@ public:
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
     void Commit(u64 size);
 
+    /// Maps and commits a memory region with user provided data
+    u64 Copy(VAddr src, size_t size, size_t alignment = 0);
+
     vk::Buffer Handle() const noexcept {
         return buffer;
     }
@@ -194,12 +194,7 @@ void TextureCache::RefreshImage(Image& image) {
     {
         if (!tile_manager.TryDetile(image)) {
             // Upload data to the staging buffer.
-            const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
-            const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
-            std::memcpy(data, image_data, image.info.guest_size_bytes);
-            staging.Commit(image.info.guest_size_bytes);
-
-            const auto cmdbuf = scheduler.CommandBuffer();
+            const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4);
             image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
 
             // Copy to the image.
@@ -217,6 +212,7 @@ void TextureCache::RefreshImage(Image& image) {
                 .imageExtent = {image.info.size.width, image.info.size.height, 1},
             };
 
+            const auto cmdbuf = scheduler.CommandBuffer();
             cmdbuf.copyBufferToImage(staging.Handle(), image.image,
                                      vk::ImageLayout::eTransferDstOptimal, image_copy);
         }
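The offset returned by staging.Copy in the previous hunk presumably feeds the bufferOffset of the region passed to copyBufferToImage. A sketch of what a complete vk::BufferImageCopy for a single-mip, single-layer colour image could look like; only the imageExtent line appears verbatim in the diff, the remaining fields are illustrative:

    const vk::BufferImageCopy image_copy = {
        .bufferOffset = offset, // where staging.Copy placed the pixel data
        .bufferRowLength = 0,   // 0 = rows tightly packed
        .bufferImageHeight = 0, // 0 = slices tightly packed
        .imageSubresource =
            {
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        .imageOffset = {0, 0, 0},
        .imageExtent = {image.info.size.width, image.info.size.height, 1},
    };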
@@ -304,16 +304,12 @@ bool TileManager::TryDetile(Image& image) {
         return false;
     }
 
-    const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
-    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
-    std::memcpy(data, image_data, image.info.guest_size_bytes);
-    staging.Commit(image.info.guest_size_bytes);
+    const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4);
+    image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
 
     auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl);
 
-    image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
-
     const vk::DescriptorBufferInfo input_buffer_info{
         .buffer = staging.Handle(),
         .offset = offset,