video_core: Track renderpass scopes properly
This commit is contained in:
parent
ad10020836
commit
22b930ba5e
|
@ -209,6 +209,8 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
||||||
|
|
||||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||||
bool is_eop /*= false*/) {
|
bool is_eop /*= false*/) {
|
||||||
|
std::scoped_lock lock{mutex};
|
||||||
|
|
||||||
Vulkan::Frame* frame;
|
Vulkan::Frame* frame;
|
||||||
if (index == -1) {
|
if (index == -1) {
|
||||||
frame = renderer->PrepareBlankFrame();
|
frame = renderer->PrepareBlankFrame();
|
||||||
|
@ -218,8 +220,6 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||||
frame = renderer->PrepareFrame(group, buffer.address_left);
|
frame = renderer->PrepareFrame(group, buffer.address_left);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::scoped_lock lock{mutex};
|
|
||||||
|
|
||||||
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
|
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
|
||||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -242,7 +242,7 @@ void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
|
||||||
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
|
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
|
||||||
auto* port = driver->GetPort(handle);
|
auto* port = driver->GetPort(handle);
|
||||||
if (!port) {
|
if (!port) {
|
||||||
return 0x8029000b;
|
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
Platform::IrqC::Instance()->RegisterOnce(
|
Platform::IrqC::Instance()->RegisterOnce(
|
||||||
|
|
|
@ -126,7 +126,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||||
std::jthread([this](std::stop_token stop_token) { linker->Execute(); });
|
std::jthread([this](std::stop_token stop_token) { linker->Execute(); });
|
||||||
|
|
||||||
// Begin main window loop until the application exits
|
// Begin main window loop until the application exits
|
||||||
static constexpr std::chrono::microseconds FlipPeriod{10};
|
static constexpr std::chrono::milliseconds FlipPeriod{16};
|
||||||
|
|
||||||
while (window.isOpen()) {
|
while (window.isOpen()) {
|
||||||
window.waitEvent();
|
window.waitEvent();
|
||||||
|
|
|
@ -200,6 +200,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||||
}
|
}
|
||||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||||
|
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||||
|
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||||
// if (program.info.stores_frag_depth) {
|
// if (program.info.stores_frag_depth) {
|
||||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||||
// }
|
// }
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
void EmitBitCastU16F16(EmitContext&) {
|
void EmitBitCastU16F16(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
||||||
|
@ -15,11 +15,11 @@ Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitBitCastU64F64(EmitContext&) {
|
void EmitBitCastU64F64(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitBitCastF16U16(EmitContext&) {
|
void EmitBitCastF16U16(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
||||||
|
@ -27,7 +27,7 @@ Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitBitCastF64U64(EmitContext&) {
|
void EmitBitCastF64U64(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitPackUint2x32(EmitContext& ctx, Id value) {
|
Id EmitPackUint2x32(EmitContext& ctx, Id value) {
|
||||||
|
|
|
@ -115,27 +115,27 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF64x2(EmitContext&) {
|
void EmitCompositeConstructF64x2(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF64x3(EmitContext&) {
|
void EmitCompositeConstructF64x3(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeConstructF64x4(EmitContext&) {
|
void EmitCompositeConstructF64x4(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF64x2(EmitContext&) {
|
void EmitCompositeExtractF64x2(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF64x3(EmitContext&) {
|
void EmitCompositeExtractF64x3(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCompositeExtractF64x4(EmitContext&) {
|
void EmitCompositeExtractF64x4(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
|
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
|
||||||
|
|
|
@ -195,10 +195,36 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||||
|
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||||
|
const auto& buffer = ctx.buffers[handle];
|
||||||
|
if (info.index_enable && info.offset_enable) {
|
||||||
|
UNREACHABLE();
|
||||||
|
} else if (info.index_enable) {
|
||||||
|
for (u32 i = 0; i < 3; i++) {
|
||||||
|
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||||
|
const Id ptr{
|
||||||
|
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||||
|
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||||
|
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||||
|
const auto& buffer = ctx.buffers[handle];
|
||||||
|
if (info.index_enable && info.offset_enable) {
|
||||||
|
UNREACHABLE();
|
||||||
|
} else if (info.index_enable) {
|
||||||
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||||
|
const Id ptr{
|
||||||
|
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||||
|
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,34 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
struct ImageOperands {
|
||||||
|
void Add(spv::ImageOperandsMask new_mask, Id value) {
|
||||||
|
mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) |
|
||||||
|
static_cast<u32>(new_mask));
|
||||||
|
operands.push_back(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
spv::ImageOperandsMask mask{};
|
||||||
|
boost::container::static_vector<Id, 4> operands;
|
||||||
|
};
|
||||||
|
|
||||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||||
Id offset) {
|
Id offset) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
|
ImageOperands operands;
|
||||||
|
if (Sirit::ValidId(offset)) {
|
||||||
|
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||||
|
}
|
||||||
|
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||||
|
@ -25,9 +41,13 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
|
||||||
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
|
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
|
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||||
|
@ -42,12 +62,16 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||||
|
|
||||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
const IR::Value& offset, const IR::Value& offset2) {
|
const IR::Value& offset, const IR::Value& offset2) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
|
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||||
|
@ -83,21 +107,21 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
||||||
case AmdGpu::ImageType::Color3D:
|
case AmdGpu::ImageType::Color3D:
|
||||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
|
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
||||||
|
|
|
@ -338,13 +338,13 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
|
||||||
Id offset);
|
Id offset);
|
||||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||||
Id offset);
|
Id offset);
|
||||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset);
|
Id coords, Id dref, Id bias_lc, const IR::Value& offset);
|
||||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||||
Id bias_lc, Id offset);
|
Id bias_lc, Id offset);
|
||||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
const IR::Value& offset, const IR::Value& offset2);
|
const IR::Value& offset, const IR::Value& offset2);
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
const IR::Value& offset, const IR::Value& offset2, Id dref);
|
const IR::Value& offset, const IR::Value& offset2, Id dref);
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||||
Id ms);
|
Id ms);
|
||||||
|
|
|
@ -11,7 +11,7 @@ Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitSelectU8(EmitContext&, Id, Id, Id) {
|
Id EmitSelectU8(EmitContext&, Id, Id, Id) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
|
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
|
||||||
|
|
|
@ -11,11 +11,11 @@ Id EmitUndefU1(EmitContext& ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitUndefU8(EmitContext&) {
|
Id EmitUndefU8(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitUndefU16(EmitContext&) {
|
Id EmitUndefU16(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitUndefU32(EmitContext& ctx) {
|
Id EmitUndefU32(EmitContext& ctx) {
|
||||||
|
@ -23,7 +23,7 @@ Id EmitUndefU32(EmitContext& ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitUndefU64(EmitContext&) {
|
Id EmitUndefU64(EmitContext&) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -118,6 +118,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||||
switch (fmt) {
|
switch (fmt) {
|
||||||
case AmdGpu::NumberFormat::Float:
|
case AmdGpu::NumberFormat::Float:
|
||||||
case AmdGpu::NumberFormat::Unorm:
|
case AmdGpu::NumberFormat::Unorm:
|
||||||
|
case AmdGpu::NumberFormat::Snorm:
|
||||||
return ctx.F32[4];
|
return ctx.F32[4];
|
||||||
case AmdGpu::NumberFormat::Sint:
|
case AmdGpu::NumberFormat::Sint:
|
||||||
return ctx.S32[4];
|
return ctx.S32[4];
|
||||||
|
@ -137,6 +138,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
||||||
switch (fmt) {
|
switch (fmt) {
|
||||||
case AmdGpu::NumberFormat::Float:
|
case AmdGpu::NumberFormat::Float:
|
||||||
case AmdGpu::NumberFormat::Unorm:
|
case AmdGpu::NumberFormat::Unorm:
|
||||||
|
case AmdGpu::NumberFormat::Snorm:
|
||||||
return {id, input_f32, F32[1], 4};
|
return {id, input_f32, F32[1], 4};
|
||||||
case AmdGpu::NumberFormat::Uint:
|
case AmdGpu::NumberFormat::Uint:
|
||||||
return {id, input_u32, U32[1], 4};
|
return {id, input_u32, U32[1], 4};
|
||||||
|
@ -253,6 +255,7 @@ void EmitContext::DefineOutputs(const Info& info) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineBuffers(const Info& info) {
|
void EmitContext::DefineBuffers(const Info& info) {
|
||||||
|
boost::container::small_vector<Id, 8> type_ids;
|
||||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||||
const Id data_type = (*data_types)[1];
|
const Id data_type = (*data_types)[1];
|
||||||
|
@ -260,13 +263,15 @@ void EmitContext::DefineBuffers(const Info& info) {
|
||||||
const u32 num_elements = stride * buffer.num_records;
|
const u32 num_elements = stride * buffer.num_records;
|
||||||
const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))};
|
const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))};
|
||||||
const Id struct_type{TypeStruct(record_array_type)};
|
const Id struct_type{TypeStruct(record_array_type)};
|
||||||
|
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||||
|
|
||||||
const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||||
Name(struct_type, name);
|
Name(struct_type, name);
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberName(struct_type, 0, "data");
|
MemberName(struct_type, 0, "data");
|
||||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||||
|
}
|
||||||
|
type_ids.push_back(record_array_type);
|
||||||
|
|
||||||
const auto storage_class =
|
const auto storage_class =
|
||||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||||
|
|
|
@ -329,12 +329,16 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
break;
|
break;
|
||||||
case Opcode::IMAGE_SAMPLE_LZ_O:
|
case Opcode::IMAGE_SAMPLE_LZ_O:
|
||||||
case Opcode::IMAGE_SAMPLE_O:
|
case Opcode::IMAGE_SAMPLE_O:
|
||||||
|
case Opcode::IMAGE_SAMPLE_C:
|
||||||
case Opcode::IMAGE_SAMPLE_C_LZ:
|
case Opcode::IMAGE_SAMPLE_C_LZ:
|
||||||
case Opcode::IMAGE_SAMPLE_LZ:
|
case Opcode::IMAGE_SAMPLE_LZ:
|
||||||
case Opcode::IMAGE_SAMPLE:
|
case Opcode::IMAGE_SAMPLE:
|
||||||
case Opcode::IMAGE_SAMPLE_L:
|
case Opcode::IMAGE_SAMPLE_L:
|
||||||
translator.IMAGE_SAMPLE(inst);
|
translator.IMAGE_SAMPLE(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::IMAGE_GATHER4_C:
|
||||||
|
translator.IMAGE_GATHER(inst);
|
||||||
|
break;
|
||||||
case Opcode::IMAGE_STORE:
|
case Opcode::IMAGE_STORE:
|
||||||
translator.IMAGE_STORE(inst);
|
translator.IMAGE_STORE(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -450,16 +454,22 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
||||||
|
case Opcode::BUFFER_LOAD_DWORDX3:
|
||||||
translator.BUFFER_LOAD_FORMAT(3, false, inst);
|
translator.BUFFER_LOAD_FORMAT(3, false, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||||
|
case Opcode::BUFFER_LOAD_DWORDX4:
|
||||||
translator.BUFFER_LOAD_FORMAT(4, false, inst);
|
translator.BUFFER_LOAD_FORMAT(4, false, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||||
case Opcode::BUFFER_STORE_DWORD:
|
case Opcode::BUFFER_STORE_DWORD:
|
||||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::BUFFER_STORE_DWORDX3:
|
||||||
|
translator.BUFFER_STORE_FORMAT(3, false, inst);
|
||||||
|
break;
|
||||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||||
|
case Opcode::BUFFER_STORE_DWORDX4:
|
||||||
translator.BUFFER_STORE_FORMAT(4, false, inst);
|
translator.BUFFER_STORE_FORMAT(4, false, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_MAX_F32:
|
case Opcode::V_MAX_F32:
|
||||||
|
|
|
@ -146,6 +146,7 @@ public:
|
||||||
// MIMG
|
// MIMG
|
||||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||||
|
void IMAGE_GATHER(const GcnInst& inst);
|
||||||
void IMAGE_STORE(const GcnInst& inst);
|
void IMAGE_STORE(const GcnInst& inst);
|
||||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||||
|
|
||||||
|
|
|
@ -76,6 +76,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||||
|
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||||
info.explicit_lod.Assign(explicit_lod);
|
info.explicit_lod.Assign(explicit_lod);
|
||||||
|
|
||||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||||
|
@ -108,6 +109,74 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
||||||
|
const auto& mimg = inst.control.mimg;
|
||||||
|
if (mimg.da) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
|
||||||
|
}
|
||||||
|
|
||||||
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
|
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||||
|
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||||
|
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
||||||
|
const auto flags = MimgModifierFlags(mimg.mod);
|
||||||
|
|
||||||
|
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
||||||
|
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
||||||
|
// binding index.
|
||||||
|
const IR::Value handle =
|
||||||
|
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
||||||
|
|
||||||
|
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
|
||||||
|
// Set Architecture
|
||||||
|
const IR::Value offset =
|
||||||
|
flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
|
||||||
|
const IR::F32 bias =
|
||||||
|
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||||
|
const IR::F32 dref =
|
||||||
|
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||||
|
|
||||||
|
// Derivatives are tricky because their number depends on the texture type which is located in
|
||||||
|
// T#. We don't have access to T# though until resource tracking pass. For now assume no
|
||||||
|
// derivatives are present, otherwise we don't know where coordinates are placed in the address
|
||||||
|
// stream.
|
||||||
|
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
|
||||||
|
|
||||||
|
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||||
|
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
||||||
|
// in coords field of the instruction. Then the resource tracking pass will patch the
|
||||||
|
// IR instruction to fill in lod_clamp field.
|
||||||
|
const IR::Value body = ir.CompositeConstruct(
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||||
|
|
||||||
|
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
||||||
|
|
||||||
|
IR::TextureInstInfo info{};
|
||||||
|
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
||||||
|
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||||
|
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||||
|
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||||
|
info.explicit_lod.Assign(explicit_lod);
|
||||||
|
|
||||||
|
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||||
|
const IR::Value texel = [&]() -> IR::Value {
|
||||||
|
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
||||||
|
if (!flags.test(MimgModifier::Pcf)) {
|
||||||
|
return ir.ImageGather(handle, body, offset, {}, info);
|
||||||
|
}
|
||||||
|
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
|
||||||
|
}();
|
||||||
|
|
||||||
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
if (((mimg.dmask >> i) & 1) == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
||||||
|
ir.SetVectorReg(dest_reg++, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||||
const auto& mimg = inst.control.mimg;
|
const auto& mimg = inst.control.mimg;
|
||||||
IR::VectorReg addr_reg{inst.src[0].code};
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
|
|
|
@ -198,6 +198,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||||
|
|
||||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
Descriptors& descriptors) {
|
Descriptors& descriptors) {
|
||||||
|
static constexpr size_t MaxUboSize = 65536;
|
||||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||||
const auto sharp = TrackSharp(producer);
|
const auto sharp = TrackSharp(producer);
|
||||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||||
|
@ -207,7 +208,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
.stride = buffer.GetStride(),
|
.stride = buffer.GetStride(),
|
||||||
.num_records = u32(buffer.num_records),
|
.num_records = u32(buffer.num_records),
|
||||||
.used_types = BufferDataType(inst),
|
.used_types = BufferDataType(inst),
|
||||||
.is_storage = IsBufferStore(inst),
|
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||||
});
|
});
|
||||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
@ -252,25 +253,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||||
}
|
}
|
||||||
|
|
||||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
std::deque<IR::Inst*> insts{&inst};
|
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||||
const auto& pred = [](auto opcode) -> bool {
|
while (producer->GetOpcode() == IR::Opcode::Phi) {
|
||||||
return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
producer = producer->Arg(0).InstRecursive();
|
||||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
|
||||||
opcode == IR::Opcode::GetUserData);
|
|
||||||
};
|
|
||||||
|
|
||||||
IR::Inst* producer{};
|
|
||||||
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
|
|
||||||
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
|
|
||||||
const auto arg = producer->Arg(arg_idx);
|
|
||||||
if (arg.TryInstRecursive()) {
|
|
||||||
insts.push_back(arg.InstRecursive());
|
|
||||||
}
|
}
|
||||||
}
|
ASSERT(producer->GetOpcode() ==
|
||||||
insts.pop_front();
|
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||||
}
|
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||||
|
producer->GetOpcode() == IR::Opcode::GetUserData);
|
||||||
ASSERT(pred(producer->GetOpcode()));
|
|
||||||
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
return std::make_pair(producer->Arg(0).InstRecursive(),
|
||||||
|
@ -335,6 +325,22 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
}();
|
}();
|
||||||
inst.SetArg(1, coords);
|
inst.SetArg(1, coords);
|
||||||
|
|
||||||
|
if (inst_info.has_offset) {
|
||||||
|
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
|
||||||
|
const u32 arg_pos = inst_info.is_depth ? 4 : 3;
|
||||||
|
const IR::Value arg = inst.Arg(arg_pos);
|
||||||
|
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||||
|
const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); };
|
||||||
|
union {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 6, u32> x;
|
||||||
|
BitField<8, 6, u32> y;
|
||||||
|
BitField<16, 6, u32> z;
|
||||||
|
} offset{arg.U32()};
|
||||||
|
const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y));
|
||||||
|
inst.SetArg(arg_pos, value);
|
||||||
|
}
|
||||||
|
|
||||||
if (inst_info.has_lod_clamp) {
|
if (inst_info.has_lod_clamp) {
|
||||||
// Final argument contains lod_clamp
|
// Final argument contains lod_clamp
|
||||||
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||||
|
|
|
@ -38,6 +38,7 @@ union TextureInstInfo {
|
||||||
BitField<2, 1, u32> has_lod_clamp;
|
BitField<2, 1, u32> has_lod_clamp;
|
||||||
BitField<3, 1, u32> force_level0;
|
BitField<3, 1, u32> force_level0;
|
||||||
BitField<4, 1, u32> explicit_lod;
|
BitField<4, 1, u32> explicit_lod;
|
||||||
|
BitField<5, 1, u32> has_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
|
|
|
@ -52,7 +52,7 @@ struct BufferResource {
|
||||||
|
|
||||||
auto operator<=>(const BufferResource&) const = default;
|
auto operator<=>(const BufferResource&) const = default;
|
||||||
};
|
};
|
||||||
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
|
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
|
||||||
|
|
||||||
struct ImageResource {
|
struct ImageResource {
|
||||||
u32 sgpr_base;
|
u32 sgpr_base;
|
||||||
|
@ -62,13 +62,13 @@ struct ImageResource {
|
||||||
bool is_storage;
|
bool is_storage;
|
||||||
bool is_depth;
|
bool is_depth;
|
||||||
};
|
};
|
||||||
using ImageResourceList = boost::container::static_vector<ImageResource, 8>;
|
using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
|
||||||
|
|
||||||
struct SamplerResource {
|
struct SamplerResource {
|
||||||
u32 sgpr_base;
|
u32 sgpr_base;
|
||||||
u32 dword_offset;
|
u32 dword_offset;
|
||||||
};
|
};
|
||||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 8>;
|
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||||
|
|
||||||
struct Info {
|
struct Info {
|
||||||
struct VsInput {
|
struct VsInput {
|
||||||
|
|
|
@ -187,6 +187,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
case PM4ItOpcode::ClearState: {
|
case PM4ItOpcode::ClearState: {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::SetConfigReg: {
|
||||||
|
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||||
|
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
|
||||||
|
const auto* payload = reinterpret_cast<const u32*>(header + 2);
|
||||||
|
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::SetContextReg: {
|
case PM4ItOpcode::SetContextReg: {
|
||||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||||
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
|
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
|
||||||
|
|
|
@ -43,6 +43,7 @@ struct Liverpool {
|
||||||
static constexpr u32 NumShaderUserData = 16;
|
static constexpr u32 NumShaderUserData = 16;
|
||||||
static constexpr u32 UconfigRegWordOffset = 0xC000;
|
static constexpr u32 UconfigRegWordOffset = 0xC000;
|
||||||
static constexpr u32 ContextRegWordOffset = 0xA000;
|
static constexpr u32 ContextRegWordOffset = 0xA000;
|
||||||
|
static constexpr u32 ConfigRegWordOffset = 0x2000;
|
||||||
static constexpr u32 ShRegWordOffset = 0x2C00;
|
static constexpr u32 ShRegWordOffset = 0x2C00;
|
||||||
static constexpr u32 NumRegs = 0xD000;
|
static constexpr u32 NumRegs = 0xD000;
|
||||||
|
|
||||||
|
@ -789,6 +790,7 @@ struct Liverpool {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 1, u32> depth_clear_enable;
|
BitField<0, 1, u32> depth_clear_enable;
|
||||||
BitField<1, 1, u32> stencil_clear_enable;
|
BitField<1, 1, u32> stencil_clear_enable;
|
||||||
|
BitField<6, 1, u32> depth_compress_disable;
|
||||||
};
|
};
|
||||||
|
|
||||||
union AaConfig {
|
union AaConfig {
|
||||||
|
|
|
@ -366,6 +366,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eR8G8Unorm;
|
return vk::Format::eR8G8Unorm;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
|
return vk::Format::eBc7UnormBlock;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eBc2UnormBlock;
|
return vk::Format::eBc2UnormBlock;
|
||||||
}
|
}
|
||||||
|
@ -376,9 +379,15 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eA2R10G10B10UnormPack32;
|
return vk::Format::eA2R10G10B10UnormPack32;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return vk::Format::eA2R10G10B10SnormPack32;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::Format10_11_11 && num_format == AmdGpu::NumberFormat::Float) {
|
if (data_format == AmdGpu::DataFormat::Format10_11_11 && num_format == AmdGpu::NumberFormat::Float) {
|
||||||
return vk::Format::eB10G11R11UfloatPack32;
|
return vk::Format::eB10G11R11UfloatPack32;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
return vk::Format::eR16G16Sfloat;
|
||||||
|
}
|
||||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -381,6 +381,7 @@ Frame* RendererVulkan::GetRenderFrame() {
|
||||||
{
|
{
|
||||||
std::unique_lock lock{free_mutex};
|
std::unique_lock lock{free_mutex};
|
||||||
free_cv.wait(lock, [this] { return !free_queue.empty(); });
|
free_cv.wait(lock, [this] { return !free_queue.empty(); });
|
||||||
|
LOG_INFO(Render_Vulkan, "Got render frame, remaining {}", free_queue.size() - 1);
|
||||||
|
|
||||||
// Take the frame from the queue
|
// Take the frame from the queue
|
||||||
frame = free_queue.front();
|
frame = free_queue.front();
|
||||||
|
|
|
@ -85,7 +85,7 @@ ComputePipeline::~ComputePipeline() = default;
|
||||||
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
VideoCore::TextureCache& texture_cache) const {
|
VideoCore::TextureCache& texture_cache) const {
|
||||||
// Bind resource buffers and textures.
|
// Bind resource buffers and textures.
|
||||||
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
|
||||||
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
||||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
|
@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
||||||
// need its full emulation anyways. For cases of metadata read a warning will be logged.
|
// need its full emulation anyways. For cases of metadata read a warning will be logged.
|
||||||
if (buffer.is_storage) {
|
if (buffer.is_storage) {
|
||||||
if (texture_cache.TouchMeta(address, true)) {
|
if (texture_cache.TouchMeta(address, true)) {
|
||||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
LOG_WARNING(Render_Vulkan, "Metadata update skipped");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -127,7 +127,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
||||||
|
|
||||||
for (const auto& image : info.images) {
|
for (const auto& image : info.images) {
|
||||||
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||||
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
|
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
|
||||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
|
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
|
|
|
@ -187,7 +187,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
|
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
|
||||||
.colorAttachmentCount = num_color_formats,
|
.colorAttachmentCount = num_color_formats,
|
||||||
.pColorAttachmentFormats = key.color_formats.data(),
|
.pColorAttachmentFormats = key.color_formats.data(),
|
||||||
.depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
|
.depthAttachmentFormat = key.depth_format,
|
||||||
.stencilAttachmentFormat = vk::Format::eUndefined,
|
.stencilAttachmentFormat = vk::Format::eUndefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -320,7 +320,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
||||||
|
|
||||||
// Bind resource buffers and textures.
|
// Bind resource buffers and textures.
|
||||||
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
|
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
|
||||||
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
|
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
|
|
||||||
|
@ -350,9 +350,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
||||||
|
|
||||||
for (const auto& image : stage.images) {
|
for (const auto& image : stage.images) {
|
||||||
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||||
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
|
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
|
||||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
|
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
|
||||||
vk::ImageLayout::eShaderReadOnlyOptimal);
|
(image.is_storage || image.is_depth) ? vk::ImageLayout::eGeneral
|
||||||
|
: vk::ImageLayout::eShaderReadOnlyOptimal);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
.dstBinding = binding++,
|
.dstBinding = binding++,
|
||||||
|
|
|
@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
|
||||||
.logicOp = features.logicOp,
|
.logicOp = features.logicOp,
|
||||||
.samplerAnisotropy = features.samplerAnisotropy,
|
.samplerAnisotropy = features.samplerAnisotropy,
|
||||||
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
||||||
|
.shaderImageGatherExtended = true,
|
||||||
.shaderStorageImageMultisample = true,
|
.shaderStorageImageMultisample = true,
|
||||||
.shaderClipDistance = features.shaderClipDistance,
|
.shaderClipDistance = features.shaderClipDistance,
|
||||||
},
|
},
|
||||||
|
|
|
@ -117,8 +117,8 @@ void PipelineCache::RefreshGraphicsKey() {
|
||||||
key.num_samples = regs.aa_config.NumSamples();
|
key.num_samples = regs.aa_config.NumSamples();
|
||||||
|
|
||||||
const auto& db = regs.depth_buffer;
|
const auto& db = regs.depth_buffer;
|
||||||
if (key.depth.depth_enable) {
|
|
||||||
key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
|
key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
|
||||||
|
if (key.depth.depth_enable) {
|
||||||
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
|
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,6 +206,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||||
block_pool.ReleaseContents();
|
block_pool.ReleaseContents();
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
|
|
||||||
|
if (hash == 0xa34c48f8) {
|
||||||
|
printf("bad\n");
|
||||||
|
}
|
||||||
|
|
||||||
// Recompile shader to IR.
|
// Recompile shader to IR.
|
||||||
try {
|
try {
|
||||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
|
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
|
||||||
|
@ -214,12 +218,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||||
|
|
||||||
// Compile IR to SPIR-V
|
// Compile IR to SPIR-V
|
||||||
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||||
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
|
||||||
infos[i] = &programs[i].info;
|
|
||||||
|
|
||||||
if (Config::dumpShaders()) {
|
if (Config::dumpShaders()) {
|
||||||
DumpShader(spv_code, hash, stage, "spv");
|
DumpShader(spv_code, hash, stage, "spv");
|
||||||
}
|
}
|
||||||
|
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
||||||
|
infos[i] = &programs[i].info;
|
||||||
} catch (const Shader::Exception& e) {
|
} catch (const Shader::Exception& e) {
|
||||||
UNREACHABLE_MSG("{}", e.what());
|
UNREACHABLE_MSG("{}", e.what());
|
||||||
}
|
}
|
||||||
|
@ -246,6 +249,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
|
|
||||||
// Recompile shader to IR.
|
// Recompile shader to IR.
|
||||||
|
try {
|
||||||
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
||||||
const Shader::Info info =
|
const Shader::Info info =
|
||||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||||
|
@ -254,14 +258,16 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
||||||
// Compile IR to SPIR-V
|
// Compile IR to SPIR-V
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
|
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
|
||||||
const auto module = CompileSPV(spv_code, instance.GetDevice());
|
|
||||||
|
|
||||||
if (Config::dumpShaders()) {
|
if (Config::dumpShaders()) {
|
||||||
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
|
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
|
||||||
}
|
}
|
||||||
|
const auto module = CompileSPV(spv_code, instance.GetDevice());
|
||||||
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
|
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
|
||||||
module);
|
module);
|
||||||
|
} catch (const Shader::Exception& e) {
|
||||||
|
UNREACHABLE_MSG("{}", e.what());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||||
pipeline_cache{instance, scheduler, liverpool},
|
pipeline_cache{instance, scheduler, liverpool},
|
||||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
|
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
|
||||||
if (!Config::nullGpu()) {
|
if (!Config::nullGpu()) {
|
||||||
liverpool->BindRasterizer(this);
|
liverpool->BindRasterizer(this);
|
||||||
}
|
}
|
||||||
|
@ -46,71 +46,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
|
|
||||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||||
|
|
||||||
boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
|
BeginRendering();
|
||||||
color_attachments{};
|
|
||||||
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
|
|
||||||
const auto& col_buf = regs.color_buffers[col_buf_id];
|
|
||||||
if (!col_buf) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
|
||||||
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
|
||||||
|
|
||||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
|
||||||
color_attachments.push_back({
|
|
||||||
.imageView = *image_view.image_view,
|
|
||||||
.imageLayout = vk::ImageLayout::eGeneral,
|
|
||||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
|
||||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
|
||||||
.clearValue =
|
|
||||||
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
|
|
||||||
});
|
|
||||||
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::RenderingAttachmentInfo depth_attachment{};
|
|
||||||
u32 num_depth_attachments{};
|
|
||||||
if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
|
|
||||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
|
||||||
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
|
||||||
texture_cache.IsMetaCleared(htile_address);
|
|
||||||
const auto& image_view =
|
|
||||||
texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent);
|
|
||||||
depth_attachment = {
|
|
||||||
.imageView = *image_view.image_view,
|
|
||||||
.imageLayout = vk::ImageLayout::eGeneral,
|
|
||||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
|
||||||
.storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
|
|
||||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
|
|
||||||
.stencil = regs.stencil_clear}},
|
|
||||||
};
|
|
||||||
texture_cache.TouchMeta(htile_address, false);
|
|
||||||
num_depth_attachments++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Don't restart renderpass every draw
|
|
||||||
const auto& scissor = regs.screen_scissor;
|
|
||||||
vk::RenderingInfo rendering_info = {
|
|
||||||
.renderArea =
|
|
||||||
{
|
|
||||||
.offset = {scissor.top_left_x, scissor.top_left_y},
|
|
||||||
.extent = {scissor.GetWidth(), scissor.GetHeight()},
|
|
||||||
},
|
|
||||||
.layerCount = 1,
|
|
||||||
.colorAttachmentCount = static_cast<u32>(color_attachments.size()),
|
|
||||||
.pColorAttachments = color_attachments.data(),
|
|
||||||
.pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr,
|
|
||||||
};
|
|
||||||
auto& area = rendering_info.renderArea.extent;
|
|
||||||
if (area.width == 2048) {
|
|
||||||
area.width = 1920;
|
|
||||||
area.height = 1080;
|
|
||||||
}
|
|
||||||
|
|
||||||
UpdateDynamicState(*pipeline);
|
UpdateDynamicState(*pipeline);
|
||||||
|
|
||||||
cmdbuf.beginRendering(rendering_info);
|
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||||
|
@ -120,7 +58,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
: regs.num_indices;
|
: regs.num_indices;
|
||||||
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
|
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
|
||||||
}
|
}
|
||||||
cmdbuf.endRendering();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::DispatchDirect() {
|
void Rasterizer::DispatchDirect() {
|
||||||
|
@ -138,15 +75,66 @@ void Rasterizer::DispatchDirect() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scheduler.EndRendering();
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Rasterizer::BeginRendering() {
|
||||||
|
const auto& regs = liverpool->regs;
|
||||||
|
RenderState state;
|
||||||
|
|
||||||
|
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
|
||||||
|
const auto& col_buf = regs.color_buffers[col_buf_id];
|
||||||
|
if (!col_buf) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
||||||
|
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
||||||
|
state.width = std::min<u32>(state.width, hint.width);
|
||||||
|
state.height = std::min<u32>(state.height, hint.height);
|
||||||
|
|
||||||
|
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
||||||
|
state.color_attachments[state.num_color_attachments++] = {
|
||||||
|
.imageView = *image_view.image_view,
|
||||||
|
.imageLayout = vk::ImageLayout::eGeneral,
|
||||||
|
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||||
|
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||||
|
.clearValue =
|
||||||
|
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
|
||||||
|
};
|
||||||
|
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald &&
|
||||||
|
regs.depth_buffer.Address() != 0) {
|
||||||
|
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||||
|
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
||||||
|
texture_cache.IsMetaCleared(htile_address);
|
||||||
|
const auto& hint = liverpool->last_db_extent;
|
||||||
|
const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint);
|
||||||
|
state.width = std::min<u32>(state.width, hint.width);
|
||||||
|
state.height = std::min<u32>(state.height, hint.height);
|
||||||
|
state.depth_attachment = {
|
||||||
|
.imageView = *image_view.image_view,
|
||||||
|
.imageLayout = vk::ImageLayout::eGeneral,
|
||||||
|
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||||
|
.storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
|
||||||
|
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
|
||||||
|
.stencil = regs.stencil_clear}},
|
||||||
|
};
|
||||||
|
texture_cache.TouchMeta(htile_address, false);
|
||||||
|
state.num_depth_attachments++;
|
||||||
|
}
|
||||||
|
scheduler.BeginRendering(state);
|
||||||
|
}
|
||||||
|
|
||||||
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
|
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
|
||||||
// Emulate QuadList primitive type with CPU made index buffer.
|
// Emulate QuadList primitive type with CPU made index buffer.
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
|
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
|
||||||
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
|
//ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
|
||||||
is_indexed = true;
|
is_indexed = true;
|
||||||
|
|
||||||
// Emit indices.
|
// Emit indices.
|
||||||
|
|
|
@ -37,6 +37,8 @@ private:
|
||||||
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
||||||
void MapMemory(VAddr addr, size_t size);
|
void MapMemory(VAddr addr, size_t size);
|
||||||
|
|
||||||
|
void BeginRendering();
|
||||||
|
|
||||||
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
||||||
void UpdateViewportScissorState();
|
void UpdateViewportScissorState();
|
||||||
void UpdateDepthStencilState();
|
void UpdateDepthStencilState();
|
||||||
|
|
|
@ -18,6 +18,37 @@ Scheduler::~Scheduler() {
|
||||||
std::free(profiler_scope);
|
std::free(profiler_scope);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Scheduler::BeginRendering(const RenderState& new_state) {
|
||||||
|
if (is_rendering && render_state == new_state) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
EndRendering();
|
||||||
|
is_rendering = true;
|
||||||
|
render_state = new_state;
|
||||||
|
|
||||||
|
const vk::RenderingInfo rendering_info = {
|
||||||
|
.renderArea = {
|
||||||
|
.offset = {0, 0},
|
||||||
|
.extent = {render_state.width, render_state.height},
|
||||||
|
},
|
||||||
|
.layerCount = 1,
|
||||||
|
.colorAttachmentCount = static_cast<u32>(render_state.color_attachments.size()),
|
||||||
|
.pColorAttachments = render_state.color_attachments.data(),
|
||||||
|
.pDepthAttachment = render_state.num_depth_attachments ?
|
||||||
|
&render_state.depth_attachment : nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
current_cmdbuf.beginRendering(rendering_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Scheduler::EndRendering() {
|
||||||
|
if (!is_rendering) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
is_rendering = false;
|
||||||
|
current_cmdbuf.endRendering();
|
||||||
|
}
|
||||||
|
|
||||||
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
||||||
// When flushing, we only send data to the worker thread; no waiting is necessary.
|
// When flushing, we only send data to the worker thread; no waiting is necessary.
|
||||||
SubmitExecution(signal, wait);
|
SubmitExecution(signal, wait);
|
||||||
|
@ -55,6 +86,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
||||||
|
std::scoped_lock lk{submit_mutex};
|
||||||
const u64 signal_value = master_semaphore.NextTick();
|
const u64 signal_value = master_semaphore.NextTick();
|
||||||
|
|
||||||
auto* profiler_ctx = instance.GetProfilerContext();
|
auto* profiler_ctx = instance.GetProfilerContext();
|
||||||
|
@ -63,7 +95,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
|
||||||
TracyVkCollect(profiler_ctx, current_cmdbuf);
|
TracyVkCollect(profiler_ctx, current_cmdbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::scoped_lock lk{submit_mutex};
|
EndRendering();
|
||||||
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
|
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
|
||||||
master_semaphore.Refresh();
|
master_semaphore.Refresh();
|
||||||
AllocateWorkerCommandBuffers();
|
AllocateWorkerCommandBuffers();
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||||
|
@ -12,6 +13,19 @@ namespace Vulkan {
|
||||||
|
|
||||||
class Instance;
|
class Instance;
|
||||||
|
|
||||||
|
struct RenderState {
|
||||||
|
std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
|
||||||
|
vk::RenderingAttachmentInfo depth_attachment{};
|
||||||
|
u32 num_color_attachments{};
|
||||||
|
u32 num_depth_attachments{};
|
||||||
|
u32 width = std::numeric_limits<u32>::max();
|
||||||
|
u32 height = std::numeric_limits<u32>::max();
|
||||||
|
|
||||||
|
bool operator==(const RenderState& other) const noexcept {
|
||||||
|
return std::memcmp(this, &other, sizeof(RenderState)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class Scheduler {
|
class Scheduler {
|
||||||
public:
|
public:
|
||||||
explicit Scheduler(const Instance& instance);
|
explicit Scheduler(const Instance& instance);
|
||||||
|
@ -26,6 +40,12 @@ public:
|
||||||
/// Waits for the given tick to trigger on the GPU.
|
/// Waits for the given tick to trigger on the GPU.
|
||||||
void Wait(u64 tick);
|
void Wait(u64 tick);
|
||||||
|
|
||||||
|
/// Starts a new rendering scope with provided state.
|
||||||
|
void BeginRendering(const RenderState& new_state);
|
||||||
|
|
||||||
|
/// Ends current rendering scope.
|
||||||
|
void EndRendering();
|
||||||
|
|
||||||
/// Returns the current command buffer.
|
/// Returns the current command buffer.
|
||||||
vk::CommandBuffer CommandBuffer() const {
|
vk::CommandBuffer CommandBuffer() const {
|
||||||
return current_cmdbuf;
|
return current_cmdbuf;
|
||||||
|
@ -59,6 +79,8 @@ private:
|
||||||
CommandPool command_pool;
|
CommandPool command_pool;
|
||||||
vk::CommandBuffer current_cmdbuf;
|
vk::CommandBuffer current_cmdbuf;
|
||||||
std::condition_variable_any event_cv;
|
std::condition_variable_any event_cv;
|
||||||
|
RenderState render_state;
|
||||||
|
bool is_rendering = false;
|
||||||
tracy::VkCtxScope* profiler_scope{};
|
tracy::VkCtxScope* profiler_scope{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -221,6 +221,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
||||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||||
|
ASSERT(info.pixel_format != vk::Format::eUndefined);
|
||||||
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
|
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
|
||||||
vk::ImageCreateFlagBits::eExtendedUsage};
|
vk::ImageCreateFlagBits::eExtendedUsage};
|
||||||
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
||||||
|
@ -272,7 +273,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask) {
|
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||||
|
vk::CommandBuffer cmdbuf) {
|
||||||
if (dst_layout == layout && dst_mask == access_mask) {
|
if (dst_layout == layout && dst_mask == access_mask) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -300,7 +302,12 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
|
||||||
dst_mask == vk::AccessFlagBits::eTransferWrite)
|
dst_mask == vk::AccessFlagBits::eTransferWrite)
|
||||||
? vk::PipelineStageFlagBits::eTransfer
|
? vk::PipelineStageFlagBits::eTransfer
|
||||||
: vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
|
: vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
|
||||||
const auto cmdbuf = scheduler->CommandBuffer();
|
|
||||||
|
if (!cmdbuf) {
|
||||||
|
// When using external cmdbuf you are responsible for ending rp.
|
||||||
|
scheduler->EndRendering();
|
||||||
|
cmdbuf = scheduler->CommandBuffer();
|
||||||
|
}
|
||||||
cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
|
cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
|
||||||
barrier);
|
barrier);
|
||||||
|
|
||||||
|
@ -310,6 +317,7 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::Upload(vk::Buffer buffer, u64 offset) {
|
void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||||
|
scheduler->EndRendering();
|
||||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||||
|
|
||||||
// Copy to the image.
|
// Copy to the image.
|
||||||
|
@ -318,7 +326,7 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||||
.bufferRowLength = info.pitch,
|
.bufferRowLength = info.pitch,
|
||||||
.bufferImageHeight = info.size.height,
|
.bufferImageHeight = info.size.height,
|
||||||
.imageSubresource{
|
.imageSubresource{
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
.aspectMask = aspect_mask,
|
||||||
.mipLevel = 0,
|
.mipLevel = 0,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = 1,
|
.layerCount = 1,
|
||||||
|
|
|
@ -132,7 +132,8 @@ struct Image {
|
||||||
return image_view_ids[std::distance(image_view_infos.begin(), it)];
|
return image_view_ids[std::distance(image_view_infos.begin(), it)];
|
||||||
}
|
}
|
||||||
|
|
||||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
|
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||||
|
vk::CommandBuffer cmdbuf = {});
|
||||||
void Upload(vk::Buffer buffer, u64 offset);
|
void Upload(vk::Buffer buffer, u64 offset);
|
||||||
|
|
||||||
const Vulkan::Instance* instance;
|
const Vulkan::Instance* instance;
|
||||||
|
|
|
@ -80,8 +80,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||||
|
|
||||||
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
|
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
|
||||||
vk::Format format = info.format;
|
vk::Format format = info.format;
|
||||||
|
vk::ImageAspectFlags aspect = image.aspect_mask;
|
||||||
if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) {
|
if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) {
|
||||||
format = vk::Format::eD32Sfloat;
|
format = image.info.pixel_format;
|
||||||
|
aspect = vk::ImageAspectFlagBits::eDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vk::ImageViewCreateInfo image_view_ci = {
|
const vk::ImageViewCreateInfo image_view_ci = {
|
||||||
|
@ -91,7 +93,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||||
.format = format,
|
.format = format,
|
||||||
.components = info.mapping,
|
.components = info.mapping,
|
||||||
.subresourceRange{
|
.subresourceRange{
|
||||||
.aspectMask = image.aspect_mask,
|
.aspectMask = aspect,
|
||||||
.baseMipLevel = 0U,
|
.baseMipLevel = 0U,
|
||||||
.levelCount = 1,
|
.levelCount = 1,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
|
|
|
@ -116,10 +116,15 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re
|
||||||
std::unique_lock lock{m_page_table};
|
std::unique_lock lock{m_page_table};
|
||||||
boost::container::small_vector<ImageId, 2> image_ids;
|
boost::container::small_vector<ImageId, 2> image_ids;
|
||||||
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
||||||
if (image.cpu_addr == cpu_address && image.info.size.width == info.size.width &&
|
// Address and width must match.
|
||||||
image.info.IsDepthStencil() == info.IsDepthStencil()) {
|
if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) {
|
||||||
image_ids.push_back(image_id);
|
return;
|
||||||
}
|
}
|
||||||
|
if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
|
||||||
|
info.pixel_format != vk::Format::eR32Sfloat) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image_ids.push_back(image_id);
|
||||||
});
|
});
|
||||||
|
|
||||||
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
||||||
|
@ -129,7 +134,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re
|
||||||
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
||||||
RegisterImage(image_id);
|
RegisterImage(image_id);
|
||||||
} else {
|
} else {
|
||||||
image_id = image_ids[0];
|
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
RegisterMeta(info, image_id);
|
RegisterMeta(info, image_id);
|
||||||
|
@ -163,11 +168,11 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
|
||||||
return slot_image_views[view_id];
|
return slot_image_views[view_id];
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
|
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage, bool is_depth) {
|
||||||
const ImageInfo info{desc};
|
const ImageInfo info{desc};
|
||||||
Image& image = FindImage(info, desc.Address());
|
Image& image = FindImage(info, desc.Address());
|
||||||
|
|
||||||
if (is_storage) {
|
if (is_storage || is_depth) {
|
||||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||||
image.info.usage.storage = true;
|
image.info.usage.storage = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -202,7 +207,7 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
|
||||||
auto& image = FindImage(info, buffer.Address(), false);
|
auto& image = FindImage(info, buffer.Address(), false);
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
|
image.Transit(vk::ImageLayout::eGeneral,
|
||||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||||
vk::AccessFlagBits::eDepthStencilAttachmentRead);
|
vk::AccessFlagBits::eDepthStencilAttachmentRead);
|
||||||
|
|
||||||
|
@ -261,6 +266,8 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
.imageExtent = {width, height, 1},
|
.imageExtent = {width, height, 1},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
scheduler.EndRendering();
|
||||||
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,8 @@ public:
|
||||||
bool refresh_on_create = true);
|
bool refresh_on_create = true);
|
||||||
|
|
||||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||||
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
|
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage,
|
||||||
|
bool is_depth);
|
||||||
|
|
||||||
/// Retrieves the render target with specified properties
|
/// Retrieves the render target with specified properties
|
||||||
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
|
|
|
@ -231,7 +231,7 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT
|
||||||
|
|
||||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||||
: instance{instance}, scheduler{scheduler},
|
: instance{instance}, scheduler{scheduler},
|
||||||
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} {
|
||||||
|
|
||||||
static const std::array detiler_shaders{
|
static const std::array detiler_shaders{
|
||||||
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||||
|
|
Loading…
Reference in New Issue