Merge pull request #203 from shadps4-emu/video_core/more_functionality
More instructions support and trivial additions
This commit is contained in:
commit
d9f2758850
|
@ -51,7 +51,7 @@
|
|||
url = https://github.com/zyantific/zydis.git
|
||||
[submodule "externals/sirit"]
|
||||
path = externals/sirit
|
||||
url = https://github.com/raphaelthegreat/sirit.git
|
||||
url = https://github.com/shadps4-emu/sirit
|
||||
[submodule "externals/xxhash"]
|
||||
path = externals/xxhash
|
||||
url = https://github.com/Cyan4973/xxHash.git
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit fc65ebb5b56b849b1205d5baa2ca38440096652d
|
||||
Subproject commit 505cc66a2be70b268c1700fef4d5327a5fe46494
|
|
@ -110,6 +110,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
|||
CLS(Frontend) \
|
||||
CLS(Render) \
|
||||
SUB(Render, Vulkan) \
|
||||
SUB(Render, Recompiler) \
|
||||
CLS(Input) \
|
||||
CLS(Tty) \
|
||||
CLS(Loader)
|
||||
|
|
|
@ -77,6 +77,7 @@ enum class Class : u8 {
|
|||
Frontend, ///< Emulator UI
|
||||
Render, ///< Video Core
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
Render_Recompiler, ///< Shader recompiler
|
||||
Loader, ///< ROM loader
|
||||
Input, ///< Input emulation
|
||||
Tty, ///< Debug output from emu
|
||||
|
|
|
@ -185,7 +185,7 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
|
|||
}
|
||||
|
||||
/// Emits a single-dword float buffer store by forwarding every argument to
/// EmitStoreBufferU32.
/// NOTE(review): presumably the U32 path writes the 32-bit word without
/// reinterpreting it, so it is valid for float data — confirm in EmitStoreBufferU32.
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    // A leftover UNREACHABLE() in front of this call aborted before the store
    // could ever be emitted; only the forwarding call remains.
    EmitStoreBufferU32(ctx, inst, handle, address, value);
}
|
||||
|
||||
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
|
|
|
@ -240,6 +240,8 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
|||
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
|
||||
|
|
|
@ -68,6 +68,14 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) {
|
|||
return ctx.OpISub(ctx.U64, a, b);
|
||||
}
|
||||
|
||||
/// Emits OpSMulExtended for `a` and `b`, using the context's
/// full_result_i32x2 struct type as the result type.
Id EmitSMulExt(EmitContext& ctx, Id a, Id b) {
    const Id result_type = ctx.full_result_i32x2;
    return ctx.OpSMulExtended(result_type, a, b);
}
|
||||
|
||||
/// Emits OpUMulExtended for `a` and `b`, using the context's
/// full_result_u32x2 struct type as the result type.
Id EmitUMulExt(EmitContext& ctx, Id a, Id b) {
    const Id result_type = ctx.full_result_u32x2;
    return ctx.OpUMulExtended(result_type, a, b);
}
|
||||
|
||||
/// Emits OpIMul for `a` and `b` with the scalar U32 type (ctx.U32[1]) as the
/// result type.
Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
    const Id result_type = ctx.U32[1];
    return ctx.OpIMul(result_type, a, b);
}
|
||||
|
|
|
@ -104,6 +104,9 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
|
||||
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
||||
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
|
||||
|
||||
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
||||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||
}
|
||||
|
||||
void EmitContext::DefineInterfaces(const IR::Program& program) {
|
||||
|
|
|
@ -138,6 +138,9 @@ public:
|
|||
VectorIds U32{};
|
||||
VectorIds U1{};
|
||||
|
||||
Id full_result_i32x2;
|
||||
Id full_result_u32x2;
|
||||
|
||||
Id true_value{};
|
||||
Id false_value{};
|
||||
Id u32_one_value{};
|
||||
|
|
|
@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::
|
|||
Statement& root{goto_pass.RootStatement()};
|
||||
IR::AbstractSyntaxList syntax_list;
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
|
||||
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
||||
return syntax_list;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,15 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::S_MOVK(const GcnInst& inst) {
|
||||
const auto simm16 = inst.control.sopk.simm.Value();
|
||||
if (simm16 & (1 << 15)) {
|
||||
// TODO: need to verify the case of imm sign extension
|
||||
UNREACHABLE();
|
||||
}
|
||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||
}
|
||||
|
||||
/// Copies the value read from the first source operand into the first
/// destination operand.
void Translator::S_MOV(const GcnInst& inst) {
    const auto source = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], source);
}
|
||||
|
|
|
@ -7,6 +7,10 @@
|
|||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
#define MAGIC_ENUM_RANGE_MIN 0
|
||||
#define MAGIC_ENUM_RANGE_MAX 1515
|
||||
#include "magic_enum.hpp"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};
|
||||
|
@ -210,6 +214,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
Translator translator{block, info};
|
||||
for (const auto& inst : inst_list) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_MOVK_I32:
|
||||
translator.S_MOVK(inst);
|
||||
break;
|
||||
case Opcode::S_MOV_B32:
|
||||
translator.S_MOV(inst);
|
||||
break;
|
||||
|
@ -421,6 +428,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_MAX_F32:
|
||||
translator.V_MAX_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAX_I32:
|
||||
translator.V_MAX_U32(true, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_U32:
|
||||
translator.V_MAX_U32(false, inst);
|
||||
break;
|
||||
case Opcode::V_RSQ_F32:
|
||||
translator.V_RSQ_F32(inst);
|
||||
break;
|
||||
|
@ -581,8 +594,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_ADD_I32:
|
||||
translator.S_ADD_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_HI_U32:
|
||||
translator.V_MUL_HI_U32(false, inst);
|
||||
break;
|
||||
case Opcode::V_MUL_LO_I32:
|
||||
translator.V_MUL_LO_I32(inst);
|
||||
translator.V_MUL_LO_U32(inst);
|
||||
break;
|
||||
case Opcode::V_SAD_U32:
|
||||
translator.V_SAD_U32(inst);
|
||||
|
@ -641,6 +657,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_BFM_B32:
|
||||
translator.S_BFM_B32(inst);
|
||||
break;
|
||||
case Opcode::V_TRUNC_F32:
|
||||
translator.V_TRUNC_F32(inst);
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
|
@ -654,7 +673,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
break;
|
||||
default:
|
||||
const u32 opcode = u32(inst.opcode);
|
||||
UNREACHABLE_MSG("Unknown opcode {}", opcode);
|
||||
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})",
|
||||
magic_enum::enum_name(inst.opcode), opcode);
|
||||
info.translation_failed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ public:
|
|||
void EmitFetch(const GcnInst& inst);
|
||||
|
||||
// Scalar ALU
|
||||
void S_MOVK(const GcnInst& inst);
|
||||
void S_MOV(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
|
@ -79,6 +80,7 @@ public:
|
|||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
|
||||
void V_MAX_F32(const GcnInst& inst);
|
||||
void V_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_RSQ_F32(const GcnInst& inst);
|
||||
void V_SIN_F32(const GcnInst& inst);
|
||||
void V_LOG_F32(const GcnInst& inst);
|
||||
|
@ -96,7 +98,7 @@ public:
|
|||
void V_SUBREV_I32(const GcnInst& inst);
|
||||
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
|
||||
void V_LSHRREV_B32(const GcnInst& inst);
|
||||
void V_MUL_LO_I32(const GcnInst& inst);
|
||||
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_SAD_U32(const GcnInst& inst);
|
||||
void V_BFE_U32(const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst);
|
||||
|
@ -112,6 +114,7 @@ public:
|
|||
void V_CVT_I32_F32(const GcnInst& inst);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
void V_TRUNC_F32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
|
|
@ -197,6 +197,12 @@ void Translator::V_MAX_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMax(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed));
|
||||
}
|
||||
|
||||
void Translator::V_RSQ_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
|
||||
|
@ -320,10 +326,11 @@ void Translator::V_LSHRREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
|
||||
}
|
||||
|
||||
void Translator::V_MUL_LO_I32(const GcnInst& inst) {
|
||||
void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IMul(src0, src1));
|
||||
const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)};
|
||||
SetDst(inst.dst[0], hi);
|
||||
}
|
||||
|
||||
void Translator::V_SAD_U32(const GcnInst& inst) {
|
||||
|
@ -418,4 +425,9 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.IMul(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.FPTrunc(src0));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -216,18 +216,22 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
|||
const IR::VectorReg src_reg{inst.src[1].code};
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
value = ir.GetVectorReg(src_reg);
|
||||
value = ir.GetVectorReg<Shader::IR::F32>(src_reg);
|
||||
break;
|
||||
case 2:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1));
|
||||
break;
|
||||
case 3:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2));
|
||||
break;
|
||||
case 4:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2),
|
||||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
|
||||
break;
|
||||
}
|
||||
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
||||
|
|
|
@ -880,6 +880,10 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits an extended multiply of `a` and `b`: the SMulExt opcode when
/// `is_signed` is true, UMulExt otherwise. Returns the resulting IR value.
IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
    if (is_signed) {
        return Inst(Opcode::SMulExt, a, b);
    }
    return Inst(Opcode::UMulExt, a, b);
}
|
||||
|
||||
/// Emits an IMul32 instruction for `a` and `b` and returns its U32 result.
U32 IREmitter::IMul(const U32& a, const U32& b) {
    const U32 product = Inst<U32>(Opcode::IMul32, a, b);
    return product;
}
|
||||
|
|
|
@ -146,6 +146,7 @@ public:
|
|||
|
||||
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
||||
|
|
|
@ -197,6 +197,8 @@ OPCODE(IAdd64, U64, U64,
|
|||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
OPCODE(SMulExt, U32x2, U32, U32, )
|
||||
OPCODE(UMulExt, U32x2, U32, U32, )
|
||||
OPCODE(SDiv32, U32, U32, U32, )
|
||||
OPCODE(UDiv32, U32, U32, U32, )
|
||||
OPCODE(INeg32, U32, U32, )
|
||||
|
|
|
@ -127,6 +127,7 @@ struct Info {
|
|||
Stage stage;
|
||||
|
||||
bool uses_group_quad{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
||||
template <typename T>
|
||||
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
||||
|
|
|
@ -312,6 +312,12 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
|
||||
return vk::Format::eBc3SrgbBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc3UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc4UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16B16A16Sint;
|
||||
|
@ -322,9 +328,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc1RgbaUnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc3UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Uint) {
|
||||
return vk::Format::eR8G8B8A8Uint;
|
||||
|
@ -361,22 +364,19 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
|
|||
"Unsupported component swap mode {}", static_cast<u32>(comp_swap));
|
||||
|
||||
const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
|
||||
|
||||
if (comp_swap_alt) {
|
||||
switch (base_format) {
|
||||
case vk::Format::eR8G8B8A8Unorm:
|
||||
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format;
|
||||
return vk::Format::eB8G8R8A8Unorm;
|
||||
case vk::Format::eB8G8R8A8Unorm:
|
||||
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format;
|
||||
return vk::Format::eR8G8B8A8Unorm;
|
||||
case vk::Format::eR8G8B8A8Srgb:
|
||||
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm
|
||||
: is_vo_surface ? vk::Format::eR8G8B8A8Unorm
|
||||
: base_format;
|
||||
return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb;
|
||||
case vk::Format::eB8G8R8A8Srgb:
|
||||
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm
|
||||
: is_vo_surface ? vk::Format::eB8G8R8A8Unorm
|
||||
: base_format;
|
||||
return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb;
|
||||
}
|
||||
UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format));
|
||||
}
|
||||
return base_format;
|
||||
}
|
||||
|
||||
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {
|
||||
|
|
|
@ -189,7 +189,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
inst_pool.ReleaseContents();
|
||||
|
||||
// Recompile shader to IR.
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash);
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
|
@ -224,6 +224,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
inst_pool.ReleaseContents();
|
||||
|
||||
// Recompile shader to IR.
|
||||
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
||||
const Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
|
|
@ -85,7 +85,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
|||
}
|
||||
|
||||
const vk::ImageViewCreateInfo image_view_ci = {
|
||||
.pNext = nullptr,
|
||||
.pNext = usage_override ? &usage_ci : nullptr,
|
||||
.image = image.image,
|
||||
.viewType = info.type,
|
||||
.format = format,
|
||||
|
|
Loading…
Reference in New Issue