video_core: Minor fixes (#366)
* data_share: Fix DS instruction
* vk_graphics_pipeline: Fix unnecessary invalidate
* spirv: Remove subgroup id
* vector_alu: Simplify mbcnt pattern
* shader_recompiler: More instructions
* clang format
* kernel: Fix cond memory leak and reduce spam
* liverpool: Print error on exception
* build fix
This commit is contained in:
parent cdff4af38d
commit 159be2c7f4
|
@ -727,6 +727,9 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
|
||||||
|
|
||||||
LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
|
LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
|
||||||
|
|
||||||
|
delete *cond;
|
||||||
|
*cond = nullptr;
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
return SCE_OK;
|
return SCE_OK;
|
||||||
|
@ -1142,7 +1145,7 @@ int PS4_SYSV_ABI scePthreadCondWait(ScePthreadCond* cond, ScePthreadMutex* mutex
|
||||||
}
|
}
|
||||||
int result = pthread_cond_wait(&(*cond)->cond, &(*mutex)->pth_mutex);
|
int result = pthread_cond_wait(&(*cond)->cond, &(*mutex)->pth_mutex);
|
||||||
|
|
||||||
LOG_INFO(Kernel_Pthread, "scePthreadCondWait, result={}", result);
|
LOG_DEBUG(Kernel_Pthread, "scePthreadCondWait, result={}", result);
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -1162,7 +1165,7 @@ int PS4_SYSV_ABI scePthreadCondattrDestroy(ScePthreadCondattr* attr) {
|
||||||
}
|
}
|
||||||
int result = pthread_condattr_destroy(&(*attr)->cond_attr);
|
int result = pthread_condattr_destroy(&(*attr)->cond_attr);
|
||||||
|
|
||||||
LOG_INFO(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result);
|
LOG_DEBUG(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result);
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
|
|
|
@ -11,7 +11,7 @@ Id SubgroupScope(EmitContext& ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitWarpId(EmitContext& ctx) {
|
Id EmitWarpId(EmitContext& ctx) {
|
||||||
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id);
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLaneId(EmitContext& ctx) {
|
Id EmitLaneId(EmitContext& ctx) {
|
||||||
|
|
|
@ -225,7 +225,6 @@ void EmitContext::DefineInputs(const Info& info) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Fragment:
|
case Stage::Fragment:
|
||||||
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
|
|
||||||
subgroup_local_invocation_id = DefineVariable(
|
subgroup_local_invocation_id = DefineVariable(
|
||||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||||
|
|
|
@ -180,7 +180,6 @@ public:
|
||||||
|
|
||||||
Id workgroup_id{};
|
Id workgroup_id{};
|
||||||
Id local_invocation_id{};
|
Id local_invocation_id{};
|
||||||
Id subgroup_id{};
|
|
||||||
Id subgroup_local_invocation_id{};
|
Id subgroup_local_invocation_id{};
|
||||||
Id image_u32{};
|
Id image_u32{};
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,8 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
|
||||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
if (is_pair) {
|
if (is_pair) {
|
||||||
// Pair loads are either 32 or 64-bit
|
// Pair loads are either 32 or 64-bit
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
const u32 adj = bit_size == 32 ? 4 : 8;
|
||||||
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
||||||
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
|
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
||||||
|
@ -56,7 +57,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
|
||||||
}
|
}
|
||||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||||
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
||||||
|
@ -65,11 +66,13 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
||||||
}
|
}
|
||||||
} else if (bit_size == 64) {
|
} else if (bit_size == 64) {
|
||||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||||
|
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
||||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
||||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
||||||
} else {
|
} else {
|
||||||
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||||
|
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
||||||
ir.SetVectorReg(dst_reg, data);
|
ir.SetVectorReg(dst_reg, data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,7 +82,8 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
||||||
const IR::VectorReg data0{inst.src[1].code};
|
const IR::VectorReg data0{inst.src[1].code};
|
||||||
const IR::VectorReg data1{inst.src[2].code};
|
const IR::VectorReg data1{inst.src[2].code};
|
||||||
if (is_pair) {
|
if (is_pair) {
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
const u32 adj = bit_size == 32 ? 4 : 8;
|
||||||
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -87,7 +91,7 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
||||||
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
|
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
|
||||||
addr0);
|
addr0);
|
||||||
}
|
}
|
||||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -96,11 +100,13 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
||||||
addr1);
|
addr1);
|
||||||
}
|
}
|
||||||
} else if (bit_size == 64) {
|
} else if (bit_size == 64) {
|
||||||
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||||
const IR::Value data =
|
const IR::Value data =
|
||||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
||||||
ir.WriteShared(bit_size, data, addr);
|
ir.WriteShared(bit_size, data, addr0);
|
||||||
} else {
|
} else {
|
||||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||||
|
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -125,6 +125,7 @@ public:
|
||||||
void V_ADD_F32(const GcnInst& inst);
|
void V_ADD_F32(const GcnInst& inst);
|
||||||
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
||||||
void V_MED3_F32(const GcnInst& inst);
|
void V_MED3_F32(const GcnInst& inst);
|
||||||
|
void V_MED3_I32(const GcnInst& inst);
|
||||||
void V_FLOOR_F32(const GcnInst& inst);
|
void V_FLOOR_F32(const GcnInst& inst);
|
||||||
void V_SUB_F32(const GcnInst& inst);
|
void V_SUB_F32(const GcnInst& inst);
|
||||||
void V_RCP_F32(const GcnInst& inst);
|
void V_RCP_F32(const GcnInst& inst);
|
||||||
|
@ -159,6 +160,7 @@ public:
|
||||||
void V_SUB_I32(const GcnInst& inst);
|
void V_SUB_I32(const GcnInst& inst);
|
||||||
void V_LSHR_B32(const GcnInst& inst);
|
void V_LSHR_B32(const GcnInst& inst);
|
||||||
void V_ASHRREV_I32(const GcnInst& inst);
|
void V_ASHRREV_I32(const GcnInst& inst);
|
||||||
|
void V_ASHR_I32(const GcnInst& inst);
|
||||||
void V_MAD_U32_U24(const GcnInst& inst);
|
void V_MAD_U32_U24(const GcnInst& inst);
|
||||||
void V_RNDNE_F32(const GcnInst& inst);
|
void V_RNDNE_F32(const GcnInst& inst);
|
||||||
void V_BCNT_U32_B32(const GcnInst& inst);
|
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||||
|
|
|
@ -24,6 +24,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||||
return V_LSHR_B32(inst);
|
return V_LSHR_B32(inst);
|
||||||
case Opcode::V_ASHRREV_I32:
|
case Opcode::V_ASHRREV_I32:
|
||||||
return V_ASHRREV_I32(inst);
|
return V_ASHRREV_I32(inst);
|
||||||
|
case Opcode::V_ASHR_I32:
|
||||||
|
return V_ASHR_I32(inst);
|
||||||
case Opcode::V_LSHRREV_B32:
|
case Opcode::V_LSHRREV_B32:
|
||||||
return V_LSHRREV_B32(inst);
|
return V_LSHRREV_B32(inst);
|
||||||
case Opcode::V_NOT_B32:
|
case Opcode::V_NOT_B32:
|
||||||
|
@ -183,6 +185,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||||
return V_ADD_F32(inst);
|
return V_ADD_F32(inst);
|
||||||
case Opcode::V_MED3_F32:
|
case Opcode::V_MED3_F32:
|
||||||
return V_MED3_F32(inst);
|
return V_MED3_F32(inst);
|
||||||
|
case Opcode::V_MED3_I32:
|
||||||
|
return V_MED3_I32(inst);
|
||||||
case Opcode::V_FLOOR_F32:
|
case Opcode::V_FLOOR_F32:
|
||||||
return V_FLOOR_F32(inst);
|
return V_FLOOR_F32(inst);
|
||||||
case Opcode::V_SUB_F32:
|
case Opcode::V_SUB_F32:
|
||||||
|
@ -479,6 +483,14 @@ void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_MED3_I32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||||
|
const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2);
|
||||||
|
SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
@ -760,6 +772,12 @@ void Translator::V_ASHRREV_I32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
|
SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_ASHR_I32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_MAD_U32_U24(const GcnInst& inst) {
|
void Translator::V_MAD_U32_U24(const GcnInst& inst) {
|
||||||
V_MAD_I32_I24(inst, false);
|
V_MAD_I32_I24(inst, false);
|
||||||
}
|
}
|
||||||
|
@ -925,25 +943,12 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
|
||||||
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
const IR::U32 lane_id = ir.LaneId();
|
if (!is_low) {
|
||||||
|
ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U);
|
||||||
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
|
return;
|
||||||
if (profile.subgroup_size == 32) {
|
}
|
||||||
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
|
ASSERT(src0.IsImmediate() && src0.U32() == ~0U);
|
||||||
return std::make_pair(warp_half, lane_id);
|
SetDst(inst.dst[0], ir.LaneId());
|
||||||
}
|
|
||||||
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
|
|
||||||
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
|
|
||||||
return std::make_pair(warp_half, mask_shift);
|
|
||||||
}();
|
|
||||||
|
|
||||||
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
|
|
||||||
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
|
|
||||||
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, is_low ? ir.Imm32(~0U) : thread_mask,
|
|
||||||
is_low ? thread_mask : ir.Imm32(0))};
|
|
||||||
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
|
|
||||||
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
|
|
||||||
SetDst(inst.dst[0], result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <coroutine>
|
#include <coroutine>
|
||||||
#include <functional>
|
#include <exception>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
@ -1040,7 +1040,11 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
void unhandled_exception() {
|
void unhandled_exception() {
|
||||||
UNREACHABLE();
|
try {
|
||||||
|
std::rethrow_exception(std::current_exception());
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
UNREACHABLE_MSG("Unhandled exception: {}", e.what());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void return_void() {}
|
void return_void() {}
|
||||||
struct empty {};
|
struct empty {};
|
||||||
|
|
|
@ -94,7 +94,9 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
||||||
const auto vsharp = buffer.GetVsharp(info);
|
const auto vsharp = buffer.GetVsharp(info);
|
||||||
const u32 size = vsharp.GetSize();
|
const u32 size = vsharp.GetSize();
|
||||||
const VAddr address = vsharp.base_address;
|
const VAddr address = vsharp.base_address;
|
||||||
texture_cache.OnCpuWrite(address);
|
if (buffer.is_storage) {
|
||||||
|
texture_cache.OnCpuWrite(address);
|
||||||
|
}
|
||||||
const u32 offset = staging.Copy(address, size,
|
const u32 offset = staging.Copy(address, size,
|
||||||
buffer.is_storage ? instance.StorageMinAlignment()
|
buffer.is_storage ? instance.StorageMinAlignment()
|
||||||
: instance.UniformMinAlignment());
|
: instance.UniformMinAlignment());
|
||||||
|
|
Loading…
Reference in New Issue