Merge pull request #229 from shadps4-emu/stabilization11
video_core: Address regressions from last few PRs
This commit is contained in:
commit
33683cfdd7
|
@ -221,8 +221,8 @@ struct AddressSpace::Impl {
|
||||||
void* hint_address = reinterpret_cast<void*>(SYSTEM_MANAGED_MIN);
|
void* hint_address = reinterpret_cast<void*>(SYSTEM_MANAGED_MIN);
|
||||||
virtual_size = SystemSize + UserSize;
|
virtual_size = SystemSize + UserSize;
|
||||||
virtual_base = reinterpret_cast<u8*>(
|
virtual_base = reinterpret_cast<u8*>(
|
||||||
mmap(reinterpret_cast<void*>(hint_address), virtual_size, PROT_READ | PROT_WRITE,
|
mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE,
|
||||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
|
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0));
|
||||||
if (virtual_base == MAP_FAILED) {
|
if (virtual_base == MAP_FAILED) {
|
||||||
LOG_CRITICAL(Kernel_Vmm, "mmap failed: {}", strerror(errno));
|
LOG_CRITICAL(Kernel_Vmm, "mmap failed: {}", strerror(errno));
|
||||||
throw std::bad_alloc{};
|
throw std::bad_alloc{};
|
||||||
|
|
|
@ -1357,6 +1357,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
||||||
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
||||||
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
||||||
|
LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
|
||||||
LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
|
LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
|
||||||
LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
|
LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
|
||||||
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
|
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
|
||||||
|
|
|
@ -113,8 +113,13 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) {
|
||||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
|
const Id zero{ctx.f32_zero_value};
|
||||||
|
return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
|
|
|
@ -349,7 +349,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||||
Id ms);
|
Id ms);
|
||||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id derivatives, const IR::Value& offset, Id lod_clamp);
|
Id derivatives, const IR::Value& offset, Id lod_clamp);
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||||
|
|
|
@ -1826,17 +1826,13 @@ constexpr std::array<InstFormat, 71> InstructionFormatVOP1 = {{
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32,
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32,
|
||||||
ScalarType::Float64},
|
ScalarType::Float64},
|
||||||
// 17 = V_CVT_F32_UBYTE0
|
// 17 = V_CVT_F32_UBYTE0
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32},
|
||||||
ScalarType::Undefined},
|
|
||||||
// 18 = V_CVT_F32_UBYTE1
|
// 18 = V_CVT_F32_UBYTE1
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32},
|
||||||
ScalarType::Undefined},
|
|
||||||
// 19 = V_CVT_F32_UBYTE2
|
// 19 = V_CVT_F32_UBYTE2
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32},
|
||||||
ScalarType::Undefined},
|
|
||||||
// 20 = V_CVT_F32_UBYTE3
|
// 20 = V_CVT_F32_UBYTE3
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32},
|
||||||
ScalarType::Undefined},
|
|
||||||
// 21 = V_CVT_U32_F64
|
// 21 = V_CVT_U32_F64
|
||||||
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32},
|
{InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32},
|
||||||
// 22 = V_CVT_F64_U32
|
// 22 = V_CVT_F64_U32
|
||||||
|
|
|
@ -197,8 +197,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
|
|
||||||
// Read the V# of the attribute to figure out component number and type.
|
// Read the V# of the attribute to figure out component number and type.
|
||||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
|
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
|
||||||
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
|
for (u32 i = 0; i < 4; i++) {
|
||||||
for (u32 i = 0; i < num_components; i++) {
|
|
||||||
const IR::F32 comp = [&] {
|
const IR::F32 comp = [&] {
|
||||||
switch (buffer.GetSwizzle(i)) {
|
switch (buffer.GetSwizzle(i)) {
|
||||||
case AmdGpu::CompSwizzle::One:
|
case AmdGpu::CompSwizzle::One:
|
||||||
|
@ -225,6 +224,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
attrib.instance_data);
|
attrib.instance_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
|
||||||
info.vs_inputs.push_back({
|
info.vs_inputs.push_back({
|
||||||
.fmt = buffer.num_format,
|
.fmt = buffer.num_format,
|
||||||
.binding = attrib.semantic,
|
.binding = attrib.semantic,
|
||||||
|
@ -268,7 +268,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
translator.V_AND_B32(inst);
|
translator.V_AND_B32(inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_OR_B32:
|
case Opcode::V_OR_B32:
|
||||||
translator.V_OR_B32(inst);
|
translator.V_OR_B32(false, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_XOR_B32:
|
||||||
|
translator.V_OR_B32(true, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_LSHLREV_B32:
|
case Opcode::V_LSHLREV_B32:
|
||||||
translator.V_LSHLREV_B32(inst);
|
translator.V_LSHLREV_B32(inst);
|
||||||
|
@ -324,6 +327,24 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||||
translator.V_CVT_PKRTZ_F16_F32(inst);
|
translator.V_CVT_PKRTZ_F16_F32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_CVT_F32_F16:
|
||||||
|
translator.V_CVT_F32_F16(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_UBYTE0:
|
||||||
|
translator.V_CVT_F32_UBYTE(0, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_UBYTE1:
|
||||||
|
translator.V_CVT_F32_UBYTE(1, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_UBYTE2:
|
||||||
|
translator.V_CVT_F32_UBYTE(2, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_UBYTE3:
|
||||||
|
translator.V_CVT_F32_UBYTE(3, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_BFREV_B32:
|
||||||
|
translator.V_BFREV_B32(inst);
|
||||||
|
break;
|
||||||
case Opcode::V_FRACT_F32:
|
case Opcode::V_FRACT_F32:
|
||||||
translator.V_FRACT_F32(inst);
|
translator.V_FRACT_F32(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -355,6 +376,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::IMAGE_SAMPLE_L:
|
case Opcode::IMAGE_SAMPLE_L:
|
||||||
translator.IMAGE_SAMPLE(inst);
|
translator.IMAGE_SAMPLE(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::IMAGE_GET_LOD:
|
||||||
|
translator.IMAGE_GET_LOD(inst);
|
||||||
|
break;
|
||||||
case Opcode::IMAGE_GATHER4_C:
|
case Opcode::IMAGE_GATHER4_C:
|
||||||
translator.IMAGE_GATHER(inst);
|
translator.IMAGE_GATHER(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -682,7 +706,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
translator.V_SAD_U32(inst);
|
translator.V_SAD_U32(inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_BFE_U32:
|
case Opcode::V_BFE_U32:
|
||||||
translator.V_BFE_U32(inst);
|
translator.V_BFE_U32(false, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_BFE_I32:
|
||||||
|
translator.V_BFE_U32(true, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_MAD_I32_I24:
|
case Opcode::V_MAD_I32_I24:
|
||||||
translator.V_MAD_I32_I24(inst);
|
translator.V_MAD_I32_I24(inst);
|
||||||
|
|
|
@ -71,9 +71,10 @@ public:
|
||||||
void V_SAD(const GcnInst& inst);
|
void V_SAD(const GcnInst& inst);
|
||||||
void V_MAC_F32(const GcnInst& inst);
|
void V_MAC_F32(const GcnInst& inst);
|
||||||
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
|
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
|
||||||
|
void V_CVT_F32_F16(const GcnInst& inst);
|
||||||
void V_MUL_F32(const GcnInst& inst);
|
void V_MUL_F32(const GcnInst& inst);
|
||||||
void V_CNDMASK_B32(const GcnInst& inst);
|
void V_CNDMASK_B32(const GcnInst& inst);
|
||||||
void V_OR_B32(const GcnInst& inst);
|
void V_OR_B32(bool is_xor, const GcnInst& inst);
|
||||||
void V_AND_B32(const GcnInst& inst);
|
void V_AND_B32(const GcnInst& inst);
|
||||||
void V_LSHLREV_B32(const GcnInst& inst);
|
void V_LSHLREV_B32(const GcnInst& inst);
|
||||||
void V_ADD_I32(const GcnInst& inst);
|
void V_ADD_I32(const GcnInst& inst);
|
||||||
|
@ -110,7 +111,7 @@ public:
|
||||||
void V_LSHRREV_B32(const GcnInst& inst);
|
void V_LSHRREV_B32(const GcnInst& inst);
|
||||||
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
||||||
void V_SAD_U32(const GcnInst& inst);
|
void V_SAD_U32(const GcnInst& inst);
|
||||||
void V_BFE_U32(const GcnInst& inst);
|
void V_BFE_U32(bool is_signed, const GcnInst& inst);
|
||||||
void V_MAD_I32_I24(const GcnInst& inst);
|
void V_MAD_I32_I24(const GcnInst& inst);
|
||||||
void V_MUL_I32_I24(const GcnInst& inst);
|
void V_MUL_I32_I24(const GcnInst& inst);
|
||||||
void V_SUB_I32(const GcnInst& inst);
|
void V_SUB_I32(const GcnInst& inst);
|
||||||
|
@ -130,6 +131,8 @@ public:
|
||||||
void V_CMP_NE_U64(const GcnInst& inst);
|
void V_CMP_NE_U64(const GcnInst& inst);
|
||||||
void V_BFI_B32(const GcnInst& inst);
|
void V_BFI_B32(const GcnInst& inst);
|
||||||
void V_NOT_B32(const GcnInst& inst);
|
void V_NOT_B32(const GcnInst& inst);
|
||||||
|
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||||
|
void V_BFREV_B32(const GcnInst& inst);
|
||||||
|
|
||||||
// Vector Memory
|
// Vector Memory
|
||||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||||
|
@ -149,6 +152,7 @@ public:
|
||||||
void IMAGE_GATHER(const GcnInst& inst);
|
void IMAGE_GATHER(const GcnInst& inst);
|
||||||
void IMAGE_STORE(const GcnInst& inst);
|
void IMAGE_STORE(const GcnInst& inst);
|
||||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||||
|
void IMAGE_GET_LOD(const GcnInst& inst);
|
||||||
|
|
||||||
// Export
|
// Export
|
||||||
void EXP(const GcnInst& inst);
|
void EXP(const GcnInst& inst);
|
||||||
|
|
|
@ -26,6 +26,11 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
|
||||||
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
|
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0 = GetSrc(inst.src[0]);
|
||||||
|
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_MUL_F32(const GcnInst& inst) {
|
void Translator::V_MUL_F32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
|
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
|
||||||
}
|
}
|
||||||
|
@ -54,11 +59,11 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
||||||
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_OR_B32(const GcnInst& inst) {
|
void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
ir.SetVectorReg(dst_reg, ir.BitwiseOr(src0, src1));
|
ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_AND_B32(const GcnInst& inst) {
|
void Translator::V_AND_B32(const GcnInst& inst) {
|
||||||
|
@ -345,11 +350,11 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
|
SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_BFE_U32(const GcnInst& inst) {
|
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
||||||
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
|
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
|
||||||
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2));
|
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_MAD_I32_I24(const GcnInst& inst) {
|
void Translator::V_MAD_I32_I24(const GcnInst& inst) {
|
||||||
|
@ -486,4 +491,15 @@ void Translator::V_NOT_B32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.BitwiseNot(src0));
|
SetDst(inst.dst[0], ir.BitwiseNot(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8));
|
||||||
|
SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_BFREV_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
SetDst(inst.dst[0], ir.BitReverse(src0));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -307,4 +307,19 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
||||||
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||||
|
const auto& mimg = inst.control.mimg;
|
||||||
|
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
|
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||||
|
|
||||||
|
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||||
|
const IR::Value body = ir.CompositeConstruct(
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||||
|
const IR::Value lod = ir.ImageQueryLod(handle, body, {});
|
||||||
|
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
|
||||||
|
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -1119,6 +1119,8 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val
|
||||||
switch (dest_bitsize) {
|
switch (dest_bitsize) {
|
||||||
case 32:
|
case 32:
|
||||||
switch (src_bitsize) {
|
switch (src_bitsize) {
|
||||||
|
case 16:
|
||||||
|
return Inst<F32>(Opcode::ConvertF32U16, value);
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<F32>(Opcode::ConvertF32U32, value);
|
return Inst<F32>(Opcode::ConvertF32U32, value);
|
||||||
}
|
}
|
||||||
|
@ -1139,7 +1141,7 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
|
||||||
: ConvertUToF(dest_bitsize, src_bitsize, value);
|
: ConvertUToF(dest_bitsize, src_bitsize, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
|
U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
|
||||||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -196,7 +196,7 @@ public:
|
||||||
[[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
|
[[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
|
||||||
const Value& value);
|
const Value& value);
|
||||||
|
|
||||||
[[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
|
[[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
|
||||||
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
|
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
|
||||||
|
|
||||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
|
||||||
|
|
|
@ -257,6 +257,7 @@ OPCODE(ConvertF32S32, F32, U32,
|
||||||
OPCODE(ConvertF32U32, F32, U32, )
|
OPCODE(ConvertF32U32, F32, U32, )
|
||||||
OPCODE(ConvertF64S32, F64, U32, )
|
OPCODE(ConvertF64S32, F64, U32, )
|
||||||
OPCODE(ConvertF64U32, F64, U32, )
|
OPCODE(ConvertF64U32, F64, U32, )
|
||||||
|
OPCODE(ConvertF32U16, F32, U16, )
|
||||||
|
|
||||||
// Image operations
|
// Image operations
|
||||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||||
|
|
|
@ -348,8 +348,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
}
|
}
|
||||||
if (inst_info.explicit_lod) {
|
if (inst_info.explicit_lod) {
|
||||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
||||||
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod);
|
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ||
|
||||||
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2;
|
inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod);
|
||||||
|
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3;
|
||||||
inst.SetArg(pos, arg);
|
inst.SetArg(pos, arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -221,6 +221,7 @@ using F32 = TypedValue<Type::F32>;
|
||||||
using F64 = TypedValue<Type::F64>;
|
using F64 = TypedValue<Type::F64>;
|
||||||
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
||||||
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||||
|
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
|
||||||
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
||||||
using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
|
using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
|
||||||
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
||||||
|
|
|
@ -683,8 +683,8 @@ struct Liverpool {
|
||||||
BitField<0, 5, TilingMode> tile_mode_index;
|
BitField<0, 5, TilingMode> tile_mode_index;
|
||||||
BitField<5, 5, u32> fmask_tile_mode_index;
|
BitField<5, 5, u32> fmask_tile_mode_index;
|
||||||
BitField<12, 3, u32> num_samples_log2;
|
BitField<12, 3, u32> num_samples_log2;
|
||||||
BitField<15, 3, u32> num_fragments_log2;
|
BitField<15, 2, u32> num_fragments_log2;
|
||||||
BitField<18, 1, u32> force_dst_alpha_1;
|
BitField<17, 1, u32> force_dst_alpha_1;
|
||||||
} attrib;
|
} attrib;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
u32 cmask_base_address;
|
u32 cmask_base_address;
|
||||||
|
|
|
@ -56,6 +56,6 @@ void main() {
|
||||||
for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
|
for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
|
||||||
uint p0 = (p[ofs] >> 8) & 0xff;
|
uint p0 = (p[ofs] >> 8) & 0xff;
|
||||||
uint p1 = p[ofs] & 0xff;
|
uint p1 = p[ofs] & 0xff;
|
||||||
imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0));
|
imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p1, p0, 0, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,8 +93,9 @@ void Rasterizer::BeginRendering() {
|
||||||
|
|
||||||
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
||||||
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
||||||
state.width = std::min<u32>(state.width, hint.width);
|
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||||
state.height = std::min<u32>(state.height, hint.height);
|
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||||
|
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||||
|
|
||||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
||||||
state.color_attachments[state.num_color_attachments++] = {
|
state.color_attachments[state.num_color_attachments++] = {
|
||||||
|
@ -117,8 +118,8 @@ void Rasterizer::BeginRendering() {
|
||||||
const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint,
|
const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint,
|
||||||
regs.depth_control.depth_write_enable);
|
regs.depth_control.depth_write_enable);
|
||||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||||
state.width = std::min<u32>(state.width, hint.width);
|
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||||
state.height = std::min<u32>(state.height, hint.height);
|
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||||
state.depth_attachment = {
|
state.depth_attachment = {
|
||||||
.imageView = *image_view.image_view,
|
.imageView = *image_view.image_view,
|
||||||
.imageLayout = image.layout,
|
.imageLayout = image.layout,
|
||||||
|
|
|
@ -140,7 +140,7 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
|
||||||
RegisterMeta(info, image_id);
|
RegisterMeta(info, image_id);
|
||||||
|
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||||
RefreshImage(image);
|
RefreshImage(image);
|
||||||
TrackImage(image, image_id);
|
TrackImage(image, image_id);
|
||||||
}
|
}
|
||||||
|
|
|
@ -194,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case vk::Format::eBc3UnormBlock:
|
case vk::Format::eBc3UnormBlock:
|
||||||
case vk::Format::eBc7SrgbBlock:
|
case vk::Format::eBc7SrgbBlock:
|
||||||
|
case vk::Format::eBc7UnormBlock:
|
||||||
return vk::Format::eR32G32B32A32Uint;
|
return vk::Format::eR32G32B32A32Uint;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue