Patch `insertq`

This commit is contained in:
offtkp 2024-08-28 19:23:31 +03:00
parent e57d55e6e9
commit 8e19ccd507
1 changed file with 146 additions and 3 deletions

View File

@ -40,6 +40,15 @@ static Xbyak::Reg ZydisToXbyakRegisterOperand(const ZydisDecodedOperand& operand
return ZydisToXbyakRegister(operand.reg.value); return ZydisToXbyakRegister(operand.reg.value);
} }
// Converts a Zydis register operand naming XMM0-XMM15 into the matching Xbyak XMM register.
// Asserts that the operand really is a register operand before reading `operand.reg`, and
// reports any register outside XMM0-XMM15 through UNREACHABLE_MSG.
static Xbyak::Xmm ZydisToXbyakVectorOperand(const ZydisDecodedOperand& operand) {
    ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_REGISTER,
               "Expected register operand, got type: {}", static_cast<u32>(operand.type));

    const ZydisRegister reg = operand.reg.value;
    if (reg >= ZYDIS_REGISTER_XMM0 && reg <= ZYDIS_REGISTER_XMM15) {
        // The XMM enumerators are compared as a contiguous range above, so the offset from
        // XMM0 is used directly as the Xbyak register index.
        return Xbyak::Xmm(reg - ZYDIS_REGISTER_XMM0);
    }
    UNREACHABLE_MSG("Unsupported vector register: {}", static_cast<u32>(reg));
}
static Xbyak::Address ZydisToXbyakMemoryOperand(const ZydisDecodedOperand& operand) { static Xbyak::Address ZydisToXbyakMemoryOperand(const ZydisDecodedOperand& operand) {
ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand, got type: {}", ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand, got type: {}",
static_cast<u32>(operand.type)); static_cast<u32>(operand.type));
@ -108,9 +117,7 @@ static Xbyak::Reg AllocateScratchRegister(
UNREACHABLE_MSG("Out of scratch registers!"); UNREACHABLE_MSG("Out of scratch registers!");
} }
#ifdef __APPLE__ static constexpr u32 MaxSavedRegisters = 4;
static constexpr u32 MaxSavedRegisters = 3;
static pthread_key_t register_save_slots[MaxSavedRegisters]; static pthread_key_t register_save_slots[MaxSavedRegisters];
static std::once_flag register_save_init_flag; static std::once_flag register_save_init_flag;
@ -155,6 +162,8 @@ static void RestoreRegisters(Xbyak::CodeGenerator& c,
} }
} }
#ifdef __APPLE__
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]); const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]); const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
@ -280,6 +289,11 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
dst_op.reg.value <= ZYDIS_REGISTER_R15; dst_op.reg.value <= ZYDIS_REGISTER_R15;
} }
// Patch filter that accepts every instruction unconditionally. Intended for mnemonics
// that always need to be patched, whatever their operands are.
static bool FilterAlwaysTrue(const ZydisDecodedOperand* /*operands*/) {
    constexpr bool always_patch = true;
    return always_patch;
}
static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]); const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
const auto slot = GetTcbKey(); const auto slot = GetTcbKey();
@ -317,6 +331,133 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
#endif #endif
} }
static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
// INSERTQ Instruction Reference
// Inserts bits from the lower 64 bits of the source operand into the lower 64 bits of the destination operand
// No other bits in the lower 64 bits of the destination are modified. The upper 64 bits of the destination are undefined.
// There's two forms of the instruction:
// INSERTQ xmm1, xmm2, imm8, imm8
// INSERTQ xmm1, xmm2
// For the immediate form:
// Insert field starting at bit 0 of xmm2 with the length
// specified by [5:0] of the first immediate byte. This
// field is inserted into xmm1 starting at the bit position
// specified by [5:0] of the second immediate byte.
// For the register form:
// Insert field starting at bit 0 of xmm2 with the length
// specified by xmm2[69:64]. This field is inserted into
// xmm1 starting at the bit position specified by
// xmm2[77:72].
// A value of zero in the field length is defined as a length of 64. If the length field is 0 and the bit index
// is 0, bits 63:0 of the source operand are inserted. For any other value of the bit index, the results are
// undefined.
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (operands[0].type != ZYDIS_OPERAND_TYPE_REGISTER || operands[1].type != ZYDIS_OPERAND_TYPE_REGISTER) {
ASSERT_MSG("operands 0 and 1 must be registers.");
}
const Xbyak::Xmm dst = ZydisToXbyakVectorOperand(operands[0]);
const Xbyak::Xmm src = ZydisToXbyakVectorOperand(operands[1]);
if (immediateForm) {
u8 length = operands[2].imm.value.u & 0x3F;
u8 index = operands[3].imm.value.u & 0x3F;
if (length == 0) {
length = 64;
}
if (length + index > 64) {
ASSERT_MSG("length + index must be less than or equal to 64.");
}
const Xbyak::Reg64 scratch1 = AllocateScratchRegister({}, 64).cvt64();
const Xbyak::Reg64 scratch2 = AllocateScratchRegister({&scratch1}, 64).cvt64();
const Xbyak::Reg64 mask = AllocateScratchRegister({&scratch1, &scratch2}, 64).cvt64();
u64 maskValue = (1ULL << length) - 1;
SaveRegisters(c, {scratch1, scratch2, mask});
c.movq(scratch1, src);
c.movq(scratch2, dst);
c.mov(mask, maskValue);
// src &= mask
c.and_(scratch1, mask);
// src <<= index
c.shl(scratch1, index);
// dst &= ~(mask << index)
maskValue = ~(maskValue << index);
c.mov(mask, maskValue);
c.and_(scratch2, mask);
// dst |= src
c.or_(scratch2, scratch1);
// Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected
c.pinsrq(dst, scratch2, 0);
RestoreRegisters(c, {scratch1, scratch2, mask});
} else {
if (operands[2].type != ZYDIS_OPERAND_TYPE_UNUSED || operands[3].type != ZYDIS_OPERAND_TYPE_UNUSED) {
ASSERT_MSG("operands 2 and 3 must be unused for register form.");
}
const Xbyak::Reg64 scratch1 = AllocateScratchRegister({}, 64).cvt64();
const Xbyak::Reg64 scratch2 = AllocateScratchRegister({&scratch1}, 64).cvt64();
const Xbyak::Reg64 index = AllocateScratchRegister({&scratch1, &scratch2}, 64).cvt64();
const Xbyak::Reg64 mask = AllocateScratchRegister({&scratch1, &scratch2, &index}, 64).cvt64();
SaveRegisters(c, {scratch1, scratch2, index, mask});
// Get upper 64 bits of src
c.pextrq(index, src, 1);
c.mov(mask, index);
c.mov(scratch1, 64); // for the cmovz below
c.and_(mask, 0x3F); // mask now holds the length
c.cmovz(mask, scratch1); // Check if length is 0, if so, set to 64
// Get index to insert at
c.shr(index, 8);
c.and_(index, 0x3F);
// Create a mask out of the length
c.mov(scratch1, 1);
c.shlx(mask, scratch1, mask);
c.sub(mask, 1);
c.movq(scratch1, src);
c.movq(scratch2, dst);
// src &= mask
c.and_(scratch1, mask);
// dst &= ~(mask << index)
c.shlx(mask, mask, index);
c.not_(mask);
c.and_(scratch2, mask);
// dst |= (src << index)
c.shlx(scratch1, scratch1, index);
c.or_(scratch2, scratch1);
// Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected
c.pinsrq(dst, scratch2, 0);
RestoreRegisters(c, {scratch1, scratch2, index, mask});
}
}
using PatchFilter = bool (*)(const ZydisDecodedOperand*); using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&); using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo { struct PatchInfo {
@ -338,6 +479,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}},
#endif #endif
{ZYDIS_MNEMONIC_INSERTQ, {FilterAlwaysTrue, GenerateINSERTQ, true}},
#ifdef __APPLE__ #ifdef __APPLE__
// BMI1 instructions that are not supported by Rosetta 2 on Apple Silicon. // BMI1 instructions that are not supported by Rosetta 2 on Apple Silicon.
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}}, {ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},