case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+ case X86::LCMPXCHG8B: {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B
+ // requires a memory operand. If it happens that current architecture is
+ // i686 and for current function we need a base pointer
+ // - which is ESI for i686 - register allocator would not be able to
+ // allocate registers for an address in form of X(%reg, %reg, Y)
+ // - there never would be enough unreserved registers during regalloc
+ // (without the need for base ptr the only option would be X(%edi, %esi, Y).
+ // We are giving a hand to register allocator by precomputing the address in
+ // a new vreg using LEA.
+
+ // If it is not i686 or there is no base pointer - nothing to do here.
+ if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
+ return BB;
+
+ // Even though this code does not necessarily needs the base pointer to
+ // be ESI, we check for that. The reason: if this assert fails, there are
+ // some changes happened in the compiler base pointer handling, which most
+ // probably have to be addressed somehow here.
+ assert(TRI->getBaseRegister() == X86::ESI &&
+ "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
+ "base pointer in mind");
+
+ // Create a fresh virtual register of pointer width to hold the
+ // LEA-flattened address.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MVT SPTy = getPointerTy(MF->getDataLayout());
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+ unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
+
+ X86AddressMode AM = getAddressFromInstr(&MI, 0);
+ // Regalloc does not need any help when the memory operand of CMPXCHG8B
+ // does not use index register.
+ if (AM.IndexReg == X86::NoRegister)
+ return BB;
+
+ // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
+ // four operand definitions that are E[ABCD] registers. We skip them and
+ // then insert the LEA.
+ MachineBasicBlock::iterator MBBI(MI);
+ while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
+ MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
+ --MBBI;
+ // Insert the LEA above the run of glued E[ABCD]-defining copies so the
+ // flattened address is available before CMPXCHG8B's inputs are set up.
+ addFullAddress(
+ BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
+
+ // Rewrite CMPXCHG8B's 5-operand memory reference to address memory
+ // directly through the precomputed vreg (scale 1, no index, no disp).
+ setDirectAddressInInstr(&MI, 0, computedAddrVReg);
+
+ return BB;
+ }
+ // LCMPXCHG16B gets usesCustomInserter from the same pseudo definition as
+ // LCMPXCHG8B, but needs no address rewriting here; the EBX/RBX-saving
+ // variants are handled by the cases below.
+ case X86::LCMPXCHG16B:
+ return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
+/// Rewrite the 5-part x86 memory reference beginning at operand index
+/// \p Operand so that the instruction addresses memory directly through
+/// \p Reg: base = Reg, scale = 1, index = none, displacement = 0,
+/// segment = none.
+static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
+                                           unsigned Reg) {
+  unsigned Idx = Operand;
+  MI->getOperand(Idx++).setReg(Reg); // Base register.
+  MI->getOperand(Idx++).setImm(1);   // Scale factor of 1.
+  MI->getOperand(Idx++).setReg(0);   // No index register.
+  MI->getOperand(Idx++).setImm(0);   // Zero displacement.
+  MI->getOperand(Idx).setReg(0);     // No segment register.
+}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
!strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
--- /dev/null
+; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s
+; PR28755
+
+; Check that the register allocator is able to handle this
+; lots-of-fixed-and-reserved-registers case. We do that by
+; emitting an LEA before the 4 cmpxchg8b operand generators.
+
+; The variable-sized alloca (together with -stackrealign from the RUN line)
+; makes the function need a base pointer, and the GEP gives cmpxchg an
+; address that uses an index register - exactly the case the custom
+; inserter handles by emitting an LEA first.
+define void @foo_alloca(i64* %a, i32 %off, i32 %n) {
+ %dummy = alloca i32, i32 %n
+ %addr = getelementptr inbounds i64, i64* %a, i32 %off
+
+ %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+ ret void
+}
+
+; The leal flattens the (base, index) address into a single register, which
+; is then the only register cmpxchg8b's memory operand needs.
+; CHECK-LABEL: foo_alloca
+; CHECK: leal {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: lock cmpxchg8b ([[REGISTER]])
+
+; If the address mode does not use an index register, check that we did not
+; generate the LEA.
+define void @foo_alloca_direct_address(i64* %addr, i32 %n) {
+ %dummy = alloca i32, i32 %n
+
+ %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+ ret void
+}
+
+; CHECK-LABEL: foo_alloca_direct_address
+; NOTE(review): [[REGISTER]] on the last line reuses the value captured by
+; the first function's CHECK, since FileCheck variables persist across
+; CHECK-LABEL blocks and the CHECK-NOT pattern below is expected never to
+; match (so it never defines a new value). Confirm this cross-function
+; coupling is intended rather than checking cmpxchg8b uses *some* register.
+; CHECK-NOT: leal {{\(%e.*\)}}, [[REGISTER:%e.i]]
+; CHECK: lock cmpxchg8b ([[REGISTER]])