From 3a8d108b2ba2aaf2c9edd795951d6ac656ef6729 Mon Sep 17 00:00:00 2001 From: Nikolai Bozhenov Date: Thu, 24 Nov 2016 13:23:35 +0000 Subject: [PATCH] [x86] Fixing PR28755 by precomputing the address used in CMPXCHG8B The bug arises during register allocation on i686 for CMPXCHG8B instruction when base pointer is needed. CMPXCHG8B needs 4 implicit registers (EAX, EBX, ECX, EDX) and a memory address, plus ESI is reserved as the base pointer. With such constraints the only way register allocator would do its job successfully is when the addressing mode of the instruction requires only one register. If that is not the case - we are emitting additional LEA instruction to compute the address. It fixes PR28755. Patch by Alexander Ivchenko Differential Revision: https://reviews.llvm.org/D25088 llvm-svn: 287875 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 51 ++++++++++++++++++++++ llvm/lib/Target/X86/X86InstrBuilder.h | 11 +++++ llvm/lib/Target/X86/X86InstrCompiler.td | 2 +- .../X86/cmpxchg8b_alloca_regalloc_handling.ll | 35 +++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 33fc5ac..80bcadc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25153,6 +25153,57 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + case X86::LCMPXCHG8B: { + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B + // requires a memory operand. If it happens that current architecture is + // i686 and for current function we need a base pointer + // - which is ESI for i686 - register allocator would not be able to + // allocate registers for an address in form of X(%reg, %reg, Y) + // - there never would be enough unreserved registers during regalloc + // (without the need for base ptr the only option would be X(%edi, %esi, Y). + // We are giving a hand to register allocator by precomputing the address in + // a new vreg using LEA. + + // If it is not i686 or there is no base pointer - nothing to do here. + if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF)) + return BB; + + // Even though this code does not necessarily needs the base pointer to + // be ESI, we check for that. The reason: if this assert fails, there are + // some changes happened in the compiler base pointer handling, which most + // probably have to be addressed somehow here. + assert(TRI->getBaseRegister() == X86::ESI && + "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a " + "base pointer in mind"); + + MachineRegisterInfo &MRI = MF->getRegInfo(); + MVT SPTy = getPointerTy(MF->getDataLayout()); + const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); + unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); + + X86AddressMode AM = getAddressFromInstr(&MI, 0); + // Regalloc does not need any help when the memory operand of CMPXCHG8B + // does not use index register. + if (AM.IndexReg == X86::NoRegister) + return BB; + + // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its + // four operand definitions that are E[ABCD] registers. We skip them and + // then insert the LEA. + MachineBasicBlock::iterator MBBI(MI); + while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) || + MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX)) + --MBBI; + addFullAddress( + BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM); + + setDirectAddressInInstr(&MI, 0, computedAddrVReg); + + return BB; + } + case X86::LCMPXCHG16B: + return BB; case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { unsigned BasePtr = diff --git a/llvm/lib/Target/X86/X86InstrBuilder.h b/llvm/lib/Target/X86/X86InstrBuilder.h index ec227cc..abf37d9 100644 --- a/llvm/lib/Target/X86/X86InstrBuilder.h +++ b/llvm/lib/Target/X86/X86InstrBuilder.h @@ -128,6 +128,17 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0); } +/// Replace the address used in the instruction with the direct memory +/// reference. +static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, + unsigned Reg) { + // Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg. + MI->getOperand(Operand).setReg(Reg); + MI->getOperand(Operand + 1).setImm(1); + MI->getOperand(Operand + 2).setReg(0); + MI->getOperand(Operand + 3).setImm(0); + MI->getOperand(Operand + 4).setReg(0); +} static inline const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset) { diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index e5cd1ef..3c27eb8 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -723,7 +723,7 @@ defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">; multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, SDPatternOperator frag, X86MemOperand x86memop, InstrItinClass itin> { -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, usesCustomInserter = 1 in { def NAME : I, TB, LOCK; diff --git a/llvm/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll b/llvm/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll new file mode 100644 index 0000000..8a325c4 --- /dev/null +++ b/llvm/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s +; PR28755 + +; Check that register allocator is able to handle that +; a-lot-of-fixed-and-reserved-registers case. We do that by +; emmiting lea before 4 cmpxchg8b operands generators. + +define void @foo_alloca(i64* %a, i32 %off, i32 %n) { + %dummy = alloca i32, i32 %n + %addr = getelementptr inbounds i64, i64* %a, i32 %off + + %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic + ret void +} + +; CHECK-LABEL: foo_alloca +; CHECK: leal {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]] +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl $1, %ebx +; CHECK-NEXT: lock cmpxchg8b ([[REGISTER]]) + +; If we don't use index register in the address mode - +; check that we did not generate the lea. +define void @foo_alloca_direct_address(i64* %addr, i32 %n) { + %dummy = alloca i32, i32 %n + + %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic + ret void +} + +; CHECK-LABEL: foo_alloca_direct_address +; CHECK-NOT: leal {{\(%e.*\)}}, [[REGISTER:%e.i]] +; CHECK: lock cmpxchg8b ([[REGISTER]]) -- 2.7.4