From 349777d3ea6beb9e875339904af0385a451e17ef Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Thu, 12 Jun 2014 23:27:57 +0000 Subject: [PATCH] [FastISel][X86] Add MachineMemOperand to load/store instructions. This commit adds MachineMemOperands to load and store instructions. This allows the peephole optimizer to fold load instructions. Unfortunately the peephole optimizer currently doesn't run at -O0. llvm-svn: 210858 --- llvm/include/llvm/CodeGen/FastISel.h | 3 + llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 44 ++++++++++++ llvm/lib/Target/X86/X86FastISel.cpp | 107 ++++++++++++++++++---------- llvm/test/CodeGen/X86/fast-isel-fold-mem.ll | 12 ++++ 4 files changed, 127 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/X86/fast-isel-fold-mem.ll diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 7d9e207..c7ec6a0 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -377,6 +377,9 @@ protected: /// Test whether the given value has exactly one use. bool hasTrivialKill(const Value *V) const; + /// \brief Create a machine mem operand from the given instruction. 
+ MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const; + private: bool SelectBinaryOp(const User *I, unsigned ISDOpcode); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 94d3be9..21fcbd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1737,3 +1737,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { return isa(cast(Add)->getOperand(1)); } +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else { + return nullptr; + } + + bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; + bool IsInvariant = I->getMetadata("invariant.load") != nullptr; + const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
+ Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, TBAAInfo, Ranges); +} diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 25cd68a..e8f8805 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -78,12 +78,14 @@ public: private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); - bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, + unsigned &ResultReg); bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, - bool Aligned = false); - bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM, - bool Aligned = false); + MachineMemOperand *MMO = nullptr, bool Aligned = false); + bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, + const X86AddressMode &AM, + MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -180,7 +182,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, - unsigned &ResultReg) { + MachineMemOperand *MMO, unsigned &ResultReg) { // Get opcode and regclass of the output for the given load instruction. 
unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; @@ -228,8 +230,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc), ResultReg), AM); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); + addFullAddress(MIB, AM); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } @@ -237,9 +242,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. -bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, - const X86AddressMode &AM, bool Aligned) { +bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, + const X86AddressMode &AM, + MachineMemOperand *MMO, bool Aligned) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -249,7 +254,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1); + TII.get(X86::AND8ri), AndResult) + .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1); ValReg = AndResult; } // FALLTHROUGH, handling i1 as i8. 
@@ -288,13 +294,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, break; } - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc)), AM).addReg(ValReg); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill)); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); + return true; } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM, bool Aligned) { + const X86AddressMode &AM, + MachineMemOperand *MMO, bool Aligned) { // Handle 'null' like i32/i64 0. if (isa(Val)) Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext())); @@ -317,10 +328,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, } if (Opc) { - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc)), AM) - .addImm(Signed ? (uint64_t) CI->getSExtValue() : - CI->getZExtValue()); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + addFullAddress(MIB, AM).addImm(Signed ? 
(uint64_t) CI->getSExtValue() + : CI->getZExtValue()); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } } @@ -329,7 +342,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, if (ValReg == 0) return false; - return X86FastEmitStore(VT, ValReg, AM, Aligned); + bool ValKill = hasTrivialKill(Val); + return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned); } /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of @@ -740,19 +754,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; - unsigned SABIAlignment = - DL.getABITypeAlignment(S->getValueOperand()->getType()); - bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment; + const Value *Val = S->getValueOperand(); + const Value *Ptr = S->getPointerOperand(); MVT VT; - if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) + if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true)) return false; + unsigned Alignment = S->getAlignment(); + unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = ABIAlignment; + bool Aligned = Alignment >= ABIAlignment; + X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(1), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; - return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned); + return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned); } /// X86SelectRet - Select and emit code to implement ret instructions. @@ -887,25 +906,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// -bool X86FastISel::X86SelectLoad(const Instruction *I) { +bool X86FastISel::X86SelectLoad(const Instruction *I) { + const LoadInst *LI = cast(I); + // Atomic loads need special handling. 
- if (cast(I)->isAtomic()) + if (LI->isAtomic()) return false; MVT VT; - if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) + if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) return false; + const Value *Ptr = LI->getPointerOperand(); + X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(0), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; unsigned ResultReg = 0; - if (X86FastEmitLoad(VT, AM, ResultReg)) { - UpdateValueMap(I, ResultReg); - return true; - } - return false; + if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) + return false; + + UpdateValueMap(I, ResultReg); + return true; } static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { @@ -1624,8 +1647,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, } unsigned Reg; - bool RV = X86FastEmitLoad(VT, SrcAM, Reg); - RV &= X86FastEmitStore(VT, Reg, DestAM); + bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); + RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); assert(RV && "Failed to emit load or store??"); unsigned Size = VT.getSizeInBits()/8; @@ -2322,7 +2345,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (!X86FastEmitStore(ArgVT, ArgVal, AM)) return false; } else { - if (!X86FastEmitStore(ArgVT, Arg, AM)) + if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM)) return false; } } @@ -2719,8 +2742,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) { + const Value *Ptr = LI->getPointerOperand(); X86AddressMode AM; - if (!X86SelectAddress(LI->getOperand(0), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; const X86InstrInfo &XII = (const X86InstrInfo&)TII; @@ -2728,13 +2752,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned Size = DL.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); + if (Alignment == 0) // 
Ensure that codegen never sees alignment 0 + Alignment = DL.getABITypeAlignment(LI->getType()); + SmallVector AddrOps; AM.getFullAddress(AddrOps); MachineInstr *Result = XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); - if (!Result) return false; + if (!Result) + return false; + Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; diff --git a/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll b/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll new file mode 100644 index 0000000..a945779 --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s + +define i64 @fold_load(i64* %a, i64 %b) { +; CHECK-LABEL: fold_load +; CHECK: addq (%rdi), %rsi +; CHECK-NEXT: movq %rsi, %rax + %1 = load i64* %a, align 8 + %2 = add i64 %1, %b + ret i64 %2 +} + -- 2.7.4