"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
cl::desc("Allow combining of ScaledReg field in Address sinking."));
+static cl::opt<bool>
+ EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
+ cl::init(true),
+ cl::desc("Enable splitting large offset of GEP."));
+
namespace {
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
/// Keep track of sext chains based on their initial value.
DenseMap<Value *, Instruction *> SeenChainsForSExt;
+ /// Keep track of GEPs accessing the same data structures, such as structs
+ /// or arrays, that are candidates to be split later because of their
+ /// large size.
+ DenseMap<
+ AssertingVH<Value>,
+ SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
+ LargeOffsetGEPMap;
+
+ /// Keep track of the new GEP bases created after splitting GEPs with
+ /// large offsets.
+ SmallSet<AssertingVH<Value>, 2> NewGEPBases;
+
+ /// Map large offset GEPs to the serial numbers assigned to them in
+ /// discovery order; used to make the splitting order deterministic.
+ DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
+
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
unsigned CreatedInstsCost = 0);
bool mergeSExts(Function &F);
+ bool splitLargeGEPOffsets();
bool performAddressTypePromotion(
Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
SeenChainsForSExt.clear();
ValToSExtendedUses.clear();
RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
}
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
+ if (!LargeOffsetGEPMap.empty())
+ MadeChange |= splitLargeGEPOffsets();
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
+ /// A GEP whose offset is too large to be folded into the addressing mode.
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
+
/// This is set to true when we should not do profitability checks.
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetLowering &TLI,
- const TargetRegisterInfo &TRI,
- Type *AT, unsigned AS,
- Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT)
+ AddressingModeMatcher(
+ SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
+ ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT) {
+ PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
IgnoreProfitability = false;
}
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
- static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
- const TargetRegisterInfo &TRI,
- const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT) {
+ static ExtAddrMode
+ Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
+ SmallVectorImpl<Instruction *> &AddrModeInsts,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI,
+ const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
- AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT).matchAddr(V, 0);
+ PromotedInsts, TPT, LargeOffsetGEP)
+ .matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
// Check to see if we can fold the base pointer in too.
if (matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
+ } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in case
+ // the offset cannot fit into the r+i addressing mode. This handles the
+ // simple and common case in which a single GEP computes the address for
+ // the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // If the base is an instruction, make sure the GEP is not in the same
+ // basic block as the base. If the base is an argument or global
+ // value, make sure the GEP is not in the entry block. Otherwise,
+ // instruction selection can undo the split. Also make sure the
+ // parent block allows inserting non-PHI instructions before the
+ // terminator.
+ BasicBlock *Parent =
+ BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
+ if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
}
AddrMode.BaseOffs -= ConstantOffset;
return false;
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
ExtAddrMode Result;
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
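+ // Candidates recorded into this pair during the profitability check are
+ // intentionally ignored; only the main address-matching loop collects
+ // them for splitting.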
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
- AddressAccessTy, AS,
- MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT);
+ AddressingModeMatcher Matcher(
+ MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
// the result may differ depending on what other uses our candidate
// addressing instructions might have.
AddrModeInsts.clear();
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT);
- NewAddrMode.OriginalValue = V;
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+
+ GetElementPtrInst *GEP = LargeOffsetGEP.first;
+ if (GEP && GEP->getParent() != MemoryInst->getParent() &&
+ !NewGEPBases.count(GEP)) {
+ // If splitting the underlying data structure can reduce the offset of a
+ // GEP, collect the GEP. Skip the GEPs that are the new bases of
+ // previously split data structures.
+ LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
+ if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) {
+ // Assign serial numbers in discovery order. Computing the size before
+ // operator[] keeps the ID independent of argument evaluation order.
+ int ID = LargeOffsetGEPID.size();
+ LargeOffsetGEPID[GEP] = ID;
+ }
+ }
+ NewAddrMode.OriginalValue = V;
if (!AddrModes.addNewAddrMode(NewAddrMode))
break;
}
return Changed;
}
+// Splitting large data structures so that the GEPs accessing them can
+// have smaller offsets, allowing them to be sunk to the same blocks as
+// their users. For example, a large struct starting at %base is split
+// into two parts, where the second part starts at %new_base.
+//
+// Before:
+// BB0:
+// %base =
+//
+// BB1:
+// %gep0 = gep %base, off0
+// %gep1 = gep %base, off1
+// %gep2 = gep %base, off2
+//
+// BB2:
+// %load1 = load i32, i32* %gep0
+// %load2 = load i32, i32* %gep1
+// %load3 = load i32, i32* %gep2
+//
+// After:
+// BB0:
+// %base =
+// %new_base = gep %base, off0
+//
+// BB1:
+// %new_gep0 = %new_base
+// %new_gep1 = gep %new_base, off1 - off0
+// %new_gep2 = gep %new_base, off2 - off0
+//
+// BB2:
+// %load1 = load i32, i32* %new_gep0
+// %load2 = load i32, i32* %new_gep1
+// %load3 = load i32, i32* %new_gep2
+//
+// After the split, %new_gep1 and %new_gep2 can be sunk to BB2 because
+// their offsets are small enough to fit into the addressing mode.
+bool CodeGenPrepare::splitLargeGEPOffsets() {
+ bool Changed = false;
+ for (auto &Entry : LargeOffsetGEPMap) {
+ Value *OldBase = Entry.first;
+ SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
+ &LargeOffsetGEPs = Entry.second;
+ auto compareGEPOffset =
+ [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
+ const std::pair<GetElementPtrInst *, int64_t> &RHS) {
+ if (LHS.first == RHS.first)
+ return false;
+ if (LHS.second != RHS.second)
+ return LHS.second < RHS.second;
+ return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
+ };
+ // Sort all the GEPs of the same data structure by offset; ties are
+ // broken by the serial numbers so the order is deterministic.
+ llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
+ compareGEPOffset);
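+ // The same GEP may have been recorded once per memory use; remove the
+ // exact duplicates, which are adjacent after sorting.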
+ LargeOffsetGEPs.erase(
+ std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
+ LargeOffsetGEPs.end());
+ // Skip if all the GEPs have the same offsets.
+ if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
+ continue;
+ GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
+ int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
+ Value *NewBaseGEP = nullptr;
+
+ auto LargeOffsetGEP = LargeOffsetGEPs.begin();
+ while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
+ GetElementPtrInst *GEP = LargeOffsetGEP->first;
+ int64_t Offset = LargeOffsetGEP->second;
+ if (Offset != BaseOffset) {
+ TargetLowering::AddrMode AddrMode;
+ AddrMode.BaseOffs = Offset - BaseOffset;
+ // The result type of the GEP might not be the type of the memory
+ // access.
+ if (!TLI->isLegalAddressingMode(*DL, AddrMode,
+ GEP->getResultElementType(),
+ GEP->getAddressSpace())) {
+ // We need to create a new base if the offset to the current base is
+ // too large to fit into the addressing mode. So, a very large struct
+ // may be split into several parts.
+ BaseGEP = GEP;
+ BaseOffset = Offset;
+ NewBaseGEP = nullptr;
+ }
+ }
+
+ // Generate a new GEP to replace the current one.
+ IRBuilder<> Builder(GEP);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
+
+ if (!NewBaseGEP) {
+ // Create a new base if we don't have one yet. Find the insertion
+ // point for the new base first.
+ BasicBlock::iterator NewBaseInsertPt;
+ BasicBlock *NewBaseInsertBB;
+ if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+ // If the base of the struct is an instruction, the new base will be
+ // inserted close to it.
+ NewBaseInsertBB = BaseI->getParent();
+ if (isa<PHINode>(BaseI))
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+ NewBaseInsertBB =
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ } else
+ NewBaseInsertPt = std::next(BaseI->getIterator());
+ } else {
+ // If the current base is an argument or global value, the new base
+ // will be inserted into the entry block.
+ NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ }
+ IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+ // Create a new base.
+ Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+ NewBaseGEP = OldBase;
+ if (NewBaseGEP->getType() != I8PtrTy)
+ NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+ NewBaseGEP =
+ NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
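+ // Remember the new base so it is not itself collected as a split
+ // candidate when addressing modes are matched again.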
+ NewGEPBases.insert(NewBaseGEP);
+ }
+
+ Value *NewGEP = NewBaseGEP;
+ if (Offset == BaseOffset) {
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ } else {
+ // Calculate the new offset for the new GEP.
+ Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+ NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
+
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ }
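+ // Replace all uses, then drop the old GEP from the tracking structures
+ // before erasing it; the AssertingVH handles must not outlive the
+ // instruction.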
+ GEP->replaceAllUsesWith(NewGEP);
+ LargeOffsetGEPID.erase(GEP);
+ LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
/// Return true, if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
--- /dev/null
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
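+; Check that CodeGenPrepare splits GEPs with offsets too large for the
+; addressing mode: the large offset should be materialized only once, and
+; the remaining accesses should fold into small immediate offsets.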
+
+%struct_type = type { [10000 x i32], i32, i32 }
+
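+; The base is a load instruction, so the new base is inserted right after
+; it in the entry block; the loop then only needs small immediate offsets.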
+define void @test1(%struct_type** %s, i32 %n) {
+; CHECK-LABEL: test1
+entry:
+ %struct = load %struct_type*, %struct_type** %s
+ br label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK: mov w{{[0-9]+}}, #40000
+; CHECK-NOT: mov w{{[0-9]+}}, #40004
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+}
+
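+; The base is a function argument, so the new base is inserted in the
+; entry block.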
+define void @test2(%struct_type* %struct, i32 %n) {
+; CHECK-LABEL: test2
+entry:
+ %cmp = icmp eq %struct_type* %struct, null
+ br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK: mov w{{[0-9]+}}, #40000
+; CHECK-NOT: mov w{{[0-9]+}}, #40004
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp1 = icmp slt i32 %phi, %n
+ br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+}
+
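+; The base is a PHI node, so the new base is inserted at the first
+; insertion point of the PHI's block.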
+define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) {
+; CHECK-LABEL: test3
+entry:
+ br i1 %cond, label %if_true, label %if_end
+
+if_true:
+ br label %if_end
+
+if_end:
+ %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ]
+ %cmp = icmp eq %struct_type* %struct, null
+ br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ]
+; CHECK: mov w{{[0-9]+}}, #40000
+; CHECK-NOT: mov w{{[0-9]+}}, #40004
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp1 = icmp slt i32 %phi, %n
+ br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+}
+
+declare %struct_type* @foo()
+
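+; The base is produced by an invoke, so the new base must be inserted on
+; the edge to the normal destination (the SplitEdge path).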
+define void @test4(i32 %n) personality i32 (...)* @__FrameHandler {
+; CHECK-LABEL: test4
+entry:
+ %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK: mov w{{[0-9]+}}, #40000
+; CHECK-NOT: mov w{{[0-9]+}}, #40004
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+
+cleanup:
+ landingpad { i8*, i32 } cleanup
+ unreachable
+}
+
+declare i32 @__FrameHandler(...)
+
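+; Same pattern with a plain array: the byte offsets (80000 and 80004) do
+; not fit into the addressing mode. 80000 = 65536 + 14464, hence the
+; expected #14464 in the materialization.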
+define void @test5([65536 x i32]** %s, i32 %n) {
+; CHECK-LABEL: test5
+entry:
+ %struct = load [65536 x i32]*, [65536 x i32]** %s
+ br label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK: mov w{{[0-9]+}}, #14464
+; CHECK-NOT: mov w{{[0-9]+}}, #14468
+ %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000
+ %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+}