[SROA] Propagate !range metadata when moving loads.

author Davide Italiano <davide@freebsd.org>

Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)

committer Davide Italiano <davide@freebsd.org>

Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)
author Davide Italiano <davide@freebsd.org>
Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)
committer Davide Italiano <davide@freebsd.org>
Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp

index 2d668f9..8547960 100644 (file)
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2455,15 +2455,10 @@ private:
        // are different types, for example by mapping !nonnull metadata to
        // !range metadata by modeling the null pointer constant converted to the
        // integer type.
-      // FIXME: Add support for range metadata here. Currently the utilities
-      // for this don't propagate range metadata in trivial cases from one
-      // integer load to another, don't handle non-addrspace-0 null pointers
-      // correctly, and don't have any support for mapping ranges as the
-      // integer type becomes winder or narrower.
        if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
          copyNonnullMetadata(LI, N, *NewLI);
-
-      // Try to preserve nonnull metadata
+      if (MDNode *N = LI.getMetadata(LLVMContext::MD_range))
+        copyRangeMetadata(DL, LI, N, *NewLI);
        V = NewLI;
  
        // If this is an integer load past the end of the slice (which means the
@@ -3654,7 +3649,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                           PartPtrTy, BasePtr->getName() + "."),
            getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
            LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); 
+      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
  
        // Append this load onto the list of split loads so we can find it later
        // to rewrite the stores.
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp

index 3f76295..fa42902 100644 (file)
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1947,18 +1947,24 @@ void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
  void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
                               MDNode *N, LoadInst &NewLI) {
    auto *NewTy = NewLI.getType();
+  auto *OldTy = OldLI.getType();
  
-  // Give up unless it is converted to a pointer where there is a single very
-  // valuable mapping we can do reliably.
-  // FIXME: It would be nice to propagate this in more ways, but the type
-  // conversions make it hard.
-  if (!NewTy->isPointerTy())
+  if (DL.getTypeStoreSizeInBits(NewTy) == DL.getTypeSizeInBits(OldTy) &&
+      NewTy->isIntegerTy()) {
+    // An integer with the same number of bits - give it the range
+    // metadata!.
+    NewLI.setMetadata(LLVMContext::MD_range, N);
      return;
+  }
  
-  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
-  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-    MDNode *NN = MDNode::get(OldLI.getContext(), None);
-    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  if (NewTy->isPointerTy()) {
+    // Try to convert the !range metadata to !nonnull metadata on the
+    // new pointer.
+    unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+    if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+      MDNode *NN = MDNode::get(OldLI.getContext(), None);
+      NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+    }
    }
  }
  
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

index fcd3bd0..8ff6510 100644 (file)
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -17,6 +17,7 @@
  
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
@@ -48,7 +49,7 @@
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  #include <algorithm>
-#include <cassert>
+
  #include <iterator>
  #include <utility>
  #include <vector>
@@ -177,6 +178,16 @@ public:
    ValVector Values;
  };
  
+/// \brief Semi-open interval of instructions that are guaranteed to
+/// all execute if the first one does.
+class GuaranteedExecutionRange {
+public:
+  unsigned Start;
+  unsigned End;
+
+  GuaranteedExecutionRange(unsigned S, unsigned E): Start(S), End(E) {}
+};
+
  /// \brief This assigns and keeps a per-bb relative ordering of load/store
  /// instructions in the block that directly load or store an alloca.
  ///
@@ -190,14 +201,109 @@ class LargeBlockInfo {
    /// the block.
    DenseMap<const Instruction *, unsigned> InstNumbers;
  
+  /// \brief For each basic block we track, keep track of the intervals
+  /// of instruction numbers of instructions that transfer control
+  /// to their successors, for propagating metadata.
+  DenseMap<const BasicBlock *, Optional<SmallVector<GuaranteedExecutionRange, 4>>>
+    GuaranteedExecutionIntervals;
+
  public:
  
-  /// This code only looks at accesses to allocas.
+  /// This code looks for stores to allocas, and for loads both for
+  /// allocas and for transferring metadata.
    static bool isInterestingInstruction(const Instruction *I) {
-    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+    return isa<LoadInst>(I) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }
  
+  /// Compute the GuaranteedExecutionIntervals for a given BB.
+  ///
+  /// This is valid and remains valid as long as each interesting
+  /// instruction (see isInterestingInstruction) that
+  ///   A) existed when this LBI was cleared
+  ///   B) has not been deleted (deleting interesting instructions is fine)
+  /// are run in the same program executions and in the same order
+  /// as when this LBI was cleared.
+  ///
+  /// Because `PromoteMemoryToRegister` does not move memory loads at
+  /// all, this assumption is satisfied in this pass.
+  SmallVector<GuaranteedExecutionRange, 4> computeGEI(const BasicBlock *BB) {
+    SmallVector<GuaranteedExecutionRange, 4> GuaranteedExecutionIntervals;
+
+    unsigned InstNo = 0;
+    bool InRange = false;
+    unsigned FirstInstInRange = 0;
+    for (const Instruction &BBI : *BB) {
+      if (isGuaranteedToTransferExecutionToSuccessor(&BBI)) {
+        if (!InRange && isInterestingInstruction(&BBI)) {
+          InRange = true;
+          FirstInstInRange = InstNo;
+        }
+      } else {
+        if (InRange) {
+          assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
+          GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
+        }
+        InRange = false;
+      }
+
+      if (isInterestingInstruction(&BBI)) {
+        auto It = InstNumbers.find(&BBI);
+        assert(It != InstNumbers.end() &&
+               InstNo <= It->second &&
+               "missing number for interesting instruction");
+        InstNo = It->second + 1;
+      }
+    }
+
+    if (InRange) {
+      assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
+      GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
+    }
+
+    return GuaranteedExecutionIntervals;
+  }
+
+  /// Return true if, when CxtI executes, it is guaranteed that either
+  /// I had executed already or that I is guaranteed to be later executed.
+  ///
+  /// The useful property this guarantees is that if I exhibits undefined
+  /// behavior under some circumstances, then the whole program will exhibit
+  /// undefined behavior at CxtI.
+  bool isGuaranteedToBeExecuted(const Instruction *CxtI, const Instruction *I) {
+    const BasicBlock *BB = CxtI->getParent();
+
+    if (BB != I->getParent()) {
+      // Instructions in different basic blocks, so control flow
+      // can diverge between them (we could track this with
+      // postdoms, but we don't bother).
+      return false;
+    }
+
+    unsigned Index1 = getInstructionIndex(CxtI);
+    unsigned Index2 = getInstructionIndex(I);
+
+    auto& BBGEI = GuaranteedExecutionIntervals[BB];
+    if (!BBGEI.hasValue()) {
+      BBGEI.emplace(computeGEI(BB));
+    }
+
+    // We want to check whether I and CxtI are in the same range. To do that,
+    // we notice that CxtI can only be in the first range R where
+    // CxtI.end < R.end. If we find that range using binary search,
+    // we can check whether I and CxtI are both in it.
+    GuaranteedExecutionRange Bound(Index1, Index1);
+    auto R = std::upper_bound(
+      BBGEI->begin(), BBGEI->end(), Bound,
+      [](GuaranteedExecutionRange I_, GuaranteedExecutionRange R) {
+        return I_.End < R.End;
+      });
+
+    return R != BBGEI->end() &&
+      R->Start <= Index1 && Index1 < R->End &&
+      R->Start <= Index2 && Index2 < R->End;
+  }
+
    /// Get or calculate the index of the specified instruction.
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
@@ -213,9 +319,11 @@ public:
      // avoid gratuitus rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
+    GuaranteedExecutionIntervals.erase(BB);
      for (const Instruction &BBI : *BB)
        if (isInterestingInstruction(&BBI))
          InstNumbers[&BBI] = InstNo++;
+
      It = InstNumbers.find(I);
  
      assert(It != InstNumbers.end() && "Didn't insert instruction?");
@@ -224,7 +332,10 @@ public:
  
    void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
  
-  void clear() { InstNumbers.clear(); }
+  void clear() {
+    InstNumbers.clear();
+    GuaranteedExecutionIntervals.clear();
+  }
  };
  
  struct PromoteMem2Reg {
@@ -303,6 +414,7 @@ private:
                             SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
+                  LargeBlockInfo &LBI,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
  };
@@ -321,6 +433,32 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
    AC->registerAssumption(CI);
  }
  
+static void addAssumptionsFromMetadata(LoadInst *LI,
+                                       Value *ReplVal,
+                                       DominatorTree &DT,
+                                       const DataLayout &DL,
+              LargeBlockInfo &LBI,
+                                       AssumptionCache *AC)
+{
+  if (LI->getMetadata(LLVMContext::MD_nonnull) &&
+      !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) {
+    addAssumeNonNull(AC, LI);
+  }
+
+  if (auto *N = LI->getMetadata(LLVMContext::MD_range)) {
+    // Range metadata is harder to use as an assumption,
+    // so don't try to add one, but *do* try to copy
+    // the metadata to a load in the same BB.
+    if (LoadInst *NewLI = dyn_cast<LoadInst>(ReplVal)) {
+      DEBUG(dbgs() << "trying to move !range metadata from" <<
+        *LI << " to" << *NewLI << "\n");
+      if (LBI.isGuaranteedToBeExecuted(LI, NewLI)) {
+        copyRangeMetadata(DL, *LI, N, *NewLI);
+      }
+    }
+  }
+}
+
  static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
    // Knowing that this alloca is promotable, we know that it's safe to kill all
    // instructions except for load and store.
@@ -409,9 +547,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
      // If the load was marked as nonnull we don't want to lose
      // that information when we erase this Load. So we preserve
      // it with an assume.
-    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-        !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
-      addAssumeNonNull(AC, LI);
+    addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
  
      LI->replaceAllUsesWith(ReplVal);
      LI->eraseFromParent();
@@ -505,9 +641,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
        // Note, if the load was marked as nonnull we don't want to lose that
        // information when we erase it. So we preserve it with an assume.
        Value *ReplVal = std::prev(I)->second->getOperand(0);
-      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-          !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
-        addAssumeNonNull(AC, LI);
+      addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
  
        LI->replaceAllUsesWith(ReplVal);
      }
@@ -643,7 +777,6 @@ void PromoteMem2Reg::run() {
  
    if (Allocas.empty())
      return; // All of the allocas must have been trivial!
-
    LBI.clear();
  
    // Set the incoming values for the basic block to be null values for all of
@@ -661,9 +794,10 @@ void PromoteMem2Reg::run() {
      RenamePassData RPD = std::move(RenamePassWorkList.back());
      RenamePassWorkList.pop_back();
      // RenamePass may add new worklist entries.
-    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+    RenamePass(RPD.BB, RPD.Pred, RPD.Values, LBI, RenamePassWorkList);
    } while (!RenamePassWorkList.empty());
  
+  LBI.clear();
    // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
    Visited.clear();
  
@@ -875,6 +1009,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
  /// predecessor block Pred.
  void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                  RenamePassData::ValVector &IncomingVals,
+       LargeBlockInfo &LBI,
                                  std::vector<RenamePassData> &Worklist) {
  NextIteration:
    // If we are inserting any phi nodes into this BB, they will already be in the
@@ -941,13 +1076,12 @@ NextIteration:
        // If the load was marked as nonnull we don't want to lose
        // that information when we erase this Load. So we preserve
        // it with an assume.
-      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-          !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
-        addAssumeNonNull(AC, LI);
+      addAssumptionsFromMetadata(LI, V, DT, SQ.DL, LBI, AC);
  
        // Anything using the load now uses the current value.
        LI->replaceAllUsesWith(V);
        BB->getInstList().erase(LI);
+      LBI.deleteValue(LI);
      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Delete this instruction and mark the name as the current holder of the
        // value
@@ -965,6 +1099,7 @@ NextIteration:
        for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
          ConvertDebugDeclareToDebugValue(DII, SI, DIB);
        BB->getInstList().erase(SI);
+      LBI.deleteValue(SI);
      }
    }
  
diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll

index a29da6d..5d0e22c 100644 (file)
--- a/llvm/test/Transforms/SROA/preserve-nonnull.ll
+++ b/llvm/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,6 +3,8 @@
  ; Make sure that SROA doesn't lose nonnull metadata
  ; on loads from allocas that get optimized out.
  
+%pair = type { i64, [0 x i64], [1 x i64] }
+
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
  
  ; Check that we do basic propagation of nonnull when rewriting.
@@ -42,6 +44,23 @@ entry:
    ret float* %ret
  }
  
+; Make sure we propagate the !range attribute when we expand loads.
+define i64 @propagate_range(%pair* dereferenceable(16)) {
+; CHECK-LABEL: define i64 @propagate_range(
+; CHECK-NEXT:  start:
+; CHECK-NEXT:      %[[SROA_IDX:.*]] = getelementptr inbounds %pair
+; CHECK-NEXT:      %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1
+; CHECK:           ret i64 %[[RESULT]]
+start:
+  %a = alloca %pair
+  %1 = bitcast %pair* %0 to i8*
+  %2 = bitcast %pair* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false)
+  %3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0
+  %4 = load i64, i64* %3, !range !1
+  ret i64 %4
+}
+
  ; Make sure we properly handle the !nonnull attribute when we convert
  ; a pointer load to an integer load.
  ; FIXME: While this doesn't do anythnig actively harmful today, it really
@@ -90,3 +109,4 @@ entry:
  }
  
  !0 = !{}
+!1 = !{i64 0, i64 2}
author	Davide Italiano <davide@freebsd.org>
	Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)
committer	Davide Italiano <davide@freebsd.org>
	Mon, 27 Nov 2017 21:25:13 +0000 (21:25 +0000)
llvm/lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
llvm/lib/Transforms/Utils/Local.cpp		patch \| blob \| history
llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp		patch \| blob \| history
llvm/test/Transforms/SROA/preserve-nonnull.ll		patch \| blob \| history