[DSE] Lift post-dominance for objs not accessible in caller.

author Florian Hahn <flo@fhahn.com>

Wed, 15 Apr 2020 09:42:58 +0000 (10:42 +0100)

committer Florian Hahn <flo@fhahn.com>

Wed, 15 Apr 2020 10:37:14 +0000 (11:37 +0100)
author Florian Hahn <flo@fhahn.com>
Wed, 15 Apr 2020 09:42:58 +0000 (10:42 +0100)
committer Florian Hahn <flo@fhahn.com>
Wed, 15 Apr 2020 10:37:14 +0000 (11:37 +0100)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

index f8a9f4f..53ae373 100644 (file)
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1484,9 +1484,12 @@ struct DSEState {
    SmallVector<MemoryDef *, 64> MemDefs;
    // Any that should be skipped as they are already deleted
    SmallPtrSet<MemoryAccess *, 4> SkipStores;
-  // Keep track of all of the objects that are invisible to the caller until the
-  // function returns.
-  SmallPtrSet<const Value *, 16> InvisibleToCaller;
+  // Keep track of all of the objects that are invisible to the caller before
+  // the function returns.
+  SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+  // Keep track of all of the objects that are invisible to the caller after
+  // the function returns.
+  SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet;
    // Keep track of blocks with throwing instructions not modeled in MemorySSA.
    SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
    // Post-order numbers for each basic block. Used to figure out if memory
@@ -1520,13 +1523,25 @@ struct DSEState {
              hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
            State.MemDefs.push_back(MD);
  
-        // Track alloca and alloca-like objects. Here we care about objects not
-        // visible to the caller during function execution. Alloca objects are
-        // invalid in the caller, for alloca-like objects we ensure that they
-        // are not captured throughout the function.
-        if (isa<AllocaInst>(&I) ||
-            (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)))
-          State.InvisibleToCaller.insert(&I);
+        // Track whether alloca and alloca-like objects are visible in the
+        // caller before and after the function returns. Alloca objects are
+        // invalid in the caller, so they are neither visible before or after
+        // the function returns.
+        if (isa<AllocaInst>(&I)) {
+          State.InvisibleToCallerBeforeRet.insert(&I);
+          State.InvisibleToCallerAfterRet.insert(&I);
+        }
+
+        // For alloca-like objects we need to check if they are captured before
+        // the function returns and if the return might capture the object.
+        if (isAllocLikeFn(&I, &TLI)) {
+          bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
+          if (!CapturesBeforeRet) {
+            State.InvisibleToCallerBeforeRet.insert(&I);
+            if (!PointerMayBeCaptured(&I, true, false))
+              State.InvisibleToCallerAfterRet.insert(&I);
+          }
+        }
        }
      }
  
@@ -1534,7 +1549,7 @@ struct DSEState {
      // dead at the end of the function.
      for (Argument &AI : F.args())
        if (AI.hasByValOrInAllocaAttr())
-        State.InvisibleToCaller.insert(&AI);
+        State.InvisibleToCallerBeforeRet.insert(&AI);
      return State;
    }
  
@@ -1603,14 +1618,15 @@ struct DSEState {
    }
  
    // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
-  // read access in between or return None otherwise. The returned value may not
-  // (completely) overwrite \p DefLoc. Currently we bail out when we encounter
-  // an aliasing MemoryUse (read).
-  Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
-                                           MemoryAccess *Current,
-                                           MemoryLocation DefLoc,
-                                           bool DefVisibleToCaller,
-                                           int &ScanLimit) const {
+  // read access between them or on any other path to a function exit block if
+  // \p DefLoc is not accessible after the function returns. If there is no such
+  // MemoryDef, return None. The returned value may not (completely) overwrite
+  // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse
+  // (read).
+  Optional<MemoryAccess *>
+  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
+                  MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet,
+                  bool DefVisibleToCallerAfterRet, int &ScanLimit) const {
      MemoryAccess *DomAccess;
      bool StepAgain;
      LLVM_DEBUG(dbgs() << "  trying to get dominating access for " << *Current
@@ -1636,12 +1652,13 @@ struct DSEState {
        if (isa<MemoryPhi>(DomAccess))
          break;
  
-      // Check if we can skip DomDef for DSE. We also require the KillingDef
-      // execute whenever DomDef executes and use post-dominance to ensure that.
-
+      // Check if we can skip DomDef for DSE. For accesses to objects that are
+      // accessible after the function returns, KillingDef must execute whenever
+      // DomDef executes and use post-dominance to ensure that.
        MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
-      if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller)) ||
-          !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) {
+      if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) ||
+          (DefVisibleToCallerAfterRet &&
+           !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) {
          StepAgain = true;
          Current = DomDef->getDefiningAccess();
        }
@@ -1764,7 +1781,7 @@ struct DSEState {
      // First see if we can ignore it by using the fact that SI is an
      // alloca/alloca like object that is not visible to the caller during
      // execution of the function.
-    if (SILocUnd && InvisibleToCaller.count(SILocUnd))
+    if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd))
        return false;
  
      if (SI->getParent() == NI->getParent())
@@ -1782,7 +1799,7 @@ struct DSEState {
                      MemoryLocation &NILoc) const {
      // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
      // like object that does not escape.
-    if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd))
+    if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd))
        return true;
  
      if (NI->isAtomic()) {
@@ -1823,10 +1840,15 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
      const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
      Instruction *DefObj =
          const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
-    bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd);
-    if (DefObj && ((isAllocLikeFn(DefObj, &TLI) &&
-                    !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT))))
-      DefVisibleToCaller = false;
+    bool DefVisibleToCallerBeforeRet =
+        !State.InvisibleToCallerBeforeRet.count(SILocUnd);
+    bool DefVisibleToCallerAfterRet =
+        !State.InvisibleToCallerAfterRet.count(SILocUnd);
+    if (DefObj && isAllocLikeFn(DefObj, &TLI)) {
+      if (DefVisibleToCallerBeforeRet)
+        DefVisibleToCallerBeforeRet =
+            PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT);
+    }
  
      MemoryAccess *Current = KillingDef;
      LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
@@ -1844,7 +1866,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
          continue;
  
        Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, DefVisibleToCaller, ScanLimit);
+          KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet,
+          DefVisibleToCallerAfterRet, ScanLimit);
  
        if (!Next) {
          LLVM_DEBUG(dbgs() << "  finished walk\n");
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll

index 741508b..115540e 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
@@ -13,7 +13,7 @@ define void @test1(i1 %c) {
  ; CHECK:       cond.true.i.i.i:
  ; CHECK-NEXT:    ret void
  ; CHECK:       cond.end.i.i.i:
-; CHECK-NEXT:    [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam() #2
+; CHECK-NEXT:    [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam()
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL_I_I_I_I_I]] to i64*
  ; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I]], i8 0, i64 undef, i1 false)
  ; CHECK-NEXT:    store i64 0, i64* [[TMP0]], align 8
@@ -42,7 +42,7 @@ define void @test2(i1 %c) {
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[CLEANUP_CONT104:%.*]], label [[IF_THEN:%.*]]
  ; CHECK:       if.then:
  ; CHECK-NEXT:    [[MUL_I_I_I_I:%.*]] = shl nuw nsw i64 undef, 3
-; CHECK-NEXT:    [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm() #2
+; CHECK-NEXT:    [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm()
  ; CHECK-NEXT:    [[DOTCAST_I_I:%.*]] = bitcast i8* [[CALL_I_I_I_I_I_I131]] to i64*
  ; CHECK-NEXT:    store i64 0, i64* [[DOTCAST_I_I]], align 8
  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I_I131]], i8 0, i64 [[MUL_I_I_I_I]], i1 false)
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll

index cc35d50..20b2d28 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
@@ -1,8 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  
-; XFAIL: *
-; TODO: Handling of free not implemented yet.
-
  ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
  
  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -308,7 +305,7 @@ declare %struct.NodePtrVecStruct* @NodePtrVec_new(i32)
  define noalias %struct.SystemCallMapElementStruct* @SystemCallMapElement_new(i8* nocapture readonly %label, i32 %initialSize) {
  ; CHECK-LABEL: @SystemCallMapElement_new(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #7
+; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #6
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.SystemCallMapElementStruct*
  ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null
  ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_THEN:%.*]]
@@ -380,7 +377,7 @@ cleanup:                                          ; preds = %entry, %if.end, %if
  define noalias %struct.BitfieldStruct* @Bitfield_new(i32 %bitsNeeded) {
  ; CHECK-LABEL: @Bitfield_new(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #7
+; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #6
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.BitfieldStruct*
  ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null
  ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -388,7 +385,7 @@ define noalias %struct.BitfieldStruct* @Bitfield_new(i32 %bitsNeeded) {
  ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[BITSNEEDED:%.*]], 7
  ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[ADD]], 8
  ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #7
  ; CHECK-NEXT:    [[BITFIELD:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 8
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BITFIELD]] to i8**
  ; CHECK-NEXT:    store i8* [[CALL1]], i8** [[TMP1]], align 8
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll

index af45ec2..4500c31 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
@@ -126,7 +126,8 @@ define void @alloca_1(i1 %c) {
  ; CHECK-NEXT:    [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
  ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
  ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    br label [[BB3:%.*]]
@@ -162,7 +163,8 @@ define void @alloca_2(i1 %c) {
  ; CHECK-NEXT:    [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
  ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
  ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll

index a5d7b24..4e61ade 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
@@ -136,10 +136,11 @@ bb5:
  
  ; Tests where the pointer/object is *NOT* accessible after the function returns.
  
+; The store in the entry block can be eliminated, because it is overwritten
+; on all paths to the exit.
  define void @alloca_1(i1 %c1) {
  ; CHECK-LABEL: @alloca_1(
  ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
  ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    store i32 0, i32* [[P]]
@@ -167,10 +168,11 @@ bb5:
    ret void
  }
  
+; The store in the entry block can be eliminated, because it is overwritten
+; on all paths to the exit.
  define void @alloca_2(i1 %c.1, i1 %c.2) {
  ; CHECK-LABEL: @alloca_2(
  ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
  ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    store i32 0, i32* [[P]]
@@ -211,6 +213,8 @@ bb5:
    ret void
  }
  
+; The store in the entry block cannot be eliminated. There's a path from the
+; first store to the read in bb5, where the location is not overwritten.
  define void @alloca_3(i1 %c1) {
  ; CHECK-LABEL: @alloca_3(
  ; CHECK-NEXT:    [[P:%.*]] = alloca i32
@@ -240,10 +244,12 @@ bb5:
    ret void
  }
  
+; The store in the entry block can be eliminated, because it is overwritten
+; before the use in bb1 and not read on other paths to the function exit. The
+; object cannot be accessed by the caller.
  define void @alloca_4(i1 %c1) {
  ; CHECK-LABEL: @alloca_4(
  ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
  ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    store i32 0, i32* [[P]]
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll

index 060cb8c..b8913d0 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
@@ -175,7 +175,6 @@ bb3:
  define void @test11() {
  ; CHECK-LABEL: @test11(
  ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 0, i32* [[P]]
  ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    store i32 0, i32* [[P]]
author	Florian Hahn <flo@fhahn.com>
	Wed, 15 Apr 2020 09:42:58 +0000 (10:42 +0100)
committer	Florian Hahn <flo@fhahn.com>
	Wed, 15 Apr 2020 10:37:14 +0000 (11:37 +0100)
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll		patch \| blob \| history