[DSE] Use correct memory location for read clobber check

author Nikita Popov <nikita.ppv@gmail.com>

Mon, 30 Nov 2020 22:51:54 +0000 (23:51 +0100)

committer Nikita Popov <nikita.ppv@gmail.com>

Fri, 18 Dec 2020 19:26:53 +0000 (20:26 +0100)
author Nikita Popov <nikita.ppv@gmail.com>
Mon, 30 Nov 2020 22:51:54 +0000 (23:51 +0100)
committer Nikita Popov <nikita.ppv@gmail.com>
Fri, 18 Dec 2020 19:26:53 +0000 (20:26 +0100)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

index 6cc1b43..1a1d0c0 100644 (file)
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1634,18 +1634,6 @@ struct DSEState {
    /// basic block.
    DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  
-  struct CheckCache {
-    SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
-    SmallPtrSet<MemoryAccess *, 16> KnownReads;
-
-    bool isKnownNoRead(MemoryAccess *A) const {
-      return KnownNoReads.find(A) != KnownNoReads.end();
-    }
-    bool isKnownRead(MemoryAccess *A) const {
-      return KnownReads.find(A) != KnownReads.end();
-    }
-  };
-
    DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
             PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
        : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
@@ -1940,9 +1928,8 @@ struct DSEState {
    Optional<MemoryAccess *>
    getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                    const MemoryLocation &DefLoc, const Value *DefUO,
-                  CheckCache &Cache, unsigned &ScanLimit,
-                  unsigned &WalkerStepLimit, bool IsMemTerm,
-                  unsigned &PartialLimit) {
+                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
+                  bool IsMemTerm, unsigned &PartialLimit) {
      if (ScanLimit == 0 || WalkerStepLimit == 0) {
        LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
        return None;
@@ -1954,6 +1941,7 @@ struct DSEState {
      LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");
  
      // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
+    Optional<MemoryLocation> CurrentLoc;
      do {
        StepAgain = false;
        LLVM_DEBUG({
@@ -2017,12 +2005,8 @@ struct DSEState {
        // clobber, bail out, as the path is not profitable. We skip this check
        // for intrinsic calls, because the code knows how to handle memcpy
        // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) &&
-          (Cache.KnownReads.contains(Current) ||
-           isReadClobber(DefLoc, CurrentI))) {
-        Cache.KnownReads.insert(Current);
+      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
          return None;
-      }
  
        // Quick check if there are direct uses that are read-clobbers.
        if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
@@ -2031,7 +2015,6 @@ struct DSEState {
                       isReadClobber(DefLoc, UseOrDef->getMemoryInst());
              return false;
            })) {
-        Cache.KnownReads.insert(Current);
          LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
          return None;
        }
@@ -2045,13 +2028,24 @@ struct DSEState {
        }
  
        // If Current does not have an analyzable write location, skip it
-      auto CurrentLoc = getLocForWriteEx(CurrentI);
+      CurrentLoc = getLocForWriteEx(CurrentI);
        if (!CurrentLoc) {
          StepAgain = true;
          Current = CurrentDef->getDefiningAccess();
          continue;
        }
  
+      // AliasAnalysis does not account for loops. Limit elimination to
+      // candidates for which we can guarantee they always store to the same
+      // memory location and not multiple locations in a loop.
+      if (Current->getBlock() != KillingDef->getBlock() &&
+          !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+        StepAgain = true;
+        Current = CurrentDef->getDefiningAccess();
+        WalkerStepLimit -= 1;
+        continue;
+      }
+
        if (IsMemTerm) {
          // If the killing def is a memory terminator (e.g. lifetime.end), check
          // the next candidate if the current Current does not write the same
@@ -2062,17 +2056,6 @@ struct DSEState {
          }
          continue;
        } else {
-        // AliasAnalysis does not account for loops. Limit elimination to
-        // candidates for which we can guarantee they always store to the same
-        // memory location and not multiple locations in a loop.
-        if (Current->getBlock() != KillingDef->getBlock() &&
-            !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
-          StepAgain = true;
-          Current = CurrentDef->getDefiningAccess();
-          WalkerStepLimit -= 1;
-          continue;
-        }
-
          int64_t InstWriteOffset, DepWriteOffset;
          auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
                                DepWriteOffset, InstWriteOffset, BatchAA, &F);
@@ -2133,18 +2116,6 @@ struct DSEState {
        }
        --ScanLimit;
        NumDomMemDefChecks++;
-
-      // Check if we already visited this access.
-      if (Cache.isKnownNoRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess
-                          << " is safe earlier.\n");
-        continue;
-      }
-      if (Cache.isKnownRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess
-                          << " has a read-clobber earlier.\n");
-        return None;
-      }
        KnownNoReads.insert(UseAccess);
  
        if (isa<MemoryPhi>(UseAccess)) {
@@ -2172,7 +2143,7 @@ struct DSEState {
  
        // A memory terminator kills all preceeding MemoryDefs and all succeeding
        // MemoryAccesses. We do not have to check it's users.
-      if (isMemTerminator(DefLoc, KillingI, UseInst)) {
+      if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
          LLVM_DEBUG(
              dbgs()
              << " ... skipping, memterminator invalidates following accesses\n");
@@ -2187,19 +2158,13 @@ struct DSEState {
  
        if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
          LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
          return None;
        }
  
        // Uses which may read the original MemoryDef mean we cannot eliminate the
        // original MD. Stop walk.
-      if (isReadClobber(DefLoc, UseInst)) {
+      if (isReadClobber(*CurrentLoc, UseInst)) {
          LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
          return None;
        }
  
@@ -2223,7 +2188,7 @@ struct DSEState {
        //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
        //                  stores [0,1]
        if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
-        if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) {
+        if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
            if (!isInvisibleToCallerAfterRet(DefUO) &&
                UseAccess != EarlierAccess) {
              BasicBlock *MaybeKillingBlock = UseInst->getParent();
@@ -2311,7 +2276,6 @@ struct DSEState {
  
      // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
      // potentially dead.
-    Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end());
      return {EarlierAccess};
    }
  
@@ -2550,7 +2514,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
  
      bool Shortend = false;
      bool IsMemTerm = State.isMemTerminatorInst(SI);
-    DSEState::CheckCache Cache;
      // Check if MemoryAccesses in the worklist are killed by KillingDef.
      for (unsigned I = 0; I < ToCheck.size(); I++) {
        Current = ToCheck[I];
@@ -2558,8 +2521,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
          continue;
  
        Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit,
-          WalkerStepLimit, IsMemTerm, PartialLimit);
+          KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
+          IsMemTerm, PartialLimit);
  
        if (!Next) {
          LLVM_DEBUG(dbgs() << "  finished walk\n");
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll

index b22f5b6..fe50876 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
@@ -41,13 +41,13 @@ bb3:
  }
  
  ; Post-dominating store.
+; TODO: The memset can be shortened.
  define void @accessible_after_return_2(i32* noalias %P, i1 %c) {
  ; CHECK-LABEL: @accessible_after_return_2(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
  ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll

index 3a16a62..925ab4e 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
@@ -36,6 +36,8 @@ define i32 @test_out_of_bounds_store_nonlocal(i1 %c) {
  ; CHECK-NEXT:    [[D:%.*]] = alloca [1 x i32], align 4
  ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
  ; CHECK:       for.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0
+; CHECK-NEXT:    store i32 10, i32* [[ARRAYIDX]], align 4
  ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
  ; CHECK:       for.inc:
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll

index 05326de..b4ed6cc 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
@@ -67,13 +67,11 @@ end:
    ret void
  }
  
-; TODO: The store to %a0 is dead, because only %a1 is read later.
+; The store to %a0 is dead, because only %a1 is read later.
  define void @test3(i1 %c) {
  ; CHECK-LABEL: @test3(
  ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
  ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1
@@ -102,10 +100,8 @@ else:
  define void @test4(i1 %c) {
  ; CHECK-LABEL: @test4(
  ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
  ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
  ; CHECK-NEXT:    store i8 1, i8* [[A1]], align 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll

index b2e6262..8987073 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll
@@ -4,13 +4,13 @@
  ; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias
  ; metadata is correct, but the first store cannot be eliminated, as it may be
  ; read-clobbered by the load.
-; TODO The store is incorrectly eliminated.
  define void @test(i1 %c, i8* %p1, i8* %p2) {
  ; CHECK-LABEL: @test(
+; CHECK-NEXT:    store i8 0, i8* [[P1:%.*]], align 1
  ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0
  ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
  ; CHECK:       if:
-; CHECK-NEXT:    store i8 1, i8* [[P1:%.*]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 1, i8* [[P1]], align 1, !noalias !0
  ; CHECK-NEXT:    ret void
  ; CHECK:       else:
  ; CHECK-NEXT:    store i8 2, i8* [[P1]], align 1
author	Nikita Popov <nikita.ppv@gmail.com>
	Mon, 30 Nov 2020 22:51:54 +0000 (23:51 +0100)
committer	Nikita Popov <nikita.ppv@gmail.com>
	Fri, 18 Dec 2020 19:26:53 +0000 (20:26 +0100)
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll		patch \| blob \| history