auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
- hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
+ State.getLocForWriteEx(&I) && isRemovable(&I))
State.MemDefs.push_back(MD);
// Track whether alloca and alloca-like objects are visible in the
UseInst, IOL, AA, &F) == OW_Complete;
}
- /// Returns true if \p Use may read from \p DefLoc.
+ /// Returns true if \p Def is not read before returning from the function.
+ /// Walks the MemorySSA use-lists reachable from \p Def; any potential read
+ /// of the written location, or hitting the scan limit, makes this answer
+ /// conservatively false.
+ bool isWriteAtEndOfFunction(MemoryDef *Def) {
+ LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
+ << *Def->getMemoryInst()
+ << ") is at the end the function \n");
+
+ // Without a precise MemoryLocation for the write we cannot reason about
+ // whether later accesses read it.
+ auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+ if (!MaybeLoc) {
+ LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
+ return false;
+ }
+
+ // Worklist-based breadth-first traversal of MemorySSA uses; Visited
+ // guards against cycles through MemoryPhis.
+ SmallVector<MemoryAccess *, 4> WorkList;
+ SmallPtrSet<MemoryAccess *, 8> Visited;
+ auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
+ if (!Visited.insert(Acc).second)
+ return;
+ for (Use &U : Acc->uses())
+ WorkList.push_back(cast<MemoryAccess>(U.getUser()));
+ };
+ PushMemUses(Def);
+ for (unsigned I = 0; I < WorkList.size(); I++) {
+ // Bail out conservatively once the traversal grows past the limit.
+ if (WorkList.size() >= MemorySSAScanLimit) {
+ LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
+ return false;
+ }
+
+ MemoryAccess *UseAccess = WorkList[I];
+ // MemoryPhis do not access memory themselves; follow their uses.
+ if (isa<MemoryPhi>(UseAccess)) {
+ PushMemUses(UseAccess);
+ continue;
+ }
+
+ // TODO: Checking for aliasing is expensive. Consider reducing the amount
+ // of times this is called and/or caching it.
+ Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
+ if (isReadClobber(*MaybeLoc, UseInst)) {
+ LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
+ return false;
+ }
+
+ // A def that does not read the location may still be followed by a
+ // reader, so keep walking through it.
+ if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
+ PushMemUses(UseDef);
+ }
+ return true;
+ }
+
+ // Returns true if \p Use may read from \p DefLoc.
bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
// Instructions that cannot read memory can never observe the write.
if (!UseInst->mayReadFromMemory())
return false;
// NOTE(review): the aliasing check between DefLoc and UseInst appears to
// be elided in this chunk -- as shown the function always returns false.
// Confirm against the complete file before relying on this body.
return false;
}
+ /// Eliminate writes to objects that are not visible in the caller and are not
+ /// accessed before returning from the function. Returns true if any
+ /// instruction was deleted.
+ bool eliminateDeadWritesAtEndOfFunction() {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ bool MadeChange = false;
+ LLVM_DEBUG(
+ dbgs()
+ << "Trying to eliminate MemoryDefs at the end of the function\n");
+ // Visit the recorded MemoryDefs from last to first.
+ for (int I = MemDefs.size() - 1; I >= 0; I--) {
+ MemoryDef *Def = MemDefs[I];
+ // Defs already handled elsewhere are tracked in SkipStores.
+ if (SkipStores.find(Def) != SkipStores.end())
+ continue;
+
+ // TODO: Consider doing the underlying object check first, if it is
+ // beneficial compile-time wise.
+ if (isWriteAtEndOfFunction(Def)) {
+ Instruction *DefI = Def->getMemoryInst();
+ // See through pointer-to-pointer bitcasts
+ SmallVector<const Value *, 4> Pointers;
+ GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL);
+
+ LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
+ "of the function\n");
+ // The write is dead only if every underlying object is invisible to
+ // the caller once the function returns.
+ bool CanKill = true;
+ for (const Value *Pointer : Pointers) {
+ if (!InvisibleToCallerAfterRet.count(Pointer)) {
+ CanKill = false;
+ break;
+ }
+ }
+
+ if (CanKill) {
+ deleteDeadInstruction(DefI);
+ ++NumFastStores;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+ }
+
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) {
for (auto &KV : State.IOLs)
MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);
+ MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
} // end anonymous namespace
-; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-; XFAIL: *
; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s
declare i8* @strcpy(i8* %dest, i8* %src) nounwind
!20 = !DILocation(line: 9, column: 5, scope: !14)
!21 = !DILocation(line: 10, column: 1, scope: !14)
-; Check that the store is removed and that the memcpy is still there
+; Check that both the store and memcpy are removed because they both access
+; an alloca that is not read.
; CHECK-LABEL: foo
; CHECK-NOT: store i8
-; CHECK: call void @llvm.memcpy
+; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s
declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
call void @capture(i8* %m)
ret i8* %m
}
-; TODO: Remove store in exit.
; Stores to stack objects can be eliminated if they are not captured inside the function.
define void @test_alloca_nocapture_1() {
; CHECK-LABEL: @test_alloca_nocapture_1(
-; CHECK-NEXT: [[M:%.*]] = alloca i8
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
ret void
}
-; TODO: Remove store in exit.
; Cannot remove first store i8 0, i8* %m, as the call to @capture captures the object.
define void @test_alloca_capture_1() {
; CHECK-LABEL: @test_alloca_capture_1(
; CHECK-NEXT: call void @capture(i8* [[M]])
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
ret void
}
-; TODO: Remove store at exit.
; We can remove the last store to %m, even though it escapes because the alloca
; becomes invalid after the function returns.
define void @test_alloca_capture_2(%S1* %E) {
; CHECK: exit:
; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0
; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]]
-; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none []
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: store i32 40, i32* [[SV]]
; CHECK-NEXT: ret void
;
block1:
; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i64 10)
-; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ]
define void @alloca_1(i1 %c) {
; CHECK-LABEL: @alloca_1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
-; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
-; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
define void @alloca_2(i1 %c) {
; CHECK-LABEL: @alloca_2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
-; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
-; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
define void @alloca_3(i1 %c) {
; CHECK-LABEL: @alloca_3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
-; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
-; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-LABEL: @alloca_5(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BLAM_4:%.*]], align 8
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 1
-; CHECK-NEXT: [[TMP37:%.*]] = bitcast i64** [[TMP36]] to i8*
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 3
; CHECK-NEXT: [[TMP39:%.*]] = bitcast i64* [[TMP38]] to i64*
; CHECK-NEXT: store i64 0, i64* [[TMP39]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB46:%.*]], label [[BB47:%.*]]
; CHECK: bb46:
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP37]], i8 0, i64 26, i1 false)
; CHECK-NEXT: ret void
; CHECK: bb47:
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 2
define void @test11() {
; CHECK-LABEL: @test11(
-; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: store i32 0, i32* [[P]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: ret void
-; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
declare noalias i8* @malloc(i64) "malloc-like"
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)
-; Test for byval handling.
-%struct.x = type { i32, i32, i32, i32 }
-define void @test9(%struct.x* byval %a) nounwind {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: ret void
-;
- %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
- store i32 1, i32* %tmp2, align 4
- ret void
-}
-
-; Test for inalloca handling.
-define void @test9_2(%struct.x* inalloca %a) nounwind {
-; CHECK-LABEL: @test9_2(
-; CHECK-NEXT: ret void
-;
- %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
- store i32 1, i32* %tmp2, align 4
- ret void
-}
-
-; Test for preallocated handling.
-define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
-; CHECK-LABEL: @test9_3(
-; CHECK-NEXT: ret void
-;
- %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
- store i32 1, i32* %tmp2, align 4
- ret void
-}
-
; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
declare noalias i8* @malloc(i32)
-define void @test14(i32* %Q) {
-; CHECK-LABEL: @test14(
-; CHECK-NEXT: ret void
-;
- %P = alloca i32
- %DEAD = load i32, i32* %Q
- store i32 %DEAD, i32* %P
- ret void
-
-}
-
-define void @test20() {
-; CHECK-LABEL: @test20(
-; CHECK-NEXT: ret void
-;
- %m = call i8* @malloc(i32 24)
- store i8 0, i8* %m
- ret void
-}
-
-define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
-; CHECK-LABEL: @test22(
-; CHECK-NEXT: ret void
-;
- %k.addr = alloca i32
- %m.addr = alloca i32
- %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
- store i32 0, i32* %k.addr.m.addr, align 4
- ret void
-}
-
declare void @unknown_func()
; Remove redundant store if loaded value is in another block inside a loop.
}
+; Test for byval handling. The store to the byval copy is removed: the
+; callee-local copy is invisible to the caller after return and is never
+; read again.
+%struct.x = type { i32, i32, i32, i32 }
+define void @test9(%struct.x* byval %a) nounwind {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret void
+;
+ %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+ store i32 1, i32* %tmp2, align 4
+ ret void
+}
+
+; Test for inalloca handling. As with byval, the store is removed because
+; the argument memory is not read again before returning.
+define void @test9_2(%struct.x* inalloca %a) nounwind {
+; CHECK-LABEL: @test9_2(
+; CHECK-NEXT: ret void
+;
+ %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+ store i32 1, i32* %tmp2, align 4
+ ret void
+}
+
+; Test for preallocated handling. Same expectation as the byval/inalloca
+; cases above: the unread store is deleted.
+define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT: ret void
+;
+ %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+ store i32 1, i32* %tmp2, align 4
+ ret void
+}
+
; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
declare noalias i8* @malloc(i32)
declare noalias i8* @calloc(i32, i32)
+; Store to a local alloca that is never read again: the store (and the now
+; trivially dead alloca and load) are all removed, leaving only ret.
+define void @test14(i32* %Q) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret void
+;
+ %P = alloca i32
+ %DEAD = load i32, i32* %Q
+ store i32 %DEAD, i32* %P
+ ret void
+
+}
+
; The store here is not dead because the byval call reads it.
declare void @test19f({i32}* byval align 4 %P)
}
+; Store to malloc'ed memory whose pointer never escapes: the object is
+; invisible to the caller after return, so the store is removed.
+define void @test20() {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret void
+;
+ %m = call i8* @malloc(i32 24)
+ store i8 0, i8* %m
+ ret void
+}
+
define void @test21() {
; CHECK-LABEL: @test21(
; CHECK-NEXT: ret void
ret void
}
+; Store through a select of two local allocas: whichever alloca is chosen,
+; it is never read before return, so the store is removed.
+define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: ret void
+;
+ %k.addr = alloca i32
+ %m.addr = alloca i32
+ %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
+ store i32 0, i32* %k.addr.m.addr, align 4
+ ret void
+}
+
; PR13547
declare noalias i8* @strdup(i8* nocapture) nounwind
define noalias i8* @test23() nounwind uwtable ssp {