const LoopInfo *LI;
};
+
+ /// Find the 'earliest' instruction before which the pointer is known not to
+ /// be captured. Here an instruction A is considered earlier than instruction
+ /// B if A dominates B. If two escapes do not dominate each other, the
+ /// terminator of the common dominator is chosen. If not all uses can be
+ /// analyzed, the earliest escape is set to the first instruction in the
+ /// function entry block.
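+ /// For example, given the (hypothetical) diamond CFG sketched below, where
+ /// the pointer escapes in both arms, neither capture dominates the other,
+ /// so the terminator of 'entry', their nearest common dominator, is
+ /// returned:
+ ///
+ ///   entry: br i1 %c, label %then, label %else  ; <- earliest capture
+ ///   then:  call void @escape(i8* %ptr) ...
+ ///   else:  store i8* %ptr, i8** %q ...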
+ // NOTE: Users have to make sure instructions compared against the earliest
+ // escape are not in a cycle.
+ struct EarliestCaptures : public CaptureTracker {
+
+ EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
+ : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {}
+
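+ // If the analysis gives up (e.g. there are too many uses to explore),
+ // conservatively treat the pointer as captured at function entry.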
+ void tooManyUses() override {
+ Captured = true;
+ EarliestCapture = &*F.getEntryBlock().begin();
+ }
+
+ bool captured(const Use *U) override {
+ Instruction *I = cast<Instruction>(U->getUser());
+ if (isa<ReturnInst>(I) && !ReturnCaptures)
+ return false;
+
+ if (!EarliestCapture) {
+ EarliestCapture = I;
+ } else if (EarliestCapture->getParent() == I->getParent()) {
+ if (I->comesBefore(EarliestCapture))
+ EarliestCapture = I;
+ } else {
+ BasicBlock *CurrentBB = I->getParent();
+ BasicBlock *EarliestBB = EarliestCapture->getParent();
+ if (DT.dominates(EarliestBB, CurrentBB)) {
+ // EarliestCapture already comes before the current use.
+ } else if (DT.dominates(CurrentBB, EarliestBB)) {
+ EarliestCapture = I;
+ } else {
+ // Otherwise find the nearest common dominator and use its terminator.
+ auto *NearestCommonDom =
+ DT.findNearestCommonDominator(CurrentBB, EarliestBB);
+ EarliestCapture = NearestCommonDom->getTerminator();
+ }
+ }
+ Captured = true;
+
+ // Return false to continue the traversal; we need to visit all potential
+ // captures to determine the earliest one.
+ return false;
+ }
+
+ Instruction *EarliestCapture = nullptr;
+
+ const DominatorTree &DT;
+
+ bool ReturnCaptures;
+
+ bool Captured;
+
+ Function &F;
+ };
}
/// PointerMayBeCaptured - Return true if this pointer value may be captured
return CB.Captured;
}
+Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ EarliestCaptures CB(ReturnCaptures, F, DT);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
+ if (CB.Captured)
+ ++NumCapturedBefore;
+ else
+ ++NumNotCapturedBefore;
+ return CB.EarliestCapture;
+}
+
void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
/// basic block.
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
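+ /// Keeps track of the earliest capturing instruction for objects queried
+ /// via notCapturedBeforeOrAt, or nullptr if the object is not captured.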
+ DenseMap<const Value *, Instruction *> EarliestEscapes;
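+ /// Reverse mapping from a capturing instruction to the objects it is the
+ /// cached earliest capture for; used to invalidate the cache when the
+ /// instruction is removed.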
+ DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj;
+
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
DepWriteOffset) == OW_Complete;
}
+ /// Returns true if \p Object is not captured before or by \p I.
+ bool notCapturedBeforeOrAt(const Value *Object, Instruction *I) {
+ if (!isIdentifiedFunctionLocal(Object))
+ return false;
+
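+ // On the first query for Object, compute its earliest capture and cache
+ // the result; subsequent queries reuse the cached instruction.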
+ auto Iter = EarliestEscapes.insert({Object, nullptr});
+ if (Iter.second) {
+ Instruction *EarliestCapture = FindEarliestCapture(
+ Object, F, /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
+ if (EarliestCapture) {
+ auto Ins = Inst2Obj.insert({EarliestCapture, {}});
+ Ins.first->second.push_back(Object);
+ }
+ Iter.first->second = EarliestCapture;
+ }
+
+ // No capturing instruction.
+ if (!Iter.first->second)
+ return true;
+
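+ // Otherwise Object is not captured before or at I only if I is neither
+ // the capturing instruction itself nor reachable from it.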
+ return I != Iter.first->second &&
+ !isPotentiallyReachable(Iter.first->second, I, nullptr, &DT, &LI);
+ }
+
// Returns true if \p Use may read from \p DefLoc.
bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
if (isNoopIntrinsic(UseInst))
if (CB->onlyAccessesInaccessibleMemory())
return false;
+ // BasicAA does not check whether a local object escapes before potentially
+ // aliasing accesses, as doing so requires time linear in the number of
+ // uses. To improve DSE results, compute and cache escape info for local
+ // objects in certain circumstances.
+ if (auto *LI = dyn_cast<LoadInst>(UseInst)) {
+ // If the load reads through a pointer that was itself loaded from memory,
+ // the read cannot alias DefLoc, provided DefUO is a local object that has
+ // not escaped before the load.
+ auto *ReadUO = getUnderlyingObject(LI->getPointerOperand());
+ auto *DefUO = getUnderlyingObject(DefLoc.Ptr);
+ if (DefUO && ReadUO && isa<LoadInst>(ReadUO) &&
+ notCapturedBeforeOrAt(DefUO, UseInst)) {
+ assert(
+ !PointerMayBeCapturedBefore(DefLoc.Ptr, false, true, UseInst, &DT,
+ false, 0, &this->LI) &&
+ "cached analysis disagrees with fresh PointerMayBeCapturedBefore");
+ return false;
+ }
+ }
+
// NOTE: For calls, the number of stores removed could be slightly improved
// by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
// be expensive compared to the benefits in practice. For now, avoid more
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
+
Updater.removeMemoryAccess(MA);
}
NowDeadInsts.push_back(OpI);
}
+ // Clear any cached escape info for objects whose earliest capture was
+ // the removed instruction.
+ auto Iter = Inst2Obj.find(DeadInst);
+ if (Iter != Inst2Obj.end()) {
+ for (const Value *Obj : Iter->second)
+ EarliestEscapes.erase(Obj);
+ Inst2Obj.erase(DeadInst);
+ }
DeadInst->eraseFromParent();
}
}
define i32 @test_not_captured_before_load_same_bb(i32** %in.ptr) {
; CHECK-LABEL: @test_not_captured_before_load_same_bb(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_not_captured_before_load_same_bb_escape_unreachable_block(i32** %in.ptr) {
; CHECK-LABEL: @test_not_captured_before_load_same_bb_escape_unreachable_block(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_captured_after_load_same_bb_2_clobbered_later(i32** %in.ptr) {
; CHECK-LABEL: @test_captured_after_load_same_bb_2_clobbered_later(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: call void @escape_writeonly(i32* [[A]])
; CHECK-LABEL: @test_captured_before_load_same_bb_2(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 55, i32* [[A]], align 4
+; CHECK-NEXT: call void @escape_writeonly(i32* [[A]])
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
-; CHECK-NEXT: call void @escape_and_clobber(i32* [[A]])
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
; CHECK-NEXT: call void @clobber()
;
%a = alloca i32, align 4
store i32 55, i32* %a
+ call void @escape_writeonly(i32* %a)
%in.lv.1 = load i32* , i32** %in.ptr, align 2
- call void @escape_and_clobber(i32* %a)
%in.lv.2 = load i32 , i32* %in.lv.1, align 2
store i32 99, i32* %a, align 4
call void @clobber()
define i32 @test_captured_sibling_path_to_load_other_blocks_1(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_captured_sibling_path_to_load_other_blocks_1(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: call void @escape_writeonly(i32* [[A]])
define i32 @test_only_captured_sibling_path_with_ret_to_load_other_blocks(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_only_captured_sibling_path_with_ret_to_load_other_blocks(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: call void @escape_writeonly(i32* [[A]])
define i32 @test_not_captured_before_load_other_blocks_1(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_1(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_not_captured_before_load_other_blocks_2(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_2(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_not_captured_before_load_other_blocks_3(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_3(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_not_captured_before_load_other_blocks_4(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_4(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-LABEL: @test_not_captured_before_load_other_blocks_7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: call void @escape_writeonly(i32* [[A]])
define i32 @test_captured_after_loop(i32** %in.ptr, i1 %c.1) {
; CHECK-LABEL: @test_captured_after_loop(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
define void @test_memset_not_captured_before_load() {
; CHECK-LABEL: @test_memset_not_captured_before_load(
; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4
-; CHECK-NEXT: [[CAST_A:%.*]] = bitcast [2 x i32]* [[A]] to i8*
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[CAST_A]], i32 4
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[TMP1]], i8 0, i32 4, i1 false)
; CHECK-NEXT: [[LV_1:%.*]] = load [10 x i16]*, [10 x i16]** @global, align 8
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i32 0, i32 0
; CHECK-NEXT: store i32 1, i32* [[GEP_A_0]], align 4
; CHECK-NEXT: bb:
; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4
; CHECK-NEXT: [[CAST_A:%.*]] = bitcast [2 x i32]* [[A]] to i8*
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[CAST_A]], i8 0, i32 8, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CAST_A]], i32 4
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[TMP0]], i8 0, i32 4, i1 false)
; CHECK-NEXT: [[LV_1:%.*]] = load [10 x i16]*, [10 x i16]** @global, align 8
; CHECK-NEXT: [[GEP_LV:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* [[LV_1]], i64 0, i32 1
; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[GEP_LV]], align 2
define i32 @test_not_captured_before_load_same_bb_noalias_call(i32** %in.ptr) {
; CHECK-LABEL: @test_not_captured_before_load_same_bb_noalias_call(
; CHECK-NEXT: [[A:%.*]] = call i32* @alloc()
-; CHECK-NEXT: store i32 55, i32* [[A]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
; CHECK-NEXT: store i32 99, i32* [[A]], align 4
define i32 @test_not_captured_before_load_same_bb_noalias_arg(i32** %in.ptr, i32* noalias %a) {
; CHECK-LABEL: @test_not_captured_before_load_same_bb_noalias_arg(
-; CHECK-NEXT: store i32 55, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2
; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2
-; CHECK-NEXT: store i32 99, i32* [[A]], align 4
+; CHECK-NEXT: store i32 99, i32* [[A:%.*]], align 4
; CHECK-NEXT: call void @escape_and_clobber(i32* [[A]])
; CHECK-NEXT: ret i32 [[IN_LV_2]]
;
call void @escape_and_clobber(i32* %a)
ret i32 %in.lv.2
}
+
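+; A single call captures both %a.1 and %a.2 in %else; the cached escape info
+; must associate that one instruction with both objects.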
+define i32 @instruction_captures_multiple_objects(i32* %p.1, i32** %p.2, i32** %p.3, i1 %c) {
+; CHECK-LABEL: @instruction_captures_multiple_objects(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 0, i32* [[P_1:%.*]], align 8
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[LV_2:%.*]] = load i32*, i32** [[P_2:%.*]], align 8
+; CHECK-NEXT: [[LV_2_2:%.*]] = load i32, i32* [[LV_2]], align 4
+; CHECK-NEXT: ret i32 [[LV_2_2]]
+; CHECK: else:
+; CHECK-NEXT: [[LV_3:%.*]] = load i32*, i32** [[P_3:%.*]], align 8
+; CHECK-NEXT: [[LV_3_2:%.*]] = load i32, i32* [[LV_3]], align 4
+; CHECK-NEXT: call void @capture_and_clobber_multiple(i32* [[A_1]], i32* [[A_2]])
+; CHECK-NEXT: ret i32 [[LV_3_2]]
+;
+entry:
+ %a.1 = alloca i32
+ %a.2 = alloca i32
+ store i32 0, i32* %p.1, align 8
+ br i1 %c, label %then, label %else
+
+then:
+ store i32 99, i32* %a.2, align 4
+ %lv.2 = load i32*, i32** %p.2
+ %lv.2.2 = load i32, i32* %lv.2
+ store i32 0, i32* %a.1, align 8
+ ret i32 %lv.2.2
+
+else:
+ %lv.3 = load i32*, i32** %p.3
+ %lv.3.2 = load i32, i32* %lv.3
+ call void @capture_and_clobber_multiple(i32* %a.1, i32* %a.2)
+ ret i32 %lv.3.2
+}
+
+declare void @capture_and_clobber_multiple(i32*, i32*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
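+; The ptrtoint of %a.2 counts as a capture; this checks that the cached
+; escape info stays consistent when the capturing instruction is itself
+; removed as dead.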
+define i64 @earliest_escape_ptrtoint(i64** %p.1) {
+; CHECK-LABEL: @earliest_escape_ptrtoint(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_1:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[A_2:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[LV_1:%.*]] = load i64*, i64** [[P_1:%.*]], align 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i64, i64* [[LV_1]], align 4
+; CHECK-NEXT: store i64* [[A_1]], i64** [[P_1]], align 8
+; CHECK-NEXT: [[A_2_CAST:%.*]] = bitcast i64* [[A_2]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[A_2_CAST]])
+; CHECK-NEXT: ret i64 [[LV_2]]
+;
+entry:
+ %a.1 = alloca i64
+ %a.2 = alloca i64
+ store i64 99, i64* %a.1
+ %lv.1 = load i64*, i64** %p.1
+ %lv.2 = load i64, i64* %lv.1
+ store i64* %a.1, i64** %p.1, align 8
+ %int = ptrtoint i64* %a.2 to i64
+ store i64 %int , i64* %a.2, align 8
+ %a.2.cast = bitcast i64* %a.2 to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %a.2.cast)
+ ret i64 %lv.2
+}