"enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop,
- bool LoopNestMode);
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI,
+ bool &FoldableInLoop, bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- bool FreeInLoop = false;
+ bool FoldableInLoop = false;
bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
- SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+ isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FoldableInLoop,
+ LoopNestMode) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
- if (!FreeInLoop) {
+ if (!FoldableInLoop) {
++II;
salvageDebugInfo(I);
eraseInstruction(I, *SafetyInfo, MSSAU);
return true;
}
-/// Return true if the instruction is free in the loop.
-static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
+/// Return true if the instruction is foldable in the loop.
+/// Only GEPs are considered: a TCC_Free GEP can be folded into its users
+/// (e.g. as a load/store addressing mode), so leaving a copy in the loop
+/// costs nothing.  Other free instructions (e.g. freeze, ptrtoint) gain
+/// nothing from being duplicated per use, so they are not foldable.
+static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                          const TargetTransformInfo *TTI) {
-  InstructionCost CostI =
-      TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
-
   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    // Cost query is now done only for GEPs; it is dead for all other
+    // instructions since they unconditionally return false below.
+    InstructionCost CostI =
+        TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
     if (CostI != TargetTransformInfo::TCC_Free)
       return false;
     // For a GEP, we cannot simply use getInstructionCost because currently
     return true;
   }
+  // Non-GEP instructions are never treated as foldable, even when free:
+  // sinking would duplicate them into each use with no folding benefit
+  // (see the duplicate_freeze / duplicate_ptrtoint tests).
   return false;
 }
/// Return true if the only users of this instruction are outside of
///
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop,
- bool LoopNestMode) {
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI,
+ bool &FoldableInLoop, bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
- bool IsFree = isFreeInLoop(I, CurLoop, TTI);
+ bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
}
if (CurLoop->contains(UI)) {
- if (IsFree) {
- FreeInLoop = true;
+ if (IsFoldable) {
+ FoldableInLoop = true;
continue;
}
return false;
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: li 6, 0
-; CHECK-NEXT: lwz 7, 0(3)
+; CHECK-NEXT: lwz 5, 0(3)
; CHECK-NEXT: b .LBB2_2
; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: beq 0, .LBB2_7
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB2_5 Depth 2
-; CHECK-NEXT: addi 5, 7, 1
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: addi 5, 5, 1
; CHECK-NEXT: cmplw 7, 4
; CHECK-NEXT: bc 12, 0, .LBB2_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: ld 7, 0(3)
+; CHECK-NEXT: ld 5, 0(3)
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: b .LBB3_2
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 7
-; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: beq 0, .LBB3_7
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_5 Depth 2
-; CHECK-NEXT: addi 5, 7, 1
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: addi 5, 5, 1
; CHECK-NEXT: cmpld 7, 4
; CHECK-NEXT: bc 12, 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-LABEL: atomicrmw_udec_wrap_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: lwz 6, 0(3)
+; CHECK-NEXT: lwz 5, 0(3)
; CHECK-NEXT: b .LBB6_2
; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB6_7
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB6_5 Depth 2
+; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: cmpwi 6, 0
; CHECK-NEXT: cmplw 1, 6, 4
-; CHECK-NEXT: addi 5, 6, -1
+; CHECK-NEXT: addi 5, 5, -1
; CHECK-NEXT: cror 20, 2, 5
; CHECK-NEXT: bc 12, 20, .LBB6_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: ld 6, 0(3)
+; CHECK-NEXT: ld 5, 0(3)
; CHECK-NEXT: b .LBB7_2
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB7_7
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB7_5 Depth 2
-; CHECK-NEXT: cmpdi 6, 0
+; CHECK-NEXT: mr. 6, 5
; CHECK-NEXT: cmpld 1, 6, 4
-; CHECK-NEXT: addi 5, 6, -1
+; CHECK-NEXT: addi 5, 5, -1
; CHECK-NEXT: cror 20, 2, 5
; CHECK-NEXT: bc 12, 20, .LBB7_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; NO_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
; NO_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
; NO_ASSUME: bb13:
-; NO_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
-; NO_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
-; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
+; NO_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
+; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
; NO_ASSUME-NEXT: call void @__msan_warning_noreturn()
; NO_ASSUME-NEXT: unreachable
; NO_ASSUME: bb15:
; USE_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
; USE_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
; USE_ASSUME: bb13:
-; USE_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
-; USE_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
-; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
+; USE_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
+; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
; USE_ASSUME-NEXT: call void @__msan_warning_noreturn()
; USE_ASSUME-NEXT: unreachable
; USE_ASSUME: bb15:
; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[ARRAYIDX0]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: loopexit1:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[IF_END]] ]
+; CHECK-NEXT: [[TRUNC_LCSSA1:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
; CHECK-NEXT: [[P_ADDR_LCSSA:%.*]] = phi ptr [ [[P_ADDR]], [[IF_END]] ]
-; CHECK-NEXT: [[TRUNC_LE:%.*]] = trunc i64 [[ADD_LCSSA]] to i32
-; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LE]]
-; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LE]])
+; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
+; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LCSSA1]]
+; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LCSSA]])
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ]
declare void @use.i32(i32)
declare void @use.i64(i64)
+; Don't duplicate freeze just because it's free.
define i32 @duplicate_freeze(i1 %c, i32 %x) {
; CHECK-LABEL: @duplicate_freeze(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @use.i32(i32 [[FR]])
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[FR_LE:%.*]] = freeze i32 [[X]]
-; CHECK-NEXT: ret i32 [[FR_LE]]
+; CHECK-NEXT: [[FR_LCSSA:%.*]] = phi i32 [ [[FR]], [[LOOP]] ]
+; CHECK-NEXT: ret i32 [[FR_LCSSA]]
;
entry:
br label %loop
ret i32 %fr
}
+; Don't duplicate ptrtoint just because it's free.
define i64 @duplicate_ptrtoint(i1 %c, ptr %p) {
; CHECK-LABEL: @duplicate_ptrtoint(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @use.i64(i64 [[PI]])
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[PI_LE:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT: ret i64 [[PI_LE]]
+; CHECK-NEXT: [[PI_LCSSA:%.*]] = phi i64 [ [[PI]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[PI_LCSSA]]
;
entry:
br label %loop