[LoopUnrollRuntime] Allow indirect transition to deopt non-latch exit blocks
authorYevgeny Rouban <yrouban@azul.com>
Thu, 8 Jun 2023 09:32:12 +0000 (16:32 +0700)
committerYevgeny Rouban <yrouban@azul.com>
Mon, 19 Jun 2023 04:10:01 +0000 (11:10 +0700)
Relax condition on runtime trip count unrolling loops with 1 non-latch exit
that leads to a deop block.

There are cases when the deopt blocks are common exits for different loops.
LoopSimplify pass splits such edges to the common deopting blocks to make
sure that all exit nodes of the loop only have predecessors that are inside
of the loop (See simplifyOneLoop()). This breaks the current condition for
unrolling. This patch allows the split transitive blocks that still lead to
the deopting blocks.

Differential Revision: https://reviews.llvm.org/D152639

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll

index b19156b..1e22eca 100644 (file)
@@ -457,7 +457,7 @@ static bool canProfitablyUnrollMultiExitLoop(
   // call.
   return (OtherExits.size() == 1 &&
           (UnrollRuntimeOtherExitPredictable ||
-           OtherExits[0]->getTerminatingDeoptimizeCall()));
+           OtherExits[0]->getPostdominatingDeoptimizeCall()));
   // TODO: These can be fine-tuned further to consider code size or deopt states
   // that are captured by the deoptimize exit block.
   // Also, we can extend this to support more cases, if we actually
index 32d1454..fbd4148 100644 (file)
@@ -760,6 +760,348 @@ otherexit:
   ret i32 57
 }
 
+; the exit block is not a deopt block but it leads to deopt one.
+define i32 @test5(ptr nocapture %a, i64 %n) {
+;
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK:       entry.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], -8
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK:       for.exiting_block:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK:       for.exiting_block.1:
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK:       latch.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK:       for.exiting_block.2:
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK:       latch.2:
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK:       for.exiting_block.3:
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK:       latch.3:
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK:       for.exiting_block.4:
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK:       latch.4:
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
+; CHECK:       for.exiting_block.5:
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK:       latch.5:
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
+; CHECK:       for.exiting_block.6:
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK:       latch.6:
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
+; CHECK:       for.exiting_block.7:
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK:       latch.7:
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; CHECK:       latchexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK:       latchexit.unr-lcssa:
+; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
+; CHECK:       header.epil:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK:       for.exiting_block.epil:
+; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; CHECK:       latch.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       latchexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[LATCHEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       otherexit.loopexit:
+; CHECK-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
+; CHECK:       otherexit.loopexit3:
+; CHECK-NEXT:    br label [[OTHEREXIT]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT2:%.*]]
+; CHECK:       otherexit2:
+; CHECK-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; CHECK-NEXT:    ret i32 [[RVAL2]]
+;
+; NOUNROLL-LABEL: @test5(
+; NOUNROLL-NEXT:  entry:
+; NOUNROLL-NEXT:    br label [[HEADER:%.*]]
+; NOUNROLL:       header:
+; NOUNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL:       for.exiting_block:
+; NOUNROLL-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT:    br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; NOUNROLL:       latch:
+; NOUNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT:    br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL:       latchexit:
+; NOUNROLL-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL:       otherexit:
+; NOUNROLL-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    [[RVAL:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    br label [[OTHEREXIT2:%.*]]
+; NOUNROLL:       otherexit2:
+; NOUNROLL-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; NOUNROLL-NEXT:    ret i32 [[RVAL2]]
+;
+; ENABLED-LABEL: @test5(
+; ENABLED-NEXT:  entry:
+; ENABLED-NEXT:    [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED:       entry.new:
+; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; ENABLED-NEXT:    br label [[HEADER:%.*]]
+; ENABLED:       header:
+; ENABLED-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; ENABLED-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; ENABLED:       for.exiting_block:
+; ENABLED-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; ENABLED:       latch:
+; ENABLED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; ENABLED-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; ENABLED-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ENABLED-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
+; ENABLED:       for.exiting_block.1:
+; ENABLED-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; ENABLED:       latch.1:
+; ENABLED-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; ENABLED-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; ENABLED-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
+; ENABLED:       for.exiting_block.2:
+; ENABLED-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; ENABLED:       latch.2:
+; ENABLED-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; ENABLED-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; ENABLED-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
+; ENABLED:       for.exiting_block.3:
+; ENABLED-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; ENABLED:       latch.3:
+; ENABLED-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; ENABLED-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; ENABLED-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
+; ENABLED:       for.exiting_block.4:
+; ENABLED-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; ENABLED:       latch.4:
+; ENABLED-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; ENABLED-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; ENABLED-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
+; ENABLED:       for.exiting_block.5:
+; ENABLED-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; ENABLED:       latch.5:
+; ENABLED-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; ENABLED-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; ENABLED-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
+; ENABLED:       for.exiting_block.6:
+; ENABLED-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; ENABLED:       latch.6:
+; ENABLED-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; ENABLED-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; ENABLED-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
+; ENABLED:       for.exiting_block.7:
+; ENABLED-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; ENABLED:       latch.7:
+; ENABLED-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; ENABLED-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; ENABLED-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; ENABLED:       latchexit.unr-lcssa.loopexit:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; ENABLED:       latchexit.unr-lcssa:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
+; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
+; ENABLED:       header.epil:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; ENABLED:       for.exiting_block.epil:
+; ENABLED-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; ENABLED:       latch.epil:
+; ENABLED-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; ENABLED-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; ENABLED-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; ENABLED-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; ENABLED-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; ENABLED-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; ENABLED-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; ENABLED:       latchexit.epilog-lcssa:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH2:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
+; ENABLED-NEXT:    br label [[LATCHEXIT]]
+; ENABLED:       latchexit:
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; ENABLED:       otherexit.loopexit:
+; ENABLED-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; ENABLED-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT:%.*]]
+; ENABLED:       otherexit.loopexit3:
+; ENABLED-NEXT:    [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
+; ENABLED-NEXT:    [[RVAL_PH5:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT]]
+; ENABLED:       otherexit:
+; ENABLED-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_LCSSA_PH4]], [[OTHEREXIT_LOOPEXIT3]] ]
+; ENABLED-NEXT:    [[RVAL:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[RVAL_PH5]], [[OTHEREXIT_LOOPEXIT3]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT2:%.*]]
+; ENABLED:       otherexit2:
+; ENABLED-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; ENABLED-NEXT:    ret i32 [[RVAL2]]
+;
+entry:
+  br label %header
+
+header:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br label %for.exiting_block
 
+for.exiting_block:
+  %cmp = icmp eq i64 %n, 42
+  br i1 %cmp, label %otherexit, label %latch
+
+latch:
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchexit, label %header
+
+latchexit:                                          ; preds = %latch
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+otherexit:
+  %rval = phi i32 [%sum.02, %for.exiting_block ]
+  br label %otherexit2
+
+otherexit2:
+  %rval2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02) ]
+  ret i32 %rval2
+}
 
 declare i32 @llvm.experimental.deoptimize.i32(...)