[LoopUnrollRuntime] Allow indirect transition to deopt non-latch exit blocks
authorYevgeny Rouban <yrouban@azul.com>
Thu, 8 Jun 2023 09:32:12 +0000 (16:32 +0700)
committerYevgeny Rouban <yrouban@azul.com>
Mon, 19 Jun 2023 04:10:01 +0000 (11:10 +0700)
Relax condition on runtime trip count unrolling loops with 1 non-latch exit
that leads to a deop block.

There are cases when the deopt blocks are common exits for different loops.
LoopSimplify pass splits such edges to the common deopting blocks to make
sure that all exit nodes of the loop only have predecessors that are inside
of the loop (See simplifyOneLoop()). This breaks the current condition for
unrolling. This patch allows the split transitive blocks that still lead to
the deopting blocks.

Differential Revision: https://reviews.llvm.org/D152639

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll

index b19156bcb420162dd0fcc4fdde22da2760ef4ee9..1e22eca30d2d3d10e523533794b8f5438fec4039 100644 (file)
@@ -457,7 +457,7 @@ static bool canProfitablyUnrollMultiExitLoop(
   // call.
   return (OtherExits.size() == 1 &&
           (UnrollRuntimeOtherExitPredictable ||
-           OtherExits[0]->getTerminatingDeoptimizeCall()));
+           OtherExits[0]->getPostdominatingDeoptimizeCall()));
   // TODO: These can be fine-tuned further to consider code size or deopt states
   // that are captured by the deoptimize exit block.
   // Also, we can extend this to support more cases, if we actually
index 32d1454ac764550113e74286fa5c3727001b7d1e..fbd4148b8e2bcf52654a8d937fd969d819eaf92b 100644 (file)
@@ -760,6 +760,348 @@ otherexit:
   ret i32 57
 }
 
+; the exit block is not a deopt block but it leads to deopt one.
+define i32 @test5(ptr nocapture %a, i64 %n) {
+;
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK:       entry.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], -8
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK:       for.exiting_block:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK:       for.exiting_block.1:
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK:       latch.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK:       for.exiting_block.2:
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK:       latch.2:
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK:       for.exiting_block.3:
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK:       latch.3:
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK:       for.exiting_block.4:
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK:       latch.4:
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
+; CHECK:       for.exiting_block.5:
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK:       latch.5:
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
+; CHECK:       for.exiting_block.6:
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK:       latch.6:
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
+; CHECK:       for.exiting_block.7:
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK:       latch.7:
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; CHECK:       latchexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK:       latchexit.unr-lcssa:
+; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
+; CHECK:       header.epil:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK:       for.exiting_block.epil:
+; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[TMP0]], 42
+; CHECK-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; CHECK:       latch.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       latchexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[LATCHEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       otherexit.loopexit:
+; CHECK-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
+; CHECK:       otherexit.loopexit3:
+; CHECK-NEXT:    br label [[OTHEREXIT]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT2:%.*]]
+; CHECK:       otherexit2:
+; CHECK-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; CHECK-NEXT:    ret i32 [[RVAL2]]
+;
+; NOUNROLL-LABEL: @test5(
+; NOUNROLL-NEXT:  entry:
+; NOUNROLL-NEXT:    br label [[HEADER:%.*]]
+; NOUNROLL:       header:
+; NOUNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL:       for.exiting_block:
+; NOUNROLL-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT:    br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; NOUNROLL:       latch:
+; NOUNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT:    br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL:       latchexit:
+; NOUNROLL-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL:       otherexit:
+; NOUNROLL-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    [[RVAL:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    br label [[OTHEREXIT2:%.*]]
+; NOUNROLL:       otherexit2:
+; NOUNROLL-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; NOUNROLL-NEXT:    ret i32 [[RVAL2]]
+;
+; ENABLED-LABEL: @test5(
+; ENABLED-NEXT:  entry:
+; ENABLED-NEXT:    [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED:       entry.new:
+; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; ENABLED-NEXT:    br label [[HEADER:%.*]]
+; ENABLED:       header:
+; ENABLED-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; ENABLED-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; ENABLED:       for.exiting_block:
+; ENABLED-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; ENABLED:       latch:
+; ENABLED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; ENABLED-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; ENABLED-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ENABLED-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
+; ENABLED:       for.exiting_block.1:
+; ENABLED-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; ENABLED:       latch.1:
+; ENABLED-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; ENABLED-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; ENABLED-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
+; ENABLED:       for.exiting_block.2:
+; ENABLED-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; ENABLED:       latch.2:
+; ENABLED-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; ENABLED-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; ENABLED-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
+; ENABLED:       for.exiting_block.3:
+; ENABLED-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; ENABLED:       latch.3:
+; ENABLED-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; ENABLED-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; ENABLED-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
+; ENABLED:       for.exiting_block.4:
+; ENABLED-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; ENABLED:       latch.4:
+; ENABLED-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; ENABLED-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; ENABLED-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
+; ENABLED:       for.exiting_block.5:
+; ENABLED-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; ENABLED:       latch.5:
+; ENABLED-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; ENABLED-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; ENABLED-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
+; ENABLED:       for.exiting_block.6:
+; ENABLED-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; ENABLED:       latch.6:
+; ENABLED-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; ENABLED-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; ENABLED-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
+; ENABLED:       for.exiting_block.7:
+; ENABLED-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; ENABLED:       latch.7:
+; ENABLED-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; ENABLED-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; ENABLED-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; ENABLED:       latchexit.unr-lcssa.loopexit:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; ENABLED-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; ENABLED:       latchexit.unr-lcssa:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
+; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
+; ENABLED:       header.epil:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
+; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; ENABLED:       for.exiting_block.epil:
+; ENABLED-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; ENABLED:       latch.epil:
+; ENABLED-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; ENABLED-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; ENABLED-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; ENABLED-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; ENABLED-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; ENABLED-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; ENABLED-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; ENABLED:       latchexit.epilog-lcssa:
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH2:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
+; ENABLED-NEXT:    br label [[LATCHEXIT]]
+; ENABLED:       latchexit:
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; ENABLED:       otherexit.loopexit:
+; ENABLED-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; ENABLED-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT:%.*]]
+; ENABLED:       otherexit.loopexit3:
+; ENABLED-NEXT:    [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
+; ENABLED-NEXT:    [[RVAL_PH5:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT]]
+; ENABLED:       otherexit:
+; ENABLED-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_LCSSA_PH4]], [[OTHEREXIT_LOOPEXIT3]] ]
+; ENABLED-NEXT:    [[RVAL:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[RVAL_PH5]], [[OTHEREXIT_LOOPEXIT3]] ]
+; ENABLED-NEXT:    br label [[OTHEREXIT2:%.*]]
+; ENABLED:       otherexit2:
+; ENABLED-NEXT:    [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; ENABLED-NEXT:    ret i32 [[RVAL2]]
+;
+entry:
+  br label %header
+
+header:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br label %for.exiting_block
 
+for.exiting_block:
+  %cmp = icmp eq i64 %n, 42
+  br i1 %cmp, label %otherexit, label %latch
+
+latch:
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchexit, label %header
+
+latchexit:                                          ; preds = %latch
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+otherexit:
+  %rval = phi i32 [%sum.02, %for.exiting_block ]
+  br label %otherexit2
+
+otherexit2:
+  %rval2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02) ]
+  ret i32 %rval2
+}
 
 declare i32 @llvm.experimental.deoptimize.i32(...)