From b46c085d2b6d15873fb53718f0a70b3848e19e4a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 26 Feb 2021 16:48:58 +0300 Subject: [PATCH] [NFCI] SCEVExpander: emit intrinsics for integral {u,s}{min,max} SCEV expressions These intrinsics, not the icmp+select are the canonical form nowadays, so we might as well directly emit them. This should not cause any regressions, but if it does, then then they would needed to be fixed regardless. Note that this doesn't deal with `SCEVExpander::isHighCostExpansion()`, but that is a pessimization, not a correctness issue. Additionally, the non-intrinsic form has issues with undef, see https://reviews.llvm.org/D88287#2587863 --- .../Transforms/Utils/ScalarEvolutionExpander.cpp | 41 +- .../2007-08-06-MisinterpretBranch.ll | 27 +- .../CodeGen/Thumb2/LowOverheadLoops/unpredload.ll | 2 +- .../Transforms/HardwareLoops/ARM/fp-emulation.ll | 12 +- .../test/Transforms/HardwareLoops/ARM/simple-do.ll | 3 +- llvm/test/Transforms/HardwareLoops/loop-guards.ll | 6 +- llvm/test/Transforms/IRCE/bad_expander.ll | 3 +- llvm/test/Transforms/IRCE/clamp.ll | 3 +- llvm/test/Transforms/IRCE/conjunctive-checks.ll | 15 +- llvm/test/Transforms/IRCE/correct-loop-info.ll | 37 +- llvm/test/Transforms/IRCE/decrementing-loop.ll | 6 +- .../Transforms/IRCE/multiple-access-no-preloop.ll | 9 +- .../IRCE/non-loop-invariant-rhs-instr.ll | 45 ++- .../Transforms/IRCE/range_intersect_miscompile.ll | 3 +- .../Transforms/IRCE/ranges_of_different_types.ll | 42 +-- llvm/test/Transforms/IRCE/rc-negative-bound.ll | 152 ++++---- .../Transforms/IRCE/single-access-no-preloop.ll | 6 +- .../Transforms/IRCE/single-access-with-preloop.ll | 24 +- .../Transforms/IRCE/unsigned_comparisons_ugt.ll | 6 +- .../Transforms/IRCE/unsigned_comparisons_ult.ll | 6 +- .../Transforms/IndVarSimplify/ARM/code-size.ll | 242 +++++------- .../IndVarSimplify/X86/eliminate-trunc.ll | 52 ++- .../test/Transforms/IndVarSimplify/X86/iv-widen.ll | 30 +- .../X86/loop-invariant-conditions.ll | 22 +- llvm/test/Transforms/IndVarSimplify/elim-extend.ll | 22 +- .../IndVarSimplify/eliminate-comparison.ll | 10 +- .../Transforms/IndVarSimplify/full_widening.ll | 3 +- .../IndVarSimplify/lcssa-preservation.ll | 37 +- .../Transforms/IndVarSimplify/lftr-multi-exit.ll | 3 +- llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll | 13 +- .../Transforms/IndVarSimplify/loop-predication.ll | 112 +++--- .../Transforms/IndVarSimplify/no-iv-rewrite.ll | 7 +- .../IndVarSimplify/replace-loop-exit-folds.ll | 13 +- llvm/test/Transforms/IndVarSimplify/sentinel.ll | 7 +- .../Transforms/IndVarSimplify/widen-loop-comp.ll | 70 ++-- .../Transforms/LoopPredication/predicate-exits.ll | 173 ++++----- .../runtime-loop-multiexit-dom-verify.ll | 6 +- .../LoopVectorize/X86/cost-model-assert.ll | 103 ++--- .../LoopVectorize/X86/float-induction-x86.ll | 158 ++++---- .../LoopVectorize/X86/invariant-load-gather.ll | 65 ++-- .../X86/invariant-store-vectorization.ll | 220 ++++++----- llvm/test/Transforms/LoopVectorize/X86/pr23997.ll | 60 ++- llvm/test/Transforms/LoopVectorize/X86/pr35432.ll | 84 ++--- .../first-order-recurrence-complex.ll | 96 +++-- .../LoopVectorize/first-order-recurrence.ll | 18 +- .../Transforms/LoopVectorize/if-pred-stores.ll | 136 +++++-- .../LoopVectorize/interleaved-accesses.ll | 415 ++++++++++----------- .../LoopVectorize/invariant-store-vectorization.ll | 18 +- llvm/test/Transforms/LoopVectorize/loop-form.ll | 219 ++++++----- 49 files changed, 1392 insertions(+), 1470 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 849ca24..2c9b4cb 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1715,8 +1715,14 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "smax"); + else { + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1738,8 +1744,14 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "umax"); + else { + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1761,8 +1773,14 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "smin"); + else { + Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1784,8 +1802,14 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpULT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "umin"); + else { + Value *ICmp = Builder.CreateICmpULT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -2262,6 +2286,7 @@ template static InstructionCost costAndCollectOperands( case scUMaxExpr: case scSMinExpr: case scUMinExpr: { + // FIXME: should this ask the cost for Intrinsic's? Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); break; diff --git a/llvm/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/llvm/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll index fd09fd5..9b60efd 100644 --- a/llvm/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll @@ -1,20 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -indvars -adce -simplifycfg -S | FileCheck %s ; PR1598 -; CHECK: icmp s - define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[X_ADDR_1:%.*]] = select i1 [[TMP3]], i32 [[X]], i32 [[SMAX]] +; CHECK-NEXT: ret i32 [[X_ADDR_1]] +; entry: - %tmp3 = icmp eq i32 %a, %b ; [#uses=1] - br i1 %tmp3, label %return, label %bb + %tmp3 = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp3, label %return, label %bb bb: ; preds = %bb, %entry - %x_addr.0 = phi i32 [ %tmp6, %bb ], [ %x, %entry ] ; [#uses=1] - %tmp6 = add i32 %x_addr.0, 1 ; [#uses=3] - %tmp9 = icmp slt i32 %tmp6, %y ; [#uses=1] - br i1 %tmp9, label %bb, label %return + %x_addr.0 = phi i32 [ %tmp6, %bb ], [ %x, %entry ] ; [#uses=1] + %tmp6 = add i32 %x_addr.0, 1 ; [#uses=3] + %tmp9 = icmp slt i32 %tmp6, %y ; [#uses=1] + br i1 %tmp9, label %bb, label %return return: ; preds = %bb, %entry - %x_addr.1 = phi i32 [ %x, %entry ], [ %tmp6, %bb ] ; [#uses=1] - ret i32 %x_addr.1 + %x_addr.1 = phi i32 [ %x, %entry ], [ %tmp6, %bb ] ; [#uses=1] + ret i32 %x_addr.1 } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 98d30c8..4b16066 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -5,7 +5,7 @@ define void @arm_cmplx_mag_squared_q15_mve(i16* %pSrc, i16* %pDst, i32 %blockSiz ; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs.w r12, r2, #8 +; CHECK-NEXT: subs.w r3, r2, #8 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll index 6c80acc..5de4e3b 100644 --- a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll @@ -5,8 +5,7 @@ ; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations ; CHECK: entry: -; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 -; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK-FP: [[COUNT:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %n, i32 1) ; CHECK: while.body.lr.ph: ; CHECK-FP: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 [[COUNT]]) @@ -56,8 +55,7 @@ cleanup: ; CHECK-LABEL: test_fptoui ; CHECK: entry: -; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 -; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK-FP: [[COUNT:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %n, i32 1) ; CHECK-FP: while.body.lr.ph: ; CHECK-FP: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-FP-NEXT: br label %while.body @@ -108,8 +106,7 @@ cleanup: ; CHECK-LABEL: load_store_float ; CHECK: entry: -; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 -; CHECK: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK: [[COUNT:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %n, i32 1) ; CHECK: while.body.lr.ph: ; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-NEXT: br label %while.body @@ -158,8 +155,7 @@ cleanup: ; CHECK-LABEL: fp_add ; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations ; CHECK: entry: -; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 -; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK-FP: [[COUNT:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %n, i32 1) ; CHECK: while.body.lr.ph: ; CHECK-FP: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 [[COUNT]]) ; CHECK: br label %while.body diff --git a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll index f2ed9b8..d4f8683 100644 --- a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll @@ -146,8 +146,7 @@ while.end: ; CHECK: entry: ; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, 1 -; CHECK: [[CMP:%[^ ]+]] = icmp slt i32 %n, 2 -; CHECK: [[SMIN:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 2 +; CHECK: [[SMIN:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 2) ; CHECK: [[SUB:%[^ ]+]] = sub i32 [[ROUND]], [[SMIN]] ; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[SUB]], 1 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1 diff --git a/llvm/test/Transforms/HardwareLoops/loop-guards.ll b/llvm/test/Transforms/HardwareLoops/loop-guards.ll index 83fbc5a..840779c 100644 --- a/llvm/test/Transforms/HardwareLoops/loop-guards.ll +++ b/llvm/test/Transforms/HardwareLoops/loop-guards.ll @@ -6,8 +6,7 @@ ; CHECK-LABEL: test1 ; CHECK: entry: -; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %N, 2 -; CHECK: [[MAX:%[^ ]+]] = select i1 [[CMP]], i32 %N, i32 2 +; CHECK: [[MAX:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %N, i32 2) ; CHECK: [[COUNT:%[^ ]+]] = add i32 [[MAX]], -1 ; CHECK: br i1 %t1, label %do.body.preheader ; CHECK: do.body.preheader: @@ -60,8 +59,7 @@ if.end: ; preds = %do.body, %entry ; CHECK-LABEL: test3 ; CHECK: entry: -; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %N, 1 -; CHECK: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %N, i32 1 +; CHECK: [[COUNT:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %N, i32 1) ; CHECK: br i1 %brmerge.demorgan, label %do.body.preheader ; CHECK: do.body.preheader: ; CHECK-EXIT: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) diff --git a/llvm/test/Transforms/IRCE/bad_expander.ll b/llvm/test/Transforms/IRCE/bad_expander.ll index 8b4a440c..8eb7695 100644 --- a/llvm/test/Transforms/IRCE/bad_expander.ll +++ b/llvm/test/Transforms/IRCE/bad_expander.ll @@ -90,8 +90,7 @@ define void @test_03(i64* %p1, i64* %p2, i1 %maybe_exit) { ; CHECK-NEXT: %num = load i64, i64* %p1, align 4 ; CHECK-NEXT: [[DIV:%[^ ]+]] = udiv i64 %num, 13 ; CHECK-NEXT: [[DIV_MINUS_1:%[^ ]+]] = add nsw i64 [[DIV]], -1 -; CHECK-NEXT: [[COMP1:%[^ ]+]] = icmp sgt i64 [[DIV_MINUS_1]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[COMP1]], i64 [[DIV_MINUS_1]], i64 0 +; CHECK-NEXT: %exit.mainloop.at = call i64 @llvm.smax.i64(i64 [[DIV_MINUS_1]], i64 0) ; CHECK-NEXT: [[COMP2:%[^ ]+]] = icmp slt i64 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[COMP2]], label %loop.preheader, label %main.pseudo.exit ; CHECK-NOT: preloop diff --git a/llvm/test/Transforms/IRCE/clamp.ll b/llvm/test/Transforms/IRCE/clamp.ll index e672b65..6c1f9c2 100644 --- a/llvm/test/Transforms/IRCE/clamp.ll +++ b/llvm/test/Transforms/IRCE/clamp.ll @@ -28,8 +28,7 @@ preheader: ; preds = %entry ; CHECK-NEXT: %length_gep.i146 = getelementptr inbounds i8, i8 addrspace(1)* undef, i64 8 ; CHECK-NEXT: %length_gep_typed.i147 = bitcast i8 addrspace(1)* undef to i32 addrspace(1)* ; CHECK-NEXT: %tmp43 = icmp ult i64 %indvars.iv.next467, %tmp21 -; CHECK-NEXT: [[C0:%[^ ]+]] = icmp ugt i64 %tmp21, 1 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[C0]], i64 %tmp21, i64 1 +; CHECK-NEXT: %exit.mainloop.at = call i64 @llvm.umax.i64(i64 %tmp21, i64 1) ; CHECK-NEXT: [[C1:%[^ ]+]] = icmp ult i64 1, %exit.mainloop.at ; CHECK-NEXT: br i1 [[C1]], label %loop.preheader, label %main.pseudo.exit diff --git a/llvm/test/Transforms/IRCE/conjunctive-checks.ll b/llvm/test/Transforms/IRCE/conjunctive-checks.ll index acc77b1..39ce32e 100644 --- a/llvm/test/Transforms/IRCE/conjunctive-checks.ll +++ b/llvm/test/Transforms/IRCE/conjunctive-checks.ll @@ -6,10 +6,8 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { ; CHECK: loop.preheader: ; CHECK: [[len_sub:[^ ]+]] = add nsw i32 %len, -4 -; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]] -; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 -; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 [[len_sub]]) +; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[exit_main_loop_at_hiclamp]], i32 0) ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] ; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit @@ -56,12 +54,9 @@ define void @f_1( ; CHECK-LABEL: @f_1( ; CHECK: loop.preheader: -; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a -; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a -; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n -; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n -; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 -; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 +; CHECK: [[smax_len:[^ ]+]] = call i32 @llvm.smin.i32(i32 %len.b, i32 %len.a) +; CHECK: [[upper_limit_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 [[smax_len]], i32 %n) +; CHECK: [[upper_limit:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[upper_limit_loclamp]], i32 0) entry: %len.a = load i32, i32* %a_len_ptr, !range !0 diff --git a/llvm/test/Transforms/IRCE/correct-loop-info.ll b/llvm/test/Transforms/IRCE/correct-loop-info.ll index 71bbbad..bc7275f 100644 --- a/llvm/test/Transforms/IRCE/correct-loop-info.ll +++ b/llvm/test/Transforms/IRCE/correct-loop-info.ll @@ -13,35 +13,38 @@ source_filename = "correct-loop-info.ll" define void @baz() personality i32* ()* @ham { ; CHECK-LABEL: @baz( ; CHECK-NEXT: bb: +; CHECK-NEXT: [[EXIT_PRELOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 -1) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0) ; CHECK-NEXT: br label [[OUTERHEADER:%.*]] ; CHECK: outerheader: ; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 undef, 84 ; CHECK-NEXT: br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]] ; CHECK: bb2: -; CHECK-NEXT: br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 undef, [[EXIT_PRELOOP_AT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] ; CHECK: innerheader.preloop.preheader: ; CHECK-NEXT: br label [[INNERHEADER_PRELOOP:%.*]] ; CHECK: mainloop: -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: innerheader.preheader: ; CHECK-NEXT: br label [[INNERHEADER:%.*]] ; CHECK: innerheader: ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ] ; CHECK-NEXT: invoke void @pluto() -; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp +; CHECK-NEXT: to label [[BB5:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT_SPLIT_LP_LOOPEXIT_SPLIT_LP:%.*]] ; CHECK: bb5: ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 ; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]] ; CHECK: bb8: ; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84 -; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -53,11 +56,11 @@ define void @baz() personality i32* ()* @ham { ; CHECK: outer_exiting.loopexit.split-lp.loopexit: ; CHECK-NEXT: [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK-NEXT: br label [[OUTER_EXITING_LOOPEXIT_SPLIT_LP:%.*]] ; CHECK: outer_exiting.loopexit.split-lp.loopexit.split-lp: -; CHECK-NEXT: %lpad.loopexit.split-lp3 = landingpad { i8*, i32 } +; CHECK-NEXT: [[LPAD_LOOPEXIT_SPLIT_LP3:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK-NEXT: br label [[OUTER_EXITING_LOOPEXIT_SPLIT_LP]] ; CHECK: outer_exiting.loopexit.split-lp: ; CHECK-NEXT: br label [[OUTER_EXITING]] ; CHECK: outer_exiting: @@ -95,12 +98,12 @@ define void @baz() personality i32* ()* @ham { ; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]] ; CHECK: bb8.preloop: ; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1 -; CHECK-NEXT: br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], [[EXIT_PRELOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], [[LOOP0:!llvm.loop !.*]], !irce.loop.clone !5 ; CHECK: preloop.exit.selector: ; CHECK-NEXT: [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]] ; CHECK: preloop.pseudo.exit: ; CHECK-NEXT: [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] @@ -110,14 +113,14 @@ define void @baz() personality i32* ()* @ham { ; CHECK: innerheader.postloop: ; CHECK-NEXT: [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ] ; CHECK-NEXT: invoke void @pluto() -; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit +; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT_SPLIT_LP_LOOPEXIT:%.*]] ; CHECK: bb5.postloop: ; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1 ; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 1 ; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]] ; CHECK: bb8.postloop: ; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84 -; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5 +; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], [[LOOP6:!llvm.loop !.*]], !irce.loop.clone !5 ; bb: br label %outerheader diff --git a/llvm/test/Transforms/IRCE/decrementing-loop.ll b/llvm/test/Transforms/IRCE/decrementing-loop.ll index a824522..4626071 100644 --- a/llvm/test/Transforms/IRCE/decrementing-loop.ll +++ b/llvm/test/Transforms/IRCE/decrementing-loop.ll @@ -29,10 +29,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) { ret void ; CHECK: loop.preheader: -; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n -; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n -; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 -; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 +; CHECK: [[len_hiclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %len, i32 %n) +; CHECK: [[not_exit_preloop_at:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[len_hiclamp]], i32 0) ; CHECK: %exit.preloop.at = add nsw i32 [[not_exit_preloop_at]], -1 } diff --git a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll index 2abbe41..778b755 100644 --- a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -38,12 +38,9 @@ define void @multiple_access_no_preloop( ; CHECK-LABEL: @multiple_access_no_preloop( ; CHECK: loop.preheader: -; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a -; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a -; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n -; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n -; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 -; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 +; CHECK: [[smax_len:[^ ]+]] = call i32 @llvm.smin.i32(i32 %len.b, i32 %len.a) +; CHECK: [[upper_limit_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 [[smax_len]], i32 %n) +; CHECK: [[upper_limit:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[upper_limit_loclamp]], i32 0) ; CHECK: loop: ; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds diff --git a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll index 3612270..dc33492 100644 --- a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll +++ b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll @@ -10,39 +10,36 @@ define i32 @test_01(i32 %A, i64 %Len, i32 *%array) { ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i64 [[LEN]], 0 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP2]], i64 [[LEN]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[LEN]], [[SMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[UMIN]], 1 -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i64 [[UMIN]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 1, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[LEN]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[LEN]], [[SMIN]] +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP1]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umax.i64(i64 [[UMIN]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 1, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: loop.preheader2: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[LATCH:%.*]] ], [ 1, [[LOOP_PREHEADER2]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[INDVAR]], [[LEN]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[INDVAR]], [[LEN]] ; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT_LOOPEXIT3:%.*]] ; CHECK: guarded: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[INDVAR]] -; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[ADDR]] +; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[ZERO_LOOPEXIT_LOOPEXIT4:%.*]], label [[LATCH]] ; CHECK: latch: ; CHECK-NEXT: [[INDVAR_NEXT]] = add nuw nsw i64 [[INDVAR]], 2 ; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[A]] to i64 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i64 [[INDVAR_NEXT]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true -; CHECK-NEXT: br i1 [[TMP10]], label [[MAIN_EXIT_SELECTOR:%.*]], label [[LOOP]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i64 [[INDVAR_NEXT]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true +; CHECK-NEXT: br i1 [[TMP7]], label [[MAIN_EXIT_SELECTOR:%.*]], label [[LOOP]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[INDVAR_NEXT_LCSSA:%.*]] = phi i64 [ [[INDVAR_NEXT]], [[LATCH]] ] ; CHECK-NEXT: [[RES2_LCSSA1:%.*]] = phi i32 [ [[RES2]], [[LATCH]] ] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i64 [[INDVAR_NEXT_LCSSA]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[INDVAR_NEXT_LCSSA]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[INDVAR_COPY:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVAR_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVAR_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -71,19 +68,19 @@ define i32 @test_01(i32 %A, i64 %Len, i32 *%array) { ; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] ; CHECK: loop.postloop: ; CHECK-NEXT: [[INDVAR_POSTLOOP:%.*]] = phi i64 [ [[INDVAR_NEXT_POSTLOOP:%.*]], [[LATCH_POSTLOOP]] ], [ [[INDVAR_COPY]], [[POSTLOOP]] ] -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[INDVAR_POSTLOOP]], [[LEN]] -; CHECK-NEXT: br i1 [[TMP12]], label [[GUARDED_POSTLOOP:%.*]], label [[DEOPT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i64 [[INDVAR_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[TMP9]], label [[GUARDED_POSTLOOP:%.*]], label [[DEOPT_LOOPEXIT:%.*]] ; CHECK: guarded.postloop: ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[INDVAR_POSTLOOP]] -; CHECK-NEXT: [[RES_POSTLOOP:%.*]] = load i32, i32* [[ADDR_POSTLOOP]] +; CHECK-NEXT: [[RES_POSTLOOP:%.*]] = load i32, i32* [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp eq i32 [[RES_POSTLOOP]], 0 ; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[ZERO_LOOPEXIT_LOOPEXIT:%.*]], label [[LATCH_POSTLOOP]] ; CHECK: latch.postloop: ; CHECK-NEXT: [[INDVAR_NEXT_POSTLOOP]] = add nuw nsw i64 [[INDVAR_POSTLOOP]], 2 ; CHECK-NEXT: [[RES2_POSTLOOP]] = mul i32 [[RES_POSTLOOP]], 3 -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[A]] to i64 -; CHECK-NEXT: [[CMP2_POSTLOOP:%.*]] = icmp ugt i64 [[INDVAR_NEXT_POSTLOOP]], [[TMP13]] -; CHECK-NEXT: br i1 [[CMP2_POSTLOOP]], label [[LOOPEXIT_LOOPEXIT]], label [[LOOP_POSTLOOP]], !llvm.loop !0, !irce.loop.clone !5 +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: [[CMP2_POSTLOOP:%.*]] = icmp ugt i64 [[INDVAR_NEXT_POSTLOOP]], [[TMP10]] +; CHECK-NEXT: br i1 [[CMP2_POSTLOOP]], label [[LOOPEXIT_LOOPEXIT]], label [[LOOP_POSTLOOP]], [[LOOP0:!llvm.loop !.*]], !irce.loop.clone !5 ; preheader: %tripcheck = icmp sgt i64 %Len, 2 diff --git a/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll b/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll index 3030bed..fa7544d 100644 --- a/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll +++ b/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll @@ -227,8 +227,7 @@ define void @test_05(i32* %p) { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %n = load i32, i32* %p, align 4, !range ! -; CHECK-NEXT: [[CMP_1:%[^ ]+]] = icmp ugt i32 %n, 2 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP_1]], i32 %n, i32 2 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umax.i32(i32 %n, i32 2) ; CHECK-NEXT: [[CMP_2:%[^ ]+]] = icmp ult i32 2, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP_2]], label %loop_header.preheader, label %main.pseudo.exit ; CHECK: range_check_block: ; preds = %inner_loop diff --git a/llvm/test/Transforms/IRCE/ranges_of_different_types.ll b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll index 12f99ea..5a838aa 100644 --- a/llvm/test/Transforms/IRCE/ranges_of_different_types.ll +++ b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll @@ -24,10 +24,8 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smax.i32(i32 [[SMAX]], i32 0) ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -80,11 +78,9 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smax.i32(i32 [[LEN_MINUS_SMAX]], i32 -13) ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: loop.preloop: ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] @@ -146,11 +142,9 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %len, i32 13) ; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB3]], i32 101) ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: postloop: @@ -201,8 +195,7 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB1]], i32 101) ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -244,10 +237,8 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smax.i32(i32 [[SMAX]], i32 0) ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -285,11 +276,9 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smax.i32(i32 [[LEN_MINUS_SMAX]], i32 -13) ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -330,11 +319,9 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %len, i32 13) ; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB3]], i32 101) ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -372,8 +359,7 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13 -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB1]], i32 101) ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop diff --git a/llvm/test/Transforms/IRCE/rc-negative-bound.ll b/llvm/test/Transforms/IRCE/rc-negative-bound.ll index 5670d5d..ce56a9c 100644 --- a/llvm/test/Transforms/IRCE/rc-negative-bound.ll +++ b/llvm/test/Transforms/IRCE/rc-negative-bound.ll @@ -21,7 +21,7 @@ define void @test_01(i32 *%arr, i32 %n) { ; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: out.of.bounds: @@ -70,7 +70,7 @@ define void @test_02(i32 *%arr, i32 %n) { ; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: out.of.bounds: @@ -113,21 +113,16 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMIN]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[SMIN]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] -; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMIN2]], 0 -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMIN2]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[BOUND]], i32 0) +; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[N]], i32 [[TMP3]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN2]], i32 0) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: loop.preheader4: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -137,14 +132,14 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -170,9 +165,9 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0 ; CHECK: in.bounds.postloop: ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] -; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]] -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !1, !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], [[LOOP1:!llvm.loop !.*]], !irce.loop.clone !6 ; entry: @@ -206,17 +201,14 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMIN]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMIN]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP2]], i32 [[SMIN]], i32 -1 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[BOUND:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[BOUND]], [[SMIN]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[SMAX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[N]], i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: loop.preheader1: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -226,14 +218,14 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -259,9 +251,9 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0 ; CHECK: in.bounds.postloop: ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] -; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], [[N]] -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !7, !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], [[LOOP7:!llvm.loop !.*]], !irce.loop.clone !6 ; entry: @@ -305,7 +297,7 @@ define void @test_05(i32 *%arr, i32 %n) { ; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: out.of.bounds: @@ -356,7 +348,7 @@ define void @test_06(i32 *%arr, i32 %n) { ; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: out.of.bounds: @@ -401,21 +393,16 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMIN]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[SMIN]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] -; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMIN2]], 0 -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMIN2]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[BOUND]], i32 0) +; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[N]], i32 [[TMP3]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN2]], i32 0) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: loop.preheader4: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -425,14 +412,14 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -458,9 +445,9 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0 ; CHECK: in.bounds.postloop: ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] -; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]] -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !8, !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], [[LOOP8:!llvm.loop !.*]], !irce.loop.clone !6 ; entry: %first.itr.check = icmp sgt i32 %n, 0 @@ -496,17 +483,14 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMIN]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMIN]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP2]], i32 [[SMIN]], i32 -1 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[BOUND:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[BOUND]], [[SMIN]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[SMAX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[N]], i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ; CHECK: loop.preheader1: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -516,14 +500,14 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] -; CHECK-NEXT: store i32 0, i32* [[ADDR]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] @@ -549,9 +533,9 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) { ; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0 ; CHECK: in.bounds.postloop: ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] -; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], [[N]] -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !9, !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], [[LOOP9:!llvm.loop !.*]], !irce.loop.clone !6 ; entry: %first.itr.check = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll index a13ac51..6085ae6 100644 --- a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll +++ b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll @@ -87,10 +87,8 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 ; CHECK: loop.preheader: ; CHECK: [[safe_range_end:[^ ]+]] = add nsw i32 %len, -4 -; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] -; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 -; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 [[safe_range_end]]) +; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[exit_main_loop_at_hiclamp]], i32 0) ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] ; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit diff --git a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll index f883b90..2540248 100644 --- a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll +++ b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll @@ -31,30 +31,22 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 ; CHECK-LABEL: @single_access_with_preloop( ; CHECK: loop.preheader: -; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 -; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 +; CHECK: [[safe_offset_preloop:[^ ]+]] = call i32 @llvm.smax.i32(i32 %offset, i32 -2147483647) ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. ; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] -; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] -; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] -; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 -; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 +; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 [[safe_start]]) +; CHECK: [[exit_preloop_at:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[exit_preloop_at_loclamp]], i32 0) ; CHECK: [[len_minus_sint_max:[^ ]+]] = add nuw nsw i32 %len, -2147483647 -; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] -; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] +; CHECK: [[safe_offset_mainloop:[^ ]+]] = call i32 @llvm.smax.i32(i32 %offset, i32 [[len_minus_sint_max]]) ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. ; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] -; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] -; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] -; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 -; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 +; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 [[safe_upper_end]]) +; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = call i32 @llvm.smax.i32(i32 %offset, i32 0) ; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] -; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] -; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] -; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 -; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 +; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]]) +; CHECK: [[exit_mainloop_at:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[exit_mainloop_at_hiclamp]], i32 0) ; CHECK: mainloop: ; CHECK: br label %loop diff --git a/llvm/test/Transforms/IRCE/unsigned_comparisons_ugt.ll b/llvm/test/Transforms/IRCE/unsigned_comparisons_ugt.ll index e300148..111604a 100644 --- a/llvm/test/Transforms/IRCE/unsigned_comparisons_ugt.ll +++ b/llvm/test/Transforms/IRCE/unsigned_comparisons_ugt.ll @@ -57,8 +57,7 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: test_02( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %len, i32 1) ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit @@ -148,8 +147,7 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: test_04( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %len, i32 1) ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit diff --git a/llvm/test/Transforms/IRCE/unsigned_comparisons_ult.ll b/llvm/test/Transforms/IRCE/unsigned_comparisons_ult.ll index c2b988e..ec2310f 100644 --- a/llvm/test/Transforms/IRCE/unsigned_comparisons_ult.ll +++ b/llvm/test/Transforms/IRCE/unsigned_comparisons_ult.ll @@ -60,8 +60,7 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: test_02( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %len, i32 1) ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit @@ -193,8 +192,7 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK: test_05( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = call i32 @llvm.umax.i32(i32 %len, i32 1) ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll index bf5f6b3..0d79d32 100644 --- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -6,32 +6,30 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-LABEL: @remove_loop( ; CHECK-V8M-NEXT: entry: ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 -; CHECK-V8M-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-V8M-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 -; CHECK-V8M-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 ; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8M: while.cond: ; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8M: while.end: -; CHECK-V8M-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-V8M-NEXT: ret i32 [[TMP5]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] +; CHECK-V8M-NEXT: ret i32 [[TMP4]] ; ; CHECK-V8A-LABEL: @remove_loop( ; CHECK-V8A-NEXT: entry: ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 -; CHECK-V8A-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-V8A-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 -; CHECK-V8A-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 ; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8A: while.cond: ; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8A: while.end: -; CHECK-V8A-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-V8A-NEXT: ret i32 [[TMP5]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] +; CHECK-V8A-NEXT: ret i32 [[TMP4]] ; entry: br label %while.cond @@ -135,17 +133,15 @@ exit: define i32 @test1(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-LABEL: @test1( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -163,17 +159,15 @@ define i32 @test1(i32* %array, i32 %length, i32 %n) #0 { ; ; CHECK-V8A-LABEL: @test1( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -222,14 +216,13 @@ define i32 @test2(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-LABEL: @test2( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -248,14 +241,13 @@ define i32 @test2(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8A-LABEL: @test2( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -301,21 +293,17 @@ exit: ; preds = %guarded, %entry define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32 %n) #0 { ; CHECK-V8M-LABEL: @two_range_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] -; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -336,21 +324,17 @@ define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %l ; ; CHECK-V8A-LABEL: @two_range_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] -; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -404,25 +388,19 @@ exit: ; preds = %guarded, %entry define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { ; CHECK-V8M-LABEL: @three_range_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] -; CHECK-V8M-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] -; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 -; CHECK-V8M-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] -; CHECK-V8M-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] -; CHECK-V8M-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) +; CHECK-V8M-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -446,25 +424,19 @@ define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 ; ; CHECK-V8A-LABEL: @three_range_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] -; CHECK-V8A-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] -; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 -; CHECK-V8A-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] -; CHECK-V8A-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] -; CHECK-V8A-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) +; CHECK-V8A-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -527,20 +499,17 @@ exit: ; preds = %guarded, %entry define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { ; CHECK-V8M-LABEL: @distinct_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 -; CHECK-V8M-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] -; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -549,7 +518,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -566,20 +535,17 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; ; CHECK-V8A-LABEL: @distinct_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 -; CHECK-V8A-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] -; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -588,7 +554,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -644,17 +610,15 @@ exit: define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %length, i32 %n) #0 { ; CHECK-V8M-LABEL: @duplicate_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -680,17 +644,15 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; ; CHECK-V8A-LABEL: @duplicate_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -759,19 +721,17 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-LABEL: @different_ivs( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 -; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] -; CHECK-V8M-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -789,19 +749,17 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8A-LABEL: @different_ivs( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 -; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] -; CHECK-V8A-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll index 4738369..625103f 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -36,10 +36,9 @@ define void @test_01(i32 %n) { ; ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -66,10 +65,9 @@ define void @test_02(i32 %n) { ; ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 2147483646 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 2147483646 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 2147483646) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -119,15 +117,14 @@ define void @test_04(i32 %n) { ; ; CHECK-LABEL: @test_04( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], -2147483647 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 -2147483647) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483647, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -252,15 +249,14 @@ exit: define void @test_02_unsigned(i32 %n) { ; CHECK-LABEL: @test_02_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], -2 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[UMAX]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 -2) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -330,15 +326,14 @@ exit: define void @test_05_unsigned(i32 %n) { ; CHECK-LABEL: @test_05_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -381,10 +376,9 @@ exit: define void @test_07(i32* %p, i32 %n) { ; CHECK-LABEL: @test_07( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -472,14 +466,13 @@ define void @test_10(i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 90) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN]], -99 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP3]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -551,8 +544,7 @@ define void @test_12(i32* %p) { ; CHECK-LABEL: @test_12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0:!range !.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll index de48c63..9d83207 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll @@ -25,7 +25,7 @@ define void @loop_0(i32* %a) { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[O:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[O]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[O]], align 4 ; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[V]], 0 ; CHECK-NEXT: br i1 [[T]], label [[EXIT24:%.*]], label [[B24]] ; CHECK: B24: @@ -77,7 +77,7 @@ define void @loop_0_dead(i32* %a) { ; CHECK-NEXT: [[DOT02:%.*]] = phi i32 [ [[TMP33:%.*]], [[B24:%.*]] ], [ 0, [[B18_PREHEADER]] ] ; CHECK-NEXT: [[TMP33]] = add nuw i32 [[DOT02]], 1 ; CHECK-NEXT: [[O:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[DOT02]] -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[O]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[O]], align 4 ; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[V]], 0 ; CHECK-NEXT: br i1 [[T]], label [[EXIT24:%.*]], label [[B24]] ; CHECK: B24: @@ -122,15 +122,14 @@ define void @loop_1(i32 %lim) { ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 [[LIM:%.*]], 0 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[LIM]], 2 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[LIM]], i32 2 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM]], i32 2) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: call void @dummy.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: call void @dummy.i64(i64 [[TMP0]]) ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: leave.loopexit: @@ -167,35 +166,34 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8 %tmp1) { ; CHECK-NEXT: [[BC0:%.*]] = bitcast i32* [[LINED:%.*]] to i8* ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[HSIZE:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[NSTEPS:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP2]], i32 [[NSTEPS]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[NSTEPS:%.*]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT11:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 [[INDVARS_IV7]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 [[INDVARS_IV7]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]] ; CHECK: for.body2.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[FOR_BODY2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY2]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], [[INDVARS_IV]] -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[INDVARS_IV]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP4]] ; CHECK-NEXT: store i8 [[TMP1:%.*]], i8* [[ADD_PTR]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_BODY3_PREHEADER:%.*]] ; CHECK: for.body3.preheader: -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[WIDE_TRIP_COUNT5:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ 1, [[FOR_BODY3_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], [[INDVARS_IV2]] -; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[INDVARS_IV2]] +; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP7]] ; CHECK-NEXT: store i8 [[TMP1]], i8* [[ADD_PTR2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 ; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT5]] diff --git a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll index ad11bc0..543615b 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -193,7 +193,7 @@ for.end: ; preds = %if.end, %entry define void @test7(i64 %start, i64* %inc_ptr) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0:!range !.*]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: loop.preheader: @@ -311,14 +311,13 @@ for.end: ; preds = %if.end, %entry define void @test3_neg(i64 %start) { ; CHECK-LABEL: @test3_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -339,9 +338,8 @@ for.end: ; preds = %if.end, %entry define void @test4_neg(i64 %start) { ; CHECK-LABEL: @test4_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[SMAX]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -350,7 +348,7 @@ define void @test4_neg(i64 %start) { ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -405,7 +403,7 @@ for.end: ; preds = %if.end, %entry define void @test8(i64 %start, i64* %inc_ptr) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !1 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG1:!range !.*]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: loop.preheader: @@ -525,7 +523,7 @@ exit: define void @test11(i64* %inc_ptr) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]] ; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[NE_COND]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: @@ -576,7 +574,7 @@ exit: define void @test12(i64* %inc_ptr) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll index 033f6aa..ee83f20 100644 --- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll +++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll @@ -8,10 +8,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @postincConstIV(i8* %base, i32 %limit) nounwind { ; CHECK-LABEL: @postincConstIV( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LIMIT:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LIMIT]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] @@ -116,25 +115,24 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMITDEC]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[LIMIT]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[LIMIT]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[OUTERLOOP:%.*]] ; CHECK: outerloop: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[INNERCOUNT:%.*]] = phi i32 [ [[INNERCOUNT_MERGE:%.*]], [[OUTERMERGE]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV1]], -1 -; CHECK-NEXT: [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV1]], -1 +; CHECK-NEXT: [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i8 0, i8* [[ADR1]], align 1 ; CHECK-NEXT: br label [[INNERPREHEADER:%.*]] ; CHECK: innerpreheader: ; CHECK-NEXT: [[INNERPRECMP:%.*]] = icmp sgt i32 [[LIMITDEC]], [[INNERCOUNT]] ; CHECK-NEXT: br i1 [[INNERPRECMP]], label [[INNERLOOP_PREHEADER:%.*]], label [[OUTERMERGE]] ; CHECK: innerloop.preheader: -; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[INNERCOUNT]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[INNERCOUNT]] to i64 ; CHECK-NEXT: br label [[INNERLOOP:%.*]] ; CHECK: innerloop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP3]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ADR2:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8 0, i8* [[ADR2]], align 1 @@ -144,10 +142,10 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]] ; CHECK: innerexit: ; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32 ; CHECK-NEXT: br label [[OUTERMERGE]] ; CHECK: outermerge: -; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ] +; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP3]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ] ; CHECK-NEXT: [[ADR4:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV1]] ; CHECK-NEXT: store i8 0, i8* [[ADR4]], align 1 ; CHECK-NEXT: [[OFS5:%.*]] = sext i32 [[INNERCOUNT_MERGE]] to i64 diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll index addad29..0bbcd3f 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -575,9 +575,8 @@ define void @func_17(i32* %len.ptr) { ; CHECK-NEXT: [[ENTRY_COND:%.*]] = and i1 [[ENTRY_COND_0]], [[ENTRY_COND_1]] ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LEN]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LEN]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[SMAX]], -5 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LEN]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX]], -5 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ -6, [[LOOP_PREHEADER]] ] @@ -586,7 +585,7 @@ define void @func_17(i32* %len.ptr) { ; CHECK-NEXT: br i1 true, label [[BE]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: be: ; CHECK-NEXT: call void @side_effect() -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_INC]], [[TMP1]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_INC]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] @@ -734,8 +733,7 @@ define void @func_20(i32* %length.ptr) { ; CHECK-NEXT: [[LENGTH_IS_NONZERO:%.*]] = icmp ne i32 [[LENGTH]], 0 ; CHECK-NEXT: br i1 [[LENGTH_IS_NONZERO]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LENGTH]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LENGTH]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LENGTH]], i32 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/full_widening.ll b/llvm/test/Transforms/IndVarSimplify/full_widening.ll index 00cf07f..4a4e473 100644 --- a/llvm/test/Transforms/IndVarSimplify/full_widening.ll +++ b/llvm/test/Transforms/IndVarSimplify/full_widening.ll @@ -7,8 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test_01(double* %p, double %x, i32* %np, i32* %mp, i32 %k) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[K:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[K]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[K:%.*]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/IndVarSimplify/lcssa-preservation.ll b/llvm/test/Transforms/IndVarSimplify/lcssa-preservation.ll index 5d502f3..f06f42d 100644 --- a/llvm/test/Transforms/IndVarSimplify/lcssa-preservation.ll +++ b/llvm/test/Transforms/IndVarSimplify/lcssa-preservation.ll @@ -1,50 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s -; Make sure IndVars preserves LCSSA form, especially across loop nests. +; Make sure IndVars preserves LCSSA form, especially across loop nests. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" define void @PR18642(i32 %x) { ; CHECK-LABEL: @PR18642( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: br i1 false, label [[INNER_LATCH:%.*]], label [[OUTER_LATCH:%.*]] +; CHECK: inner.latch: +; CHECK-NEXT: br i1 true, label [[INNER_HEADER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: outer.latch: +; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[EXIT_LOOPEXIT1:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit.loopexit1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %outer.header -; CHECK: br label %outer.header outer.header: -; CHECK: outer.header: %outer.iv = phi i32 [ 0, %entry ], [ %x, %outer.latch ] br label %inner.header -; CHECK: %[[SCEV_EXPANDED:.*]] = add i32 -; CHECK: br label %inner.header inner.header: -; CHECK: inner.header: %inner.iv = phi i32 [ undef, %outer.header ], [ %inc, %inner.latch ] %cmp1 = icmp slt i32 %inner.iv, %outer.iv br i1 %cmp1, label %inner.latch, label %outer.latch -; CHECK: br i1 {{.*}}, label %inner.latch, label %outer.latch inner.latch: -; CHECK: inner.latch: %inc = add nsw i32 %inner.iv, 1 %cmp2 = icmp slt i32 %inner.iv, %outer.iv br i1 %cmp2, label %inner.header, label %exit -; CHECK: br i1 {{.*}}, label %inner.header, label %[[EXIT_FROM_INNER:.*]] outer.latch: -; CHECK: outer.latch: br i1 undef, label %outer.header, label %exit -; CHECK: br i1 {{.*}}, label %outer.header, label %[[EXIT_FROM_OUTER:.*]] -; CHECK: [[EXIT_FROM_INNER]]: -; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[SCEV_EXPANDED]], %inner.latch ] -; CHECK-NEXT: br label %exit -; CHECK: [[EXIT_FROM_OUTER]]: -; CHECK-NEXT: br label %exit exit: -; CHECK: exit: %exit.phi = phi i32 [ %inc, %inner.latch ], [ undef, %outer.latch ] -; CHECK-NEXT: phi i32 [ %[[LCSSA]], %[[EXIT_FROM_INNER]] ], [ undef, %[[EXIT_FROM_OUTER]] ] ret void } diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll index 7dfd4eb..c9894d6 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll @@ -128,8 +128,7 @@ exit: define void @compound_early_exit(i32 %n, i32 %m) { ; CHECK-LABEL: @compound_early_exit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[M:%.*]], [[N:%.*]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[M]], i32 [[N]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[M:%.*]], i32 [[N:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll index 8a55ec0..6f46ae0 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -27,7 +27,7 @@ define void @ptriv(i8* %base, i32 %n) nounwind { ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[BASE]] to i64 ; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8 -; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]] +; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] @@ -142,10 +142,10 @@ define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector, ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV2]] ; CHECK-NEXT: [[MATRIXP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[MATRIX:%.*]], i32 0, i64 [[TMP1]] -; CHECK-NEXT: [[V1:%.*]] = load double, double* [[MATRIXP]] +; CHECK-NEXT: [[V1:%.*]] = load double, double* [[MATRIXP]], align 8 ; CHECK-NEXT: call void @use(double [[V1]]) ; CHECK-NEXT: [[VECTORP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[VECTOR:%.*]], i32 0, i64 [[INDVARS_IV2]] -; CHECK-NEXT: [[V2:%.*]] = load double, double* [[VECTORP]] +; CHECK-NEXT: [[V2:%.*]] = load double, double* [[VECTORP]], align 8 ; CHECK-NEXT: call void @use(double [[V2]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 @@ -187,8 +187,7 @@ define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector, ; ; CHECK-LABEL: @unguardedloop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[IROW:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[IROW]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[IROW:%.*]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -244,7 +243,7 @@ define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 -; CHECK-NEXT: store i8 3, i8* [[APTR]] +; CHECK-NEXT: store i8 3, i8* [[APTR]], align 1 ; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] @@ -311,7 +310,7 @@ define void @aryptriv([256 x i8]* %base, i32 %n) nounwind { ; CHECK: loop: ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 -; CHECK-NEXT: store i8 3, i8* [[APTR]] +; CHECK-NEXT: store i8 3, i8* [[APTR]], align 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: diff --git a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll index 77c18ef..eb56205 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll @@ -7,17 +7,15 @@ declare void @prevent_merging() define i32 @test1(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -78,7 +76,7 @@ define i32 @neg_store(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] @@ -177,14 +175,13 @@ define i32 @test2(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -231,21 +228,17 @@ exit: ; preds = %guarded, %entry define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32 %n) { ; CHECK-LABEL: @two_range_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] -; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] -; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -299,25 +292,19 @@ exit: ; preds = %guarded, %entry define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { ; CHECK-LABEL: @three_range_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] -; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] -; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] -; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] -; CHECK-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) +; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) +; CHECK-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -380,20 +367,17 @@ exit: ; preds = %guarded, %entry define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { ; CHECK-LABEL: @distinct_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] -; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -402,7 +386,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -458,17 +442,15 @@ exit: define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %length, i32 %n) { ; CHECK-LABEL: @duplicate_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -624,7 +606,7 @@ define i32 @unconditional_latch_with_side_effect(i32* %a, i32 %length) { ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 ; CHECK: guarded: -; CHECK-NEXT: store volatile i32 0, i32* [[A:%.*]] +; CHECK-NEXT: store volatile i32 0, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 ; CHECK-NEXT: br label [[LOOP]] ; @@ -653,19 +635,17 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @different_ivs( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index 68566fd..43cc63c 100644 --- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -206,8 +206,7 @@ exit: define void @maxvisitor(i32 %limit, i32* %base) nounwind { ; CHECK-LABEL: @maxvisitor( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LIMIT:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LIMIT]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -218,12 +217,12 @@ define void @maxvisitor(i32 %limit, i32* %base) nounwind { ; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VAL]], [[MAX]] ; CHECK-NEXT: br i1 [[CMP19]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: br label [[LOOP_INC]] ; CHECK: if.else: ; CHECK-NEXT: br label [[LOOP_INC]] ; CHECK: loop.inc: -; CHECK-NEXT: [[MAX_NEXT]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[MAX]], [[IF_ELSE]] ] +; CHECK-NEXT: [[MAX_NEXT]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[MAX]], [[IF_ELSE]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll index e751275..26b72c2 100644 --- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -7,11 +7,10 @@ define i32 @remove_loop(i32 %size) { ; CHECK-LABEL: @remove_loop( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: ; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] @@ -19,8 +18,8 @@ define i32 @remove_loop(i32 %size) { ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %while.cond diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll index f59eeaa..f422019 100644 --- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll +++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll @@ -10,19 +10,18 @@ define void @test() personality i32* ()* @snork { ; CHECK-NEXT: br label [[BB4:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[TMP2:%.*]], [[SMAX:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[TMP1:%.*]], [[SMAX:%.*]] ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB4]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP0]], [[BB1:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb4: ; CHECK-NEXT: [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 undef, 36 -; CHECK-NEXT: [[SMAX]] = select i1 [[TMP1]], i32 [[INDVARS_IV]], i32 36 +; CHECK-NEXT: [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36) ; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, i8* null, i32 7, i8* null, i32 7, i8* null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, i8* null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, i8* null, i32 4, i64 undef, i32 7, i8* null, i32 0, i8 addrspace(1)* undef, i32 3, i32 undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 0, i8 addrspace(1)* undef, i32 7, i8* null) ] ; CHECK-NEXT: to label [[BB7:%.*]] unwind label [[BB15:%.*]] ; CHECK: bb7: -; CHECK-NEXT: [[TMP2]] = add i32 [[TMP6]], [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1]] = add i32 [[TMP6]], [[INDVARS_IV]] ; CHECK-NEXT: br label [[BB9:%.*]] ; CHECK: bb9: ; CHECK-NEXT: br i1 true, label [[BB1]], label [[BB9]] diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll index 70e9a75..469fa7a 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -24,10 +24,9 @@ define i32 @test1() { ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @ptr, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @e, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP2]], i32 0) +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1 @@ -36,21 +35,21 @@ define i32 @test1() { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ [[INDVARS_IV_NEXT]], [[FOR_COND:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_COND]] ; CHECK: if.then: ; CHECK-NEXT: [[I_05_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[I_05_LCSSA_WIDE]] to i32 -; CHECK-NEXT: store i32 [[TMP6]], i32* @idx, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[I_05_LCSSA_WIDE]] to i32 +; CHECK-NEXT: store i32 [[TMP5]], i32* @idx, align 4 ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.cond.for.end.loopexit_crit_edge: ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @idx, align 4 -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @idx, align 4 +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: store i32 -1, i32* @idx, align 4 @@ -99,8 +98,7 @@ define void @test2([8 x i8]* %a, i8* %b, i8 %limit) { ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 ; CHECK-NEXT: br i1 undef, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[CONV]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[CONV]], i32 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1) ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.preheader: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] @@ -116,11 +114,11 @@ define void @test2([8 x i8]* %a, i8* %b, i8 %limit) { ; CHECK: for.body4.us: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[A:%.*]], i64 [[INDVARS_IV2]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX6_US]], align 1 -; CHECK-NEXT: [[IDXPROM7_US:%.*]] = zext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX6_US]], align 1 +; CHECK-NEXT: [[IDXPROM7_US:%.*]] = zext i8 [[TMP0]] to i64 ; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i64 [[IDXPROM7_US]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX8_US]], align 1 -; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX6_US]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX8_US]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[ARRAYIDX6_US]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY4_US]], label [[FOR_INC13_US_LOOPEXIT:%.*]] @@ -185,8 +183,7 @@ for.end: define i32 @test3(i32* %a, i32 %b) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[B:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[B]], i32 0 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B:%.*]], i32 0) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -196,8 +193,8 @@ define i32 @test3(i32* %a, i32 %b) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: @@ -307,10 +304,9 @@ for.end: define i32 @test6(i32* %a, i32 %b) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[B:%.*]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[B]], i32 -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B:%.*]], i32 -1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -319,8 +315,8 @@ define i32 @test6(i32* %a, i32 %b) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: @@ -352,10 +348,9 @@ define i32 @test7(i32* %a, i32 %b) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[B]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 -1) +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -364,8 +359,8 @@ define i32 @test7(i32* %a, i32 %b) { ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]] @@ -457,8 +452,7 @@ define i32 @test9(i32* %a, i32 %b, i32 %init) { ; CHECK-NEXT: br i1 [[E]], label [[FOR_COND_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INIT]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[INIT]], [[B:%.*]] -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[INIT]], i32 [[B]] +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INIT]], i32 [[B:%.*]]) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -468,11 +462,11 @@ define i32 @test9(i32* %a, i32 %b, i32 %init) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[TMP2]] ; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ] diff --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll index 83d85eb..01c7858 100644 --- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll +++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -8,15 +8,13 @@ define i32 @test1(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -37,7 +35,7 @@ define i32 @test1(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -91,15 +89,13 @@ define i32 @test_non_canonical(i32* %array, i32 %length, i1 %cond_0) { ; CHECK-LABEL: @test_non_canonical( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[LENGTH:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[LENGTH]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LENGTH:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -120,7 +116,7 @@ define i32 @test_non_canonical(i32* %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[LENGTH]] @@ -173,20 +169,17 @@ define i32 @test_two_range_checks(i32* %array, i32 %length.1, i32 %length.2, i32 ; CHECK-LABEL: @test_two_range_checks( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] -; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[COND_0:%.*]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LENGTH_2]], [[UMIN1]] -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[TMP6]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP9]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[TMP3]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP6]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -214,7 +207,7 @@ define i32 @test_two_range_checks(i32* %array, i32 %length.1, i32 %length.2, i32 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -288,7 +281,7 @@ define i32 @test_unanalyzeable_exit(i32* %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G, align 4 ; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 ; CHECK: deopt3: @@ -299,7 +292,7 @@ define i32 @test_unanalyzeable_exit(i32* %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] @@ -352,15 +345,13 @@ define i32 @test_unanalyzeable_exit2(i32* %array, i32 %length, i32 %n, i1 %cond_ ; CHECK-LABEL: @test_unanalyzeable_exit2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -378,7 +369,7 @@ define i32 @test_unanalyzeable_exit2(i32* %array, i32 %length, i32 %n, i1 %cond_ ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET2]] ; CHECK: guarded: -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G, align 4 ; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 ; CHECK: deopt3: @@ -389,7 +380,7 @@ define i32 @test_unanalyzeable_exit2(i32* %array, i32 %length, i32 %n, i1 %cond_ ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -473,10 +464,10 @@ define i32 @test_unanalyzeable_latch(i32* %array, i32 %length, i32 %n, i1 %cond_ ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G, align 4 ; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[UNKNOWN]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -551,7 +542,7 @@ define i32 @provably_taken(i32* %array, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], 200 @@ -626,7 +617,7 @@ define i32 @provably_not_taken(i32* %array, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], 200 @@ -681,15 +672,13 @@ define i32 @unswitch_exit_form(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-LABEL: @unswitch_exit_form( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt.loopexit: ; CHECK-NEXT: br label [[DEOPT]] @@ -712,7 +701,7 @@ define i32 @unswitch_exit_form(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -766,15 +755,13 @@ define i32 @swapped_wb(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-LABEL: @swapped_wb( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[TMP5]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[TMP3]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -795,7 +782,7 @@ define i32 @swapped_wb(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -846,16 +833,14 @@ exit: define i32 @trivial_wb(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @trivial_wb( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -875,7 +860,7 @@ define i32 @trivial_wb(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] @@ -949,7 +934,7 @@ define i32 @todo_unconditional_latch(i32* %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: br label [[LOOP]] @@ -997,18 +982,16 @@ define i32 @wb_in_loop(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP5]] -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP8]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[TMP3]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP6]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -1038,7 +1021,7 @@ define i32 @wb_in_loop(i32* %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index b3c0039..d354d68 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -232,7 +232,8 @@ define void @test6() { ; CHECK-LABEL: header.prol: ; CHECK-NEXT: %indvars.iv.prol = phi i64 [ undef, %header.prol.preheader ], [ %indvars.iv.next.prol, %latch.prol ] -; CHECK-NEXT: %prol.iter = phi i64 [ 1, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ] +; CHECK-NEXT: %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ] + ; CHECK-NEXT: br i1 false, label %latch.prol, label %otherexit.loopexit1 ; CHECK-LABEL: header.prol.loopexit.unr-lcssa: @@ -241,7 +242,8 @@ define void @test6() { ; CHECK-LABEL: header.prol.loopexit: ; CHECK-NEXT: %indvars.iv.unr = phi i64 [ undef, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] -; CHECK-NEXT: br i1 true, label %latchexit, label %entry.new +; CHECK-NEXT: %5 = icmp ult i64 %2, 3 +; CHECK-NEXT: br i1 %5, label %latchexit, label %entry.new ; CHECK-LABEL: entry.new: ; CHECK-NEXT: br label %header diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll index 1b9c8c5..8fcb503 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -16,8 +16,17 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] ; CHECK: if.then: -; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0 @@ -25,68 +34,68 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], -; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] -; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] -; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, [[TBAA4:!tbaa !.*]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 -; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 -; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 -; CHECK-NEXT: store i32 [[TMP25]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], +; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4:!tbaa !.*]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 ; CHECK-NEXT: store i32 [[TMP28]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 ; CHECK-NEXT: store i32 [[TMP29]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0 +; CHECK-NEXT: store i32 [[TMP30]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1 +; CHECK-NEXT: store i32 [[TMP31]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2 +; CHECK-NEXT: store i32 [[TMP32]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3 +; CHECK-NEXT: store i32 [[TMP33]], i32* undef, align 4, [[TBAA4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] ; CHECK-NEXT: br label [[FOR_BODY68:%.*]] ; CHECK: for.body68: ; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 ; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 -; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] -; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP31]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] +; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP35]] to i32 ; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 ; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] ; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 ; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] ; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 017bce6..4f3fd28 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -291,22 +291,21 @@ for.end: ; preds = %for.end.loopexit, % define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_with_fast_math( ; AUTO_VEC-NEXT: entry: -; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; AUTO_VEC-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 ; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to double -; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul fast double [[CAST_CRD]], 3.000000e+00 -; AUTO_VEC-NEXT: [[TMP2:%.*]] = add nsw i64 [[N_VEC]], -16 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 4 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 48 -; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double [[CAST_CRD]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[N_VEC]], -16 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 4 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 48 +; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], 2305843009213693948 +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 2305843009213693948 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] @@ -315,69 +314,69 @@ define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP7]], align 8 -; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP6]], i64 4 -; AUTO_VEC-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP9]], align 8 -; AUTO_VEC-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP6]], i64 8 -; AUTO_VEC-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], <4 x double>* [[TMP11]], align 8 -; AUTO_VEC-NEXT: [[TMP12:%.*]] = getelementptr double, double* [[TMP6]], i64 12 -; AUTO_VEC-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP12]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], <4 x double>* [[TMP13]], align 8 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] +; AUTO_VEC-NEXT: [[TMP6:%.*]] = bitcast double* [[TMP5]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP6]], align 8 +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[TMP5]], i64 4 +; AUTO_VEC-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP8]], align 8 +; AUTO_VEC-NEXT: [[TMP9:%.*]] = getelementptr double, double* [[TMP5]], i64 8 +; AUTO_VEC-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], <4 x double>* [[TMP10]], align 8 +; AUTO_VEC-NEXT: [[TMP11:%.*]] = getelementptr double, double* [[TMP5]], i64 12 +; AUTO_VEC-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], <4 x double>* [[TMP12]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD2_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD3_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT]] -; AUTO_VEC-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT]], <4 x double>* [[TMP15]], align 8 -; AUTO_VEC-NEXT: [[TMP16:%.*]] = getelementptr double, double* [[TMP14]], i64 4 -; AUTO_VEC-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_1]], <4 x double>* [[TMP17]], align 8 -; AUTO_VEC-NEXT: [[TMP18:%.*]] = getelementptr double, double* [[TMP14]], i64 8 -; AUTO_VEC-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_1]], <4 x double>* [[TMP19]], align 8 -; AUTO_VEC-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[TMP14]], i64 12 -; AUTO_VEC-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_1]], <4 x double>* [[TMP21]], align 8 +; AUTO_VEC-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT]] +; AUTO_VEC-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT]], <4 x double>* [[TMP14]], align 8 +; AUTO_VEC-NEXT: [[TMP15:%.*]] = getelementptr double, double* [[TMP13]], i64 4 +; AUTO_VEC-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_1]], <4 x double>* [[TMP16]], align 8 +; AUTO_VEC-NEXT: [[TMP17:%.*]] = getelementptr double, double* [[TMP13]], i64 8 +; AUTO_VEC-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_1]], <4 x double>* [[TMP18]], align 8 +; AUTO_VEC-NEXT: [[TMP19:%.*]] = getelementptr double, double* [[TMP13]], i64 12 +; AUTO_VEC-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_1]], <4 x double>* [[TMP20]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD2_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD3_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_1]] -; AUTO_VEC-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_1]], <4 x double>* [[TMP23]], align 8 -; AUTO_VEC-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[TMP22]], i64 4 -; AUTO_VEC-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], <4 x double>* [[TMP25]], align 8 -; AUTO_VEC-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[TMP22]], i64 8 -; AUTO_VEC-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_2]], <4 x double>* [[TMP27]], align 8 -; AUTO_VEC-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP22]], i64 12 -; AUTO_VEC-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_2]], <4 x double>* [[TMP29]], align 8 +; AUTO_VEC-NEXT: [[TMP21:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_1]] +; AUTO_VEC-NEXT: [[TMP22:%.*]] = bitcast double* [[TMP21]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_1]], <4 x double>* [[TMP22]], align 8 +; AUTO_VEC-NEXT: [[TMP23:%.*]] = getelementptr double, double* [[TMP21]], i64 4 +; AUTO_VEC-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], <4 x double>* [[TMP24]], align 8 +; AUTO_VEC-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[TMP21]], i64 8 +; AUTO_VEC-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_2]], <4 x double>* [[TMP26]], align 8 +; AUTO_VEC-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[TMP21]], i64 12 +; AUTO_VEC-NEXT: [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_2]], <4 x double>* [[TMP28]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD2_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD3_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP30:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_2]] -; AUTO_VEC-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_2]], <4 x double>* [[TMP31]], align 8 -; AUTO_VEC-NEXT: [[TMP32:%.*]] = getelementptr double, double* [[TMP30]], i64 4 -; AUTO_VEC-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], <4 x double>* [[TMP33]], align 8 -; AUTO_VEC-NEXT: [[TMP34:%.*]] = getelementptr double, double* [[TMP30]], i64 8 -; AUTO_VEC-NEXT: [[TMP35:%.*]] = bitcast double* [[TMP34]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_3]], <4 x double>* [[TMP35]], align 8 -; AUTO_VEC-NEXT: [[TMP36:%.*]] = getelementptr double, double* [[TMP30]], i64 12 -; AUTO_VEC-NEXT: [[TMP37:%.*]] = bitcast double* [[TMP36]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_3]], <4 x double>* [[TMP37]], align 8 +; AUTO_VEC-NEXT: [[TMP29:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_2]] +; AUTO_VEC-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_2]], <4 x double>* [[TMP30]], align 8 +; AUTO_VEC-NEXT: [[TMP31:%.*]] = getelementptr double, double* [[TMP29]], i64 4 +; AUTO_VEC-NEXT: [[TMP32:%.*]] = bitcast double* [[TMP31]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], <4 x double>* [[TMP32]], align 8 +; AUTO_VEC-NEXT: [[TMP33:%.*]] = getelementptr double, double* [[TMP29]], i64 8 +; AUTO_VEC-NEXT: [[TMP34:%.*]] = bitcast double* [[TMP33]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_3]], <4 x double>* [[TMP34]], align 8 +; AUTO_VEC-NEXT: [[TMP35:%.*]] = getelementptr double, double* [[TMP29]], i64 12 +; AUTO_VEC-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_3]], <4 x double>* [[TMP36]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 64 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT_3]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 @@ -395,18 +394,18 @@ define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-NEXT: [[STEP_ADD_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], ; AUTO_VEC-NEXT: [[STEP_ADD2_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], ; AUTO_VEC-NEXT: [[STEP_ADD3_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[TMP38:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_EPIL]] -; AUTO_VEC-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_EPIL]], <4 x double>* [[TMP39]], align 8 -; AUTO_VEC-NEXT: [[TMP40:%.*]] = getelementptr double, double* [[TMP38]], i64 4 -; AUTO_VEC-NEXT: [[TMP41:%.*]] = bitcast double* [[TMP40]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_EPIL]], <4 x double>* [[TMP41]], align 8 -; AUTO_VEC-NEXT: [[TMP42:%.*]] = getelementptr double, double* [[TMP38]], i64 8 -; AUTO_VEC-NEXT: [[TMP43:%.*]] = bitcast double* [[TMP42]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_EPIL]], <4 x double>* [[TMP43]], align 8 -; AUTO_VEC-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[TMP38]], i64 12 -; AUTO_VEC-NEXT: [[TMP45:%.*]] = bitcast double* [[TMP44]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_EPIL]], <4 x double>* [[TMP45]], align 8 +; AUTO_VEC-NEXT: [[TMP37:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_EPIL]] +; AUTO_VEC-NEXT: [[TMP38:%.*]] = bitcast double* [[TMP37]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_EPIL]], <4 x double>* [[TMP38]], align 8 +; AUTO_VEC-NEXT: [[TMP39:%.*]] = getelementptr double, double* [[TMP37]], i64 4 +; AUTO_VEC-NEXT: [[TMP40:%.*]] = bitcast double* [[TMP39]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_EPIL]], <4 x double>* [[TMP40]], align 8 +; AUTO_VEC-NEXT: [[TMP41:%.*]] = getelementptr double, double* [[TMP37]], i64 8 +; AUTO_VEC-NEXT: [[TMP42:%.*]] = bitcast double* [[TMP41]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2_EPIL]], <4 x double>* [[TMP42]], align 8 +; AUTO_VEC-NEXT: [[TMP43:%.*]] = getelementptr double, double* [[TMP37]], i64 12 +; AUTO_VEC-NEXT: [[TMP44:%.*]] = bitcast double* [[TMP43]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3_EPIL]], <4 x double>* [[TMP44]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 16 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT_EPIL]] = fadd fast <4 x double> [[VEC_IND_EPIL]], ; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 @@ -414,13 +413,13 @@ define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], [[LOOP8:!llvm.loop !.*]] ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; AUTO_VEC-NEXT: [[TMP46:%.*]] = add nsw i64 [[N_VEC]], -1 -; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP46]] to double -; AUTO_VEC-NEXT: [[TMP47:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP45:%.*]] = add nsw i64 [[N_VEC]], -1 +; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP45]] to double +; AUTO_VEC-NEXT: [[TMP46:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, double* [[A]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], double* [[T0]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 @@ -428,7 +427,7 @@ define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]] ; AUTO_VEC-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; AUTO_VEC: for.end: -; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP47]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP46]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: ret double [[J_LCSSA]] ; entry: @@ -452,12 +451,11 @@ for.end: define double @external_use_without_fast_math(double* %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_without_fast_math( ; AUTO_VEC-NEXT: entry: -; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; AUTO_VEC-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], -1 +; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) +; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[SMAX]], 7 -; AUTO_VEC-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 -; AUTO_VEC-NEXT: br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; AUTO_VEC-NEXT: br i1 [[TMP1]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; AUTO_VEC: entry.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 47456e1..b90aaa3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -8,26 +8,24 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-LABEL: @inv_load_conditional( ; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 +; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX4]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775792 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 @@ -35,27 +33,26 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP4]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> undef), !alias.scope !3 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> undef), !alias.scope !3 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8 +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX4]], 8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX9:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX9:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX9]], 9223372036854775800 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer @@ -64,18 +61,18 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX12]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX12]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX12]], 8 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC11]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC11]] +; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP9]], <8 x i32> undef) -; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> +; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef) +; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX9]], [[N_VEC11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i32 7 ; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -95,10 +92,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], [[LOOP9:!llvm.loop !.*]] ; CHECK: for.end.loopexit: -; CHECK-NEXT: [[A_LCSSA_LCSSA8:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[A_LCSSA_LCSSA8:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA8]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA8]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[A_LCSSA_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 754eab8..4640de40 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -11,60 +11,58 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b ; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 64 +; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX3]] +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B2]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775744 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775744 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 16 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 16 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 32 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 32 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 48 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 48 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI5]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[TMP12]] = add <16 x i32> [[VEC_PHI6]], [[WIDE_LOAD9]] -; CHECK-NEXT: [[TMP13]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]] +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP8]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9]] = add <16 x i32> [[VEC_PHI5]], [[WIDE_LOAD8]] +; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI6]], [[WIDE_LOAD9]] +; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]] ; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <16 x i32> [[TMP12]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX12:%.*]] = add <16 x i32> [[TMP13]], [[BIN_RDX11]] -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX12]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX12:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX12]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -77,7 +75,7 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP7:!llvm.loop !.*]] ; CHECK: for.end: -; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[T4]] ; entry: @@ -106,26 +104,24 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-LABEL: @inv_val_store_to_inv_address_conditional( ; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 +; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX4]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775792 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 @@ -135,27 +131,26 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !8, !noalias !11 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP5]], align 4, !alias.scope !8, !noalias !11 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT7]], <16 x i32*> [[BROADCAST_SPLAT9]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !11 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT7]], <16 x i32*> [[BROADCAST_SPLAT9]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !11 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP13:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP13:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8 +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX4]], 8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX10:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX10:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX10]], 9223372036854775800 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer @@ -166,16 +161,16 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX13]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD16]], [[BROADCAST_SPLAT18]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT20]], <8 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT20]], <8 x i32*> [[BROADCAST_SPLAT22]], i32 4, <8 x i1> [[TMP10]]) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX13]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD16]], [[BROADCAST_SPLAT18]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT20]], <8 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT20]], <8 x i32*> [[BROADCAST_SPLAT22]], i32 4, <8 x i1> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT14]] = add i64 [[INDEX13]], 8 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC12]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC12]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC12]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -230,36 +225,34 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-LABEL: @variant_val_store_to_inv_address_conditional( ; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 +; CHECK-NEXT: [[SMAX15:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX15]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[C5:%.*]] = bitcast i32* [[C:%.*]] to i8* +; CHECK-NEXT: [[C4:%.*]] = bitcast i32* [[C:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1 -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[C]], i64 [[SMAX2]] +; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1 +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[C]], i64 [[SMAX]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND08:%.*]] = icmp ugt i32* [[SCEVGEP6]], [[B]] -; CHECK-NEXT: [[BOUND19:%.*]] = icmp ugt i32* [[SCEVGEP]], [[C]] -; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] -; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT10]] -; CHECK-NEXT: [[BOUND012:%.*]] = icmp ugt i32* [[SCEVGEP6]], [[A]] -; CHECK-NEXT: [[BOUND113:%.*]] = icmp ugt i8* [[UGLYGEP]], [[C5]] -; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] -; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: [[BOUND07:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]] +; CHECK-NEXT: [[BOUND18:%.*]] = icmp ugt i32* [[SCEVGEP]], [[C]] +; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT9]] +; CHECK-NEXT: [[BOUND011:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[A]] +; CHECK-NEXT: [[BOUND112:%.*]] = icmp ugt i8* [[UGLYGEP]], [[C4]] +; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]] +; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT13]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX14]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK16:%.*]] = icmp ult i64 [[SMAX]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK16:%.*]] = icmp ult i64 [[SMAX15]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK16]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX15]], 9223372036854775792 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 @@ -269,30 +262,29 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !17, !noalias !20 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP5]], align 4, !alias.scope !17, !noalias !20 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP7]], i32 8, <16 x i1> [[TMP4]], <16 x i32> poison), !alias.scope !23 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT20]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !24, !noalias !23 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !17, !noalias !20 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !23 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT20]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP25:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP25:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX15]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8 +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX15]], 8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX21:%.*]] = select i1 [[TMP9]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX21:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[N_VEC23:%.*]] = and i64 [[SMAX21]], 9223372036854775800 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> poison, <8 x i32> zeroinitializer @@ -303,19 +295,19 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX24]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD27]], [[BROADCAST_SPLAT29]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT31]], <8 x i32>* [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX24]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP15]], i32 8, <8 x i1> [[TMP12]], <8 x i32> poison) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP12]]) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX24]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i32>, <8 x i32>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD27]], [[BROADCAST_SPLAT29]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT31]], <8 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX24]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP12]], i32 8, <8 x i1> [[TMP9]], <8 x i32> poison) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]] -; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]] +; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[SMAX21]], [[N_VEC23]] ; CHECK-NEXT: br i1 [[CMP_N26]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 63bad59..b6d02ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -14,55 +14,53 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp ; CHECK-NEXT: [[DOT11:%.*]] = bitcast i8 addrspace(1)* [[DOT10]] to i8 addrspace(1)* addrspace(1)* ; CHECK-NEXT: [[DOT12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP1:%.*]], i64 16 ; CHECK-NEXT: [[DOT13:%.*]] = bitcast i8 addrspace(1)* [[DOT12]] to i8 addrspace(1)* addrspace(1)* -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 16 +; CHECK-NEXT: [[UMAX2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2:%.*]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX2]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], 1 -; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP4]], i64 [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[UMAX1]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 16 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP6]] -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP6]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP2]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[UMAX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 16 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP4]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[DOT12]], [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX]], -16 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX2]], -16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP5]] to <4 x i8 addrspace(1)*> addrspace(1)* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP6]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 4 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP7]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 8 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP9]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 8 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 12 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP11]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 12 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP13]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 4 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP15]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 4 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 8 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP17]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 8 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 12 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP19]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 12 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP21]] to <4 x i8 addrspace(1)*> addrspace(1)* -; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP22]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index a833b98..8ea9bc1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -39,63 +39,61 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8 +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[UMIN1]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP6]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMIN1]] -; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]]) +; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[CONV3]], -1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[UMIN]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i8 +; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255 -; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]] -; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255 +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP19:%.*]] = or i1 false, [[TMP18]] +; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]] ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4 -; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1 -; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1 -; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32 -; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP20]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], -4 +; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP26:%.*]] = add i8 [[TMP22]], -1 +; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1 +; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP26]] to i32 +; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]] -; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]] +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] @@ -106,7 +104,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], [[LOOP2:!llvm.loop !.*]] ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 7d4a3c5..b24553e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -437,41 +437,39 @@ exit: define i16 @multiple_exit(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_exit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP11]], <4 x i16>* [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] @@ -524,41 +522,39 @@ if.end: define i16 @multiple_exit2(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_exit2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP11]], <4 x i16>* [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index bc132d9..f937e5e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -649,13 +649,14 @@ for.end: define i32 @sink_into_replication_region(i32 %y) { ; CHECK-LABEL: @sink_into_replication_region( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]] ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -741,13 +742,14 @@ bb: define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-LABEL: @sink_into_replication_region_multiple( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]] ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 3c8d073..d2991cb 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -196,10 +196,53 @@ define void @bug18724(i1 %cond) { ; UNROLL-NEXT: entry: ; UNROLL-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true ; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP0]]) +; UNROLL-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0) +; UNROLL-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef +; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL: vector.ph: +; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 +; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; UNROLL-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]] +; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] +; UNROLL: vector.body: +; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] +; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ] +; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ] +; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]] +; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1 +; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]] +; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]] +; UNROLL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4 +; UNROLL-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE4]] +; UNROLL: pred.store.if: +; UNROLL-NEXT: store i32 2, i32* [[TMP4]], align 4 +; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE4]] +; UNROLL: pred.store.continue4: +; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], 1 +; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[VEC_PHI2]], 1 +; UNROLL-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP8]] +; UNROLL-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP9]] +; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] +; UNROLL: middle.block: +; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]] +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; UNROLL-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true +; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP11]]) +; UNROLL-NEXT: br label [[SCALAR_PH]] +; UNROLL: scalar.ph: +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ] +; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[FOR_BODY14:%.*]] ; UNROLL: for.body14: -; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ] +; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; UNROLL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] ; UNROLL-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 ; UNROLL-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] @@ -221,8 +264,16 @@ define void @bug18724(i1 %cond) { ; UNROLL-NOSIMPLIFY: for.body9: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND:%.*]], label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]] ; UNROLL-NOSIMPLIFY: for.body14.preheader: -; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL-NOSIMPLIFY-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0) +; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], undef +; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; UNROLL-NOSIMPLIFY-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NOSIMPLIFY: vector.ph: +; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 +; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] @@ -231,33 +282,33 @@ define void @bug18724(i1 %cond) { ; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]] ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: -; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP0]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP3]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] ; UNROLL-NOSIMPLIFY: pred.store.if3: -; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP1]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP4]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE4]] ; UNROLL-NOSIMPLIFY: pred.store.continue4: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i32 [[VEC_PHI]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i32 [[VEC_PHI2]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP4]] -; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i32 [[VEC_PHI]], 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI2]], 1 +; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP7]] +; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP8]] ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]] -; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 +; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[FOR_BODY14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY14:%.*]] ; UNROLL-NOSIMPLIFY: for.body14: @@ -287,10 +338,49 @@ define void @bug18724(i1 %cond) { ; VEC-NEXT: entry: ; VEC-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true ; VEC-NEXT: call void @llvm.assume(i1 [[TMP0]]) +; VEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0) +; VEC-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef +; VEC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 +; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC: vector.ph: +; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 +; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; VEC-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]] +; VEC-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC: vector.body: +; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ] +; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]] +; VEC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[TMP4]] +; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 +; VEC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* +; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4 +; VEC-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] +; VEC: pred.store.if: +; VEC-NEXT: store i32 2, i32* [[TMP5]], align 4 +; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] +; VEC: pred.store.continue2: +; VEC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_PHI]], +; VEC-NEXT: [[PREDPHI]] = select <2 x i1> undef, <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP8]] +; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; VEC: middle.block: +; VEC-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]]) +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; VEC-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true +; VEC-NEXT: call void @llvm.assume(i1 [[TMP11]]) +; VEC-NEXT: br label [[SCALAR_PH]] +; VEC: scalar.ph: +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ] +; VEC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; VEC-NEXT: br label [[FOR_BODY14:%.*]] ; VEC: for.body14: -; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ] +; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] ; VEC-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 ; VEC-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] @@ -367,7 +457,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL: pred.store.continue6: ; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] +; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -386,7 +476,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -485,7 +575,7 @@ define void @minimal_bit_widths(i1 %c) { ; VEC: pred.store.continue3: ; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; VEC: middle.block: ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -504,7 +594,7 @@ define void @minimal_bit_widths(i1 %c) { ; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; VEC: for.end: ; VEC-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index d5e299d..bd4936c 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -494,35 +494,34 @@ for.body: ; preds = %for.body, %entry define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) { ; CHECK-LABEL: @even_load_dynamic_tc( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N:%.*]], 2 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8 +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 2) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP3]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[INDEX]], 9223372036854775804 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[INDEX]], 9223372036854775804 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -920,46 +919,45 @@ for.body: ; preds = %for.body, %entry define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) { ; CHECK-LABEL: @PR27626_0( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1 -; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP6]], align 4 +; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[Z]], i32* [[TMP6]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP7]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP8]], align 4 -; CHECK-NEXT: store i32 [[Z]], i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 ; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 ; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 ; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 -; CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1010,65 +1008,64 @@ for.end: define i32 @PR27626_1(%pair.i32 *%p, i64 %n) { ; CHECK-LABEL: @PR27626_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 ; CHECK-NEXT: store i32 [[TMP12]], i32* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 ; CHECK-NEXT: store i32 [[TMP13]], i32* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 ; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 -; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP16]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP21:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP20:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP20]], i32* [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP21]] = add nsw i32 [[TMP20]], [[S]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[P_I_X]], align 4 +; CHECK-NEXT: store i32 [[TMP19]], i32* [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP20]] = add nsw i32 [[TMP19]], [[S]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP27:!llvm.loop !.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[TMP21]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[TMP22]] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[TMP20]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[TMP21]] ; entry: br label %for.body @@ -1105,47 +1102,46 @@ for.end: define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) { ; CHECK-LABEL: @PR27626_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 -1, i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 -1, i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1 -; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP6]], align 4 +; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[Z]], i32* [[TMP6]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP7]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP8]], align 4 -; CHECK-NEXT: store i32 [[Z]], i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 ; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 ; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 ; CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP13]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 -; CHECK-NEXT: store i32 [[TMP19]], i32* [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP28:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP28:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1157,8 +1153,8 @@ define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) { ; CHECK-NEXT: [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 -1, i32 0 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 ; CHECK-NEXT: store i32 [[Z]], i32* [[P_I_X]], align 4 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[P_I_MINUS_1_X]], align 4 -; CHECK-NEXT: store i32 [[TMP21]], i32* [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[P_I_MINUS_1_X]], align 4 +; CHECK-NEXT: store i32 [[TMP20]], i32* [[P_I_Y]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP29:!llvm.loop !.*]] @@ -1199,73 +1195,72 @@ for.end: define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) { ; CHECK-LABEL: @PR27626_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 -; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 -; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 -; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 -; CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP13]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0 +; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2 +; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4 +; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6 +; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP20]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP19]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP30:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP30:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]]) +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP19]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP25:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP24:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 ; CHECK-NEXT: [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I_PLUS_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP23]], i32* [[P_I_PLUS_1_Y]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP25]] = add nsw i32 [[TMP24]], [[S]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[P_I_X]], align 4 +; CHECK-NEXT: store i32 [[TMP22]], i32* [[P_I_PLUS_1_Y]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP24]] = add nsw i32 [[TMP23]], [[S]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP31:!llvm.loop !.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[TMP25]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[TMP26]] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ [[TMP24]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[TMP25]] ; entry: br label %for.body @@ -1306,15 +1301,14 @@ for.end: define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-LABEL: @PR27626_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 2 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 6 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775804 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -1324,28 +1318,28 @@ define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1 -; CHECK-NEXT: store i32 [[X:%.*]], i32* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1 +; CHECK-NEXT: store i32 [[X:%.*]], i32* [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[X]], i32* [[TMP8]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[TMP9]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[TMP10]], align 4 -; CHECK-NEXT: store i32 [[X]], i32* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <8 x i32>* +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <8 x i32> -; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP32:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP32:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1397,66 +1391,65 @@ for.end: define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-LABEL: @PR27626_5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 5 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 5 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], -4 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 6 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775804 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[TMP4]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[TMP3]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[TMP5]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP4]], 7 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP29]] -; CHECK-NEXT: store i32 [[X:%.*]], i32* [[TMP16]], align 4 -; CHECK-NEXT: store i32 [[X]], i32* [[TMP18]], align 4 -; CHECK-NEXT: store i32 [[X]], i32* [[TMP20]], align 4 -; CHECK-NEXT: store i32 [[X]], i32* [[TMP22]], align 4 -; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[TMP24]], align 4 -; CHECK-NEXT: store i32 [[Y]], i32* [[TMP26]], align 4 -; CHECK-NEXT: store i32 [[Y]], i32* [[TMP28]], align 4 -; CHECK-NEXT: store i32 [[Y]], i32* [[TMP30]], align 4 -; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP28]] +; CHECK-NEXT: store i32 [[X:%.*]], i32* [[TMP15]], align 4 +; CHECK-NEXT: store i32 [[X]], i32* [[TMP17]], align 4 +; CHECK-NEXT: store i32 [[X]], i32* [[TMP19]], align 4 +; CHECK-NEXT: store i32 [[X]], i32* [[TMP21]], align 4 +; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[TMP23]], align 4 +; CHECK-NEXT: store i32 [[Y]], i32* [[TMP25]], align 4 +; CHECK-NEXT: store i32 [[Y]], i32* [[TMP27]], align 4 +; CHECK-NEXT: store i32 [[Y]], i32* [[TMP29]], align 4 +; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[Z]], i32* [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP12]], align 4 ; CHECK-NEXT: store i32 [[Z]], i32* [[TMP13]], align 4 -; CHECK-NEXT: store i32 [[Z]], i32* [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP34:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP34:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index bf6e873..6dcbd790 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -166,15 +166,13 @@ for.end: ; preds = %for.body ; CHECK-LABEL: inv_val_store_to_inv_address_conditional_diff_values_ic ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]] @@ -272,15 +270,13 @@ for.end: ; preds = %for.body ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]] @@ -367,16 +363,14 @@ for.end: ; preds = %for.body define i32 @variant_val_store_to_inv_address(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-LABEL: @variant_val_store_to_inv_address( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 +; CHECK-NEXT: [[SMAX3:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX3]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B2]] diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index bb0ad1c..29ba6d7 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -7,28 +7,27 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define void @bottom_tested(i16* %p, i32 %n) { ; CHECK-LABEL: @bottom_tested( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -46,45 +45,44 @@ define void @bottom_tested(i16* %p, i32 %n) { ; ; TAILFOLD-LABEL: @bottom_tested( ; TAILFOLD-NEXT: entry: -; TAILFOLD-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; TAILFOLD-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; TAILFOLD-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 +; TAILFOLD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; TAILFOLD-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 ; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TAILFOLD: vector.ph: -; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1 +; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1 ; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1 ; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] ; TAILFOLD: vector.body: ; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] -; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; TAILFOLD-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 1 -; TAILFOLD-NEXT: [[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; TAILFOLD-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> -; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; TAILFOLD-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; TAILFOLD-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; TAILFOLD-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> +; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; TAILFOLD-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TAILFOLD: pred.store.if: -; TAILFOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; TAILFOLD-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] -; TAILFOLD-NEXT: store i16 0, i16* [[TMP8]], align 4 +; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; TAILFOLD-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] +; TAILFOLD-NEXT: store i16 0, i16* [[TMP7]], align 4 ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] ; TAILFOLD: pred.store.continue: -; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; TAILFOLD-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; TAILFOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; TAILFOLD-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.if1: -; TAILFOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 -; TAILFOLD-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]] -; TAILFOLD-NEXT: store i16 0, i16* [[TMP11]], align 4 +; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; TAILFOLD-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]] +; TAILFOLD-NEXT: store i16 0, i16* [[TMP10]], align 4 ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.continue2: ; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; TAILFOLD-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; TAILFOLD-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; TAILFOLD-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; TAILFOLD-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; TAILFOLD: middle.block: ; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] ; TAILFOLD: scalar.ph: @@ -120,33 +118,32 @@ if.end: define void @early_exit(i16* %p, i32 %n) { ; CHECK-LABEL: @early_exit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -258,35 +255,33 @@ if.end: define void @multiple_unique_exit(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_unique_exit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -346,35 +341,33 @@ if.end: define i32 @multiple_unique_exit2(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_unique_exit2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1 ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] @@ -438,35 +431,33 @@ if.end: define i32 @multiple_unique_exit3(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_unique_exit3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -- 2.7.4