From: Philip Reames Date: Thu, 21 Jul 2022 22:27:33 +0000 (-0700) Subject: [LV] Add a load focused version of the r45679 test X-Git-Tag: upstream/15.0.7~892 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=54cb87964d3513c0765d68bbae594d09f75e2e25;p=platform%2Fupstream%2Fllvm.git [LV] Add a load focused version of the r45679 test This is a reproducer for a bug in predicated instruction handling. The final result code is correct, but the reasoning by which we get there isn't. --- diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 78c22a9..f9a9cce 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -55,7 +55,7 @@ define void @pr45679(i32* %A) optsize { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -67,7 +67,7 @@ define void @pr45679(i32* %A) optsize { ; CHECK-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -117,7 +117,7 @@ define void @pr45679(i32* %A) optsize { ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2> ; VF2UF2-NEXT: 
[[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2UF2: middle.block: ; VF2UF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2UF2: scalar.ph: @@ -129,7 +129,7 @@ define void @pr45679(i32* %A) optsize { ; VF2UF2-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 ; VF2UF2-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; VF2UF2-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2 +; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; @@ -139,46 +139,46 @@ define void @pr45679(i32* %A) optsize { ; VF1UF4: vector.ph: ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: -; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 -; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 -; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 -; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] +; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 
[[VEC_IV4]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV5]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], 13 ; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[SUNK_IND0:%.*]] = add i32 [[INDEX]], 0 -; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[SUNK_IND0]] +; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; VF1UF4: pred.store.if7: -; VF1UF4-NEXT: [[SUNK_IND1:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND1]] +; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] ; VF1UF4: pred.store.continue8: -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; VF1UF4: pred.store.if9: -; VF1UF4-NEXT: [[SUNK_IND2:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND2]] +; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 -; VF1UF4-NEXT: br label 
[[PRED_STORE_CONTINUE7]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] ; VF1UF4: pred.store.continue10: -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] ; VF1UF4: pred.store.if11: -; VF1UF4-NEXT: [[SUNK_IND3:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND3]] +; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] ; VF1UF4: pred.store.continue12: ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] +; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF1UF4: middle.block: ; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF1UF4: scalar.ph: @@ -190,7 +190,7 @@ define void @pr45679(i32* %A) optsize { ; VF1UF4-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 ; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] +; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; VF1UF4: exit: ; VF1UF4-NEXT: ret void ; @@ -208,3 +208,226 @@ loop: exit: ret void } + +define void @load_variant(i64* noalias %a, i64* noalias %b) { +; CHECK-LABEL: @load_variant( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 13, i64 13, i64 13, i64 13> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +; CHECK-NEXT: store i64 [[TMP4]], i64* [[B:%.*]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +; CHECK-NEXT: store i64 [[TMP9]], i64* [[B]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +; CHECK-NEXT: store i64 [[TMP14]], i64* [[B]], align 8 +; 
CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP18]], align 8 +; CHECK-NEXT: store i64 [[TMP19]], i64* [[B]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: store i64 [[V]], i64* [[B]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +; VF2UF2-LABEL: 
@load_variant( +; VF2UF2-NEXT: entry: +; VF2UF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2UF2: vector.ph: +; VF2UF2-NEXT: br label [[VECTOR_BODY:%.*]] +; VF2UF2: vector.body: +; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] +; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] +; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> +; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], <i64 13, i64 13> +; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], <i64 13, i64 13> +; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF2UF2: pred.store.if: +; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP3]] +; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +; VF2UF2-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8 +; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]] +; VF2UF2: pred.store.continue: +; VF2UF2-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] +; VF2UF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; VF2UF2-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; VF2UF2: pred.store.if2: +; VF2UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 +; VF2UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]] +; VF2UF2-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +; VF2UF2-NEXT: store i64 [[TMP10]], i64* [[B]], align 8 +; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE3]] +; VF2UF2: pred.store.continue3: +; VF2UF2-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ] +; VF2UF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; 
VF2UF2-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VF2UF2: pred.store.if4: +; VF2UF2-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2 +; VF2UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP13]] +; VF2UF2-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +; VF2UF2-NEXT: store i64 [[TMP15]], i64* [[B]], align 8 +; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VF2UF2: pred.store.continue5: +; VF2UF2-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ] +; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] +; VF2UF2: pred.store.if6: +; VF2UF2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3 +; VF2UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP18]] +; VF2UF2-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP19]], align 8 +; VF2UF2-NEXT: store i64 [[TMP20]], i64* [[B]], align 8 +; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] +; VF2UF2: pred.store.continue7: +; VF2UF2-NEXT: [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ] +; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> +; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2UF2: middle.block: +; VF2UF2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VF2UF2: scalar.ph: +; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF2UF2-NEXT: br label [[FOR_BODY:%.*]] +; VF2UF2: for.body: +; VF2UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; VF2UF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; 
VF2UF2-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; VF2UF2-NEXT: store i64 [[V]], i64* [[B]], align 8 +; VF2UF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; VF2UF2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 +; VF2UF2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2UF2: for.end: +; VF2UF2-NEXT: ret void +; +; VF1UF4-LABEL: @load_variant( +; VF1UF4-NEXT: entry: +; VF1UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF1UF4: vector.ph: +; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] +; VF1UF4: vector.body: +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] +; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 +; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 +; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 +; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13 +; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4: pred.store.if: +; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDUCTION]] +; VF1UF4-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +; VF1UF4-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] +; VF1UF4: pred.store.continue: +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; VF1UF4: pred.store.if7: +; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION1]] +; VF1UF4-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], 
align 8 +; VF1UF4-NEXT: store i64 [[TMP7]], i64* [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] +; VF1UF4: pred.store.continue8: +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; VF1UF4: pred.store.if9: +; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION2]] +; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +; VF1UF4-NEXT: store i64 [[TMP9]], i64* [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] +; VF1UF4: pred.store.continue10: +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] +; VF1UF4: pred.store.if11: +; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION3]] +; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 +; VF1UF4-NEXT: store i64 [[TMP11]], i64* [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] +; VF1UF4: pred.store.continue12: +; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF1UF4: middle.block: +; VF1UF4-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VF1UF4: scalar.ph: +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: br label [[FOR_BODY:%.*]] +; VF1UF4: for.body: +; VF1UF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; VF1UF4-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; VF1UF4-NEXT: store i64 [[V]], i64* [[B]], align 8 +; VF1UF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; VF1UF4-NEXT: 
[[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 +; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF1UF4: for.end: +; VF1UF4-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv + %v = load i64, i64* %arrayidx + store i64 %v, i64* %b + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 14 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +}