From 78914e8c326e266a55e35c7ec724be8c8393bb6a Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sun, 8 Jan 2023 15:52:00 +0000
Subject: [PATCH] [VPlan] Keep entries in worklist in sinkScalarOperands.

Not removing the entries ensures that duplicates are avoided, reducing
the number of iterations.
---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp  |  4 ++--
 .../Transforms/LoopVectorize/X86/small-size.ll | 8 ++++----
 ...first-order-recurrence-sink-replicate-region.ll | 8 ++++----
 .../Transforms/LoopVectorize/float-induction.ll | 22 +++++++++++-----------
 .../Transforms/LoopVectorize/if-pred-stores.ll | 8 ++++----
 .../LoopVectorize/vplan-sink-scalars-and-merge.ll | 6 +++---
 6 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 057a328..d8fd7cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -126,10 +126,10 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
 
   bool ScalarVFOnly = Plan.hasScalarVFOnly();
   // Try to sink each replicate or scalar IV steps recipe in the worklist.
-  while (!WorkList.empty()) {
+  for (unsigned I = 0; I != WorkList.size(); ++I) {
     VPBasicBlock *SinkTo;
     VPRecipeBase *SinkCandidate;
-    std::tie(SinkTo, SinkCandidate) = WorkList.pop_back_val();
+    std::tie(SinkTo, SinkCandidate) = WorkList[I];
     if (SinkCandidate->getParent() == SinkTo ||
         SinkCandidate->mayHaveSideEffects() ||
         SinkCandidate->mayReadOrWriteMemory())
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index 4dc3a25..2f5f6f1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -159,8 +159,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
 ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
 ; CHECK: pred.store.continue24:
@@ -172,8 +172,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]]
 ; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]]
 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]]
+; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]]
 ; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
 ; CHECK: pred.store.continue26:
@@ -185,8 +185,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4
 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]]
 ; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]]
 ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]]
+; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]]
 ; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
 ; CHECK: pred.store.continue28:
@@ -198,8 +198,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]]
 ; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]]
 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]]
+; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]]
 ; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; CHECK: pred.store.continue30:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 2ccb573..63f9815 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -53,8 +53,8 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
-; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem>
 ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%6>
+; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem>
 ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
@@ -123,9 +123,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
-; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
+; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
 ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
@@ -287,10 +287,10 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
 ; CHECK: pred.store.if:
 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
 ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
-; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
 ; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
-; CHECK-NEXT: REPLICATE ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
+; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
 ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%6>
+; CHECK-NEXT: REPLICATE ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
 ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index f808a9a..6420f82 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1380,8 +1380,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
 ; VEC4_INTERL1: pred.store.if3:
 ; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL1-NEXT: store float [[TMP7]], ptr [[TMP9]], align 4
 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE3]]
 ; VEC4_INTERL1: pred.store.continue4:
@@ -1389,8 +1389,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VEC4_INTERL1: pred.store.if5:
 ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
-; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL1-NEXT: store float [[TMP11]], ptr [[TMP13]], align 4
 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE5]]
 ; VEC4_INTERL1: pred.store.continue6:
@@ -1398,8 +1398,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
 ; VEC4_INTERL1: pred.store.if7:
 ; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
-; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
+; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL1-NEXT: store float [[TMP15]], ptr [[TMP17]], align 4
 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE7]]
 ; VEC4_INTERL1: pred.store.continue8:
@@ -1461,8 +1461,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; VEC4_INTERL2: pred.store.if4:
 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1
-; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP11]], ptr [[TMP13]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE4]]
 ; VEC4_INTERL2: pred.store.continue5:
@@ -1470,8 +1470,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
 ; VEC4_INTERL2: pred.store.if6:
 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2
-; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
+; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP15]], ptr [[TMP17]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE6]]
 ; VEC4_INTERL2: pred.store.continue7:
@@ -1479,16 +1479,16 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; VEC4_INTERL2: pred.store.if8:
 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3
-; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]]
+; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP19]], ptr [[TMP21]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE8]]
 ; VEC4_INTERL2: pred.store.continue9:
 ; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
 ; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; VEC4_INTERL2: pred.store.if10:
-; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP23]], ptr [[TMP24]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE10]]
 ; VEC4_INTERL2: pred.store.continue11:
@@ -1496,8 +1496,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
 ; VEC4_INTERL2: pred.store.if12:
 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5
-; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]]
+; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP26]], ptr [[TMP28]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE12]]
 ; VEC4_INTERL2: pred.store.continue13:
@@ -1505,8 +1505,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
 ; VEC4_INTERL2: pred.store.if14:
 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6
-; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
+; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP30]], ptr [[TMP32]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE14]]
 ; VEC4_INTERL2: pred.store.continue15:
@@ -1514,8 +1514,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
 ; VEC4_INTERL2: pred.store.if16:
 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7
-; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]]
+; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
 ; VEC4_INTERL2-NEXT: store float [[TMP34]], ptr [[TMP36]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE16]]
 ; VEC4_INTERL2: pred.store.continue17:
@@ -1631,8 +1631,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
 ; VEC2_INTERL1_PRED_STORE: pred.store.if3:
 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP7]], ptr [[TMP9]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE3]]
 ; VEC2_INTERL1_PRED_STORE: pred.store.continue4:
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index a742e28..e3e2743 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -121,9 +121,9 @@ define i32 @test(ptr nocapture %f) #0 {
 ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
 ; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC: pred.store.if:
+; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP0]]
 ; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
 ; VEC-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP5]], 20
-; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP0]]
 ; VEC-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; VEC: pred.store.continue:
@@ -131,9 +131,9 @@ define i32 @test(ptr nocapture %f) #0 {
 ; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; VEC: pred.store.if1:
 ; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; VEC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]]
 ; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
 ; VEC-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 20
-; VEC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]]
 ; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4
 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
 ; VEC: pred.store.continue2:
@@ -572,8 +572,8 @@ define void @minimal_bit_widths(i1 %c) {
 ; VEC: pred.store.if:
 ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
 ; VEC-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
-; VEC-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
 ; VEC-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
+; VEC-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
 ; VEC-NEXT: store i8 [[TMP6]], ptr [[TMP7]], align 1
 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; VEC: pred.store.continue:
@@ -583,8 +583,8 @@ define void @minimal_bit_widths(i1 %c) {
 ; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
 ; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
 ; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
-; VEC-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
 ; VEC-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr undef, i64 [[TMP9]]
+; VEC-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
 ; VEC-NEXT: store i8 [[TMP12]], ptr [[TMP13]], align 1
 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]]
 ; VEC: pred.store.continue3:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index a98035c..def1a63 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -35,8 +35,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
 ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
 ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10>
-; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add>
 ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]
+; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add>
 ; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 
@@ -115,8 +115,8 @@ exit:
 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
 
 ; CHECK: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%add> = add vp<[[PRED]]>, ir<10>
 ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
+; CHECK-NEXT: REPLICATE ir<%add> = add vp<[[PRED]]>, ir<10>
 ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.a>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 
@@ -868,8 +868,8 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
 ; CHECK-NEXT: REPLICATE ir<%l1> = load ir<%src>
-; CHECK-NEXT: REPLICATE ir<%cmp> = icmp ir<%l1>, ir<0>
 ; CHECK-NEXT: REPLICATE ir<%l2> = trunc ir<%l1>
+; CHECK-NEXT: REPLICATE ir<%cmp> = icmp ir<%l1>, ir<0>
 ; CHECK-NEXT: REPLICATE ir<%sel> = select ir<%cmp>, ir<5>, ir<%l2>
 ; CHECK-NEXT: REPLICATE store ir<%sel>, ir<%dst>
 ; CHECK-NEXT: Successor(s): pred.store.continue
-- 
2.7.4
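
Note (illustration only, not part of the patch and not LLVM's SetVector implementation): the standalone sketch below shows why iterating the worklist by index, instead of popping entries, makes duplicate insertions free. The SetVectorInt type, the seed value, and main() are made up for this example; in the patch, WorkList is a SetVector of (VPBasicBlock *, VPRecipeBase *) pairs, but the dedup-on-insert behavior it relies on is the same.

// Standalone sketch of the worklist idiom adopted by the patch: iterate by
// index so processed entries stay in the set and duplicate insertions are
// rejected instead of being re-processed.
#include <cstdio>
#include <set>
#include <vector>

// Minimal stand-in for a SetVector<int>: deterministic order plus dedup.
struct SetVectorInt {
  std::vector<int> Order;
  std::set<int> Seen;

  bool insert(int V) {
    if (!Seen.insert(V).second)
      return false; // Already queued or already processed: no duplicate entry.
    Order.push_back(V);
    return true;
  }
  size_t size() const { return Order.size(); }
  int operator[](size_t I) const { return Order[I]; }
};

int main() {
  SetVectorInt WorkList;
  WorkList.insert(3);

  // size() is re-read each iteration, so entries appended while processing are
  // still visited; nothing is popped, so re-inserting an old entry is a no-op.
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    int Item = WorkList[I];
    std::printf("processing %d\n", Item);
    if (Item < 8) {
      WorkList.insert(Item + 2); // New work discovered while processing.
      WorkList.insert(5);        // Duplicate: rejected, not re-queued.
    }
  }
  return 0;
}

With the pre-patch pop-based loop, the second insert of 5 would be accepted after 5 had been popped, so it would be processed again; keeping entries in the set makes that re-insertion a no-op, which is the reduction in iterations the commit message describes.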