target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
-define void @foo() {
+define void @foo(i64* %ptr, i32* %ptr.2) {
; CHECK-LABEL: @foo(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND_TRUNC:%.+]] = phi <4 x i32> [ <i32 2, i32 3, i32 4, i32 5>, %vector.ph ], [ [[VEC_IND_TRUNC_NEXT:%.+]], %vector.body ]
+; CHECK-NEXT: = add i64 [[INDEX]], 0
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TRUNC]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TRUNC]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TRUNC]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TRUNC]], 3
+; CHECK-NEXT: store i32 [[TMP7]], i32* %ptr.2, align 4
+; CHECK-NEXT: store i32 [[TMP8]], i32* %ptr.2, align 4
+; CHECK-NEXT: store i32 [[TMP9]], i32* %ptr.2, align 4
+; CHECK-NEXT: store i32 [[TMP10]], i32* %ptr.2, align 4
+; CHECK: store <4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[VEC_IND_TRUNC_NEXT]] = add <4 x i32> [[VEC_IND_TRUNC]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
;
entry:
br label %loop
loop:
+ %can.iv = phi i64 [ 0, %entry ], [ %can.iv.next, %loop ]
%0 = phi i64 [ 2, %entry ], [ %3, %loop ]
%1 = and i64 %0, 4294967295
%2 = trunc i64 %0 to i32
+ store i32 %2, i32* %ptr.2
+ %gep.ptr = getelementptr inbounds i64, i64* %ptr, i64 %can.iv
+ store i64 %0, i64* %gep.ptr
%3 = add nuw nsw i64 %1, 1
%4 = icmp sgt i32 %2, 80
+ %can.iv.next = add nuw nsw i64 %can.iv, 1
br i1 %4, label %exit, label %loop
exit:
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-@a = common local_unnamed_addr global i32 0, align 4
@b = common local_unnamed_addr global i8 0, align 1
-; Function Attrs: norecurse nounwind uwtable
-define void @doit1() local_unnamed_addr{
+define void @doit1(i32* %ptr) {
+; CHECK-LABEL: @doit1(
+; CHECK: vector.body:
+; CHECK-NEXT: [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT: [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
+
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
+
+; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
+
+; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, i32* %ptr, i32 [[TMP7]]
+; CHECK-NEXT: [[GEP2:%.+]] = getelementptr inbounds i32, i32* [[GEP1]], i32 0
+; CHECK-NEXT: [[GEP_BC:%.+]] = bitcast i32* [[GEP2]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[I32_IV]], <4 x i32>* [[GEP_BC]], align 4
+
+; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4
+; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
+; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+;
entry:
br label %for.body
%trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8
%i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv
+ %ptr.gep = getelementptr inbounds i32, i32* %ptr, i32 %main.iv
+ store i32 %i32.iv, i32* %ptr.gep
%noop.conv.under.pse = and i32 %i32.iv, 255
%i32.add = add nuw nsw i32 %noop.conv.under.pse, 9
%tobool = icmp eq i32 %inc, 16
br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body
-; CHECK-LABEL: @doit1(
-; CHECK: vector.body:
-; CHECK-NEXT: [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
-; CHECK-NEXT: [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
-
-; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
-
-; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
-
-; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4
-; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
-; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
-
for.cond.for.end_crit_edge:
store i8 %i8.add, i8* @b, align 1
br label %for.end