From: Simon Pilgrim
Date: Sat, 12 Feb 2022 21:48:31 +0000 (+0000)
Subject: [SLP][X86] Add common check prefix for horizontal reduction tests
X-Git-Tag: upstream/15.0.7~16694
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ea071884b0cc7210b3cc5fe858f0e892a779a23b;p=platform%2Fupstream%2Fllvm.git

[SLP][X86] Add common check prefix for horizontal reduction tests
---

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 03ec04c..fd15446 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s
-; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,CHECK
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,STORE
 
 ; #include
 ;
@@ -16,71 +16,38 @@
 ; }
 
 define i32 @add_red(float* %A, i32 %n) {
-; CHECK-LABEL: @add_red(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; CHECK-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
-; CHECK-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
-; CHECK-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
-; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]],
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
-; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
-; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-;
-; STORE-LABEL: @add_red(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; STORE-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; STORE: for.body.lr.ph:
-; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
-; STORE-NEXT: br label [[FOR_BODY:%.*]]
-; STORE: for.body:
-; STORE-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
-; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; STORE-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
-; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
-; STORE-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
-; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
-; STORE-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
-; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
-; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]],
-; STORE-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
-; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
-; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
-; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
-; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; STORE: for.cond.for.end_crit_edge:
-; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
-; STORE-NEXT: br label [[FOR_END]]
-; STORE: for.end:
-; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
+; ALL-LABEL: @add_red(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; ALL-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; ALL: for.body.lr.ph:
+; ALL-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
+; ALL-NEXT: br label [[FOR_BODY:%.*]]
+; ALL: for.body:
+; ALL-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
+; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
+; ALL-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
+; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
+; ALL-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
+; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
+; ALL-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
+; ALL-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
+; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
+; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]],
+; ALL-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
+; ALL-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
+; ALL-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
+; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
+; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; ALL: for.cond.for.end_crit_edge:
+; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
+; ALL-NEXT: br label [[FOR_END]]
+; ALL: for.end:
+; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
 ;
 entry:
 %cmp31 = icmp sgt i32 %n, 0
@@ -138,81 +105,43 @@ for.end:
 ; }
 
 define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
-; CHECK-LABEL: @mul_red(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; CHECK-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
-; CHECK-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
-; CHECK-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
-; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
-; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
-; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-;
-; STORE-LABEL: @mul_red(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; STORE-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; STORE: for.body.lr.ph:
-; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
-; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
-; STORE-NEXT: br label [[FOR_BODY:%.*]]
-; STORE: for.body:
-; STORE-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
-; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; STORE-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
-; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
-; STORE-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
-; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
-; STORE-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
-; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
-; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
-; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
-; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
-; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
-; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
-; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
-; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; STORE: for.cond.for.end_crit_edge:
-; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
-; STORE-NEXT: br label [[FOR_END]]
-; STORE: for.end:
-; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
+; ALL-LABEL: @mul_red(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; ALL: for.body.lr.ph:
+; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
+; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
+; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+; ALL-NEXT: br label [[FOR_BODY:%.*]]
+; ALL: for.body:
+; ALL-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
+; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
+; ALL-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
+; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
+; ALL-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
+; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
+; ALL-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
+; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
+; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
+; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
+; ALL-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
+; ALL-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
+; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
+; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; ALL: for.cond.for.end_crit_edge:
+; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
+; ALL-NEXT: br label [[FOR_END]]
+; ALL: for.end:
+; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
 ;
 entry:
 %cmp38 = icmp sgt i32 %n, 0
@@ -282,119 +211,62 @@ for.end:
 ; }
 
 define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
-; CHECK-LABEL: @long_red(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
-; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
-; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
-; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
-; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; CHECK-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
-; CHECK-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
-; CHECK-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
-; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
-; CHECK-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
-; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
-; CHECK-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
-; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
-; CHECK-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
-; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
-; CHECK-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
-; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
-; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
-; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
-; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
-; CHECK-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
-; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
-; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
-; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-;
-; STORE-LABEL: @long_red(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; STORE-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; STORE: for.body.lr.ph:
-; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; STORE-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
-; STORE-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
-; STORE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
-; STORE-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
-; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
-; STORE-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
-; STORE-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
-; STORE-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
-; STORE-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
-; STORE-NEXT: br label [[FOR_BODY:%.*]]
-; STORE: for.body:
-; STORE-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
-; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; STORE-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
-; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
-; STORE-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
-; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
-; STORE-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
-; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
-; STORE-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
-; STORE-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
-; STORE-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
-; STORE-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
-; STORE-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
-; STORE-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
-; STORE-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
-; STORE-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
-; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
-; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
-; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
-; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
-; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
-; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
-; STORE-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
-; STORE-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
-; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
-; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
-; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
-; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
-; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; STORE: for.cond.for.end_crit_edge:
-; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
-; STORE-NEXT: br label [[FOR_END]]
-; STORE: for.end:
-; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
+; ALL-LABEL: @long_red(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; ALL-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; ALL: for.body.lr.ph:
+; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
+; ALL-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
+; ALL-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
+; ALL-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
+; ALL-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
+; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
+; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
+; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
+; ALL-NEXT: br label [[FOR_BODY:%.*]]
+; ALL: for.body:
+; ALL-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
+; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
+; ALL-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
+; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
+; ALL-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
+; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
+; ALL-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
+; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
+; ALL-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
+; ALL-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
+; ALL-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
+; ALL-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
+; ALL-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
+; ALL-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
+; ALL-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
+; ALL-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
+; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
+; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
+; ALL-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
+; ALL-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
+; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
+; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
+; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
+; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
+; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
+; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
+; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
+; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; ALL: for.cond.for.end_crit_edge:
+; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
+; ALL-NEXT: br label [[FOR_END]]
+; ALL: for.end:
+; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
 ;
 entry:
 %cmp81 = icmp sgt i32 %n, 0
@@ -494,81 +366,43 @@ for.end:
 ; }
 
 define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
-; CHECK-LABEL: @chain_red(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; CHECK-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
-; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
-; CHECK-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
-; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
-; CHECK-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
-; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
-; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
-; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
-; CHECK-NEXT: br label [[FOR_END]]
-; CHECK: for.end:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-;
-; STORE-LABEL: @chain_red(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; STORE-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
-; STORE: for.body.lr.ph:
-; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; STORE-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; STORE-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
-; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
-; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
-; STORE-NEXT: br label [[FOR_BODY:%.*]]
-; STORE: for.body:
-; STORE-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
-; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
-; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
-; STORE-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
-; STORE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
-; STORE-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
-; STORE-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
-; STORE-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
-; STORE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
-; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
-; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
-; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
-; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
-; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
-; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
-; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
-; STORE: for.cond.for.end_crit_edge:
-; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
-; STORE-NEXT: br label [[FOR_END]]
-; STORE: for.end:
-; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
+; ALL-LABEL: @chain_red(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; ALL: for.body.lr.ph:
+; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; ALL-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; ALL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
+; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
+; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+; ALL-NEXT: br label [[FOR_BODY:%.*]]
+; ALL: for.body:
+; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
+; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
+; ALL-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
+; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
+; ALL-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
+; ALL-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
+; ALL-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
+; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
+; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
+; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
+; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
+; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
+; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
+; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; ALL: for.cond.for.end_crit_edge:
+; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
+; ALL-NEXT: br label [[FOR_END]]
+; ALL: for.end:
+; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
 ;
 entry:
 %cmp41 = icmp sgt i32 %n, 0
@@ -648,125 +482,65 @@ for.end:
 ; }
 
 define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
-; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
-; CHECK-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
-; CHECK: for.body16.lr.ph:
-; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
-; CHECK-NEXT: br label [[FOR_BODY16:%.*]]
-; CHECK: for.cond.cleanup15:
-; CHECK-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
-; CHECK-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
-; CHECK-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
-; CHECK-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
-; CHECK-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
-; CHECK-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.body16:
-; CHECK-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
-; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
-; CHECK-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
-; CHECK-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
-; CHECK-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
-; CHECK-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
-; CHECK-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
-; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
-; CHECK-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
-; CHECK-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
-; CHECK-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
-; CHECK-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
-; CHECK-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
-; CHECK-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
-; CHECK-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
-;
-; STORE-LABEL: @foo(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
-; STORE-NEXT: br label [[FOR_BODY:%.*]]
-; STORE: for.cond.cleanup:
-; STORE-NEXT: ret void
-; STORE: for.body:
-; STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
-; STORE-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
-; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
-; STORE-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; STORE-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
-; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
-; STORE-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
-; STORE-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
-; STORE-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
-; STORE-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
-; STORE-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
-; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
-; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
-; STORE-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
-; STORE: for.body16.lr.ph:
-; STORE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
-; STORE-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
-; STORE-NEXT: br label [[FOR_BODY16:%.*]]
-; STORE: for.cond.cleanup15:
-; STORE-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
-; STORE-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
-; STORE-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
-; STORE-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
-; STORE-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
-; STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; STORE-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
-; STORE-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; STORE: for.body16:
-; STORE-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
-; STORE-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
-; STORE-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
-; STORE-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
-; STORE-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
-; STORE-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
-; STORE-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
-; STORE-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
-; STORE-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
-; STORE-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
-; STORE-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
-; STORE-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
-; STORE-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
-; STORE-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
-; STORE-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
-; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
-; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
+; ALL-LABEL: @foo(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
+; ALL-NEXT: br label [[FOR_BODY:%.*]]
+; ALL: for.cond.cleanup:
+; ALL-NEXT: ret void
+; ALL: for.body:
+; ALL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
+; ALL-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
+; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
+; ALL-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; ALL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
+; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
+; ALL-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
+; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
+; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
+; ALL-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
+; ALL: for.body16.lr.ph:
+; ALL-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
+; ALL-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
+; ALL-NEXT: br label [[FOR_BODY16:%.*]]
+; ALL: for.cond.cleanup15:
+; ALL-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
+; ALL-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
+; ALL-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
+; ALL-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
+; ALL-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
+; ALL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ALL-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
+; ALL-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; ALL: for.body16:
+; ALL-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
+; ALL-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
+; ALL-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
+; ALL-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
+; ALL-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
+; ALL-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
+; ALL-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
+; ALL-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
+; ALL-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
+; ALL-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
+; ALL-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
+; ALL-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
+; ALL-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
+; ALL-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
+; ALL-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
+; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
+; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
 ;
 entry:
 %cmp1495 = icmp eq i32 %arg_B, 0
@@ -1541,19 +1315,12 @@ entry:
 declare i32 @foobar(i32)
 
 define void @i32_red_call(i32 %val) {
-; CHECK-LABEL: @i32_red_call(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
-; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
-; CHECK-NEXT: ret void
-;
-; STORE-LABEL: @i32_red_call(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
-; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
-; STORE-NEXT: ret void
+; ALL-LABEL: @i32_red_call(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
+; ALL-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
+; ALL-NEXT: ret void
 ;
 entry:
 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
@@ -1576,31 +1343,18 @@ entry:
 }
 
 define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {
-; CHECK-LABEL: @i32_red_invoke(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
-; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
-; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
-; CHECK: exception:
-; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: br label [[NORMAL]]
-; CHECK: normal:
-; CHECK-NEXT: ret void
-;
-; STORE-LABEL: @i32_red_invoke(
-; STORE-NEXT: entry:
-; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
-; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
-; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
-; STORE: exception:
-; STORE-NEXT: [[CLEANUP:%.*]] = landingpad i8
-; STORE-NEXT: cleanup
-; STORE-NEXT: br label [[NORMAL]]
-; STORE: normal:
-; STORE-NEXT: ret void
+; ALL-LABEL: @i32_red_invoke(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
+; ALL-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
+; ALL-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
+; ALL: exception:
+; ALL-NEXT: [[CLEANUP:%.*]] = landingpad i8
+; ALL-NEXT: cleanup
+; ALL-NEXT: br label [[NORMAL]]
+; ALL: normal:
+; ALL-NEXT: ret void
 ;
 entry:
 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
@@ -1628,35 +1382,20 @@ normal:
 
 ; Test case from PR47670. Reduction result is used as incoming value in phi.
 define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) {
-; CHECK-LABEL: @reduction_result_used_in_phi(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
-; CHECK: bb:
-; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
-; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
-; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: exit:
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
-; CHECK-NEXT: ret i32 [[SUM_1]]
-;
-; STORE-LABEL: @reduction_result_used_in_phi(
-; STORE-NEXT: entry:
-; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
-; STORE: bb:
-; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
-; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
-; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
-; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
-; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; STORE-NEXT: br label [[EXIT]]
-; STORE: exit:
-; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
-; STORE-NEXT: ret i32 [[SUM_1]]
+; ALL-LABEL: @reduction_result_used_in_phi(
+; ALL-NEXT: entry:
+; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
+; ALL: bb:
+; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
+; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
+; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
+; ALL-NEXT: br label [[EXIT]]
+; ALL: exit:
+; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
+; ALL-NEXT: ret i32 [[SUM_1]]
 ;
 entry:
 br i1 %b, label %bb, label %exit
@@ -1680,35 +1419,20 @@ exit:
 
 define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) {
-; CHECK-LABEL: @reduction_result_used_in_phi_loop(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
-; CHECK: bb:
-; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
-; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
-; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: exit:
-; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
-; CHECK-NEXT: ret i32 [[SUM_1]]
-;
-; STORE-LABEL: @reduction_result_used_in_phi_loop(
-; STORE-NEXT: entry:
-; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
-; STORE: bb:
-; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
-; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
-; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
-; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
-; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; STORE-NEXT: br label [[EXIT]]
-; STORE: exit:
-; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
-; STORE-NEXT: ret i32 [[SUM_1]]
+; ALL-LABEL: @reduction_result_used_in_phi_loop(
+; ALL-NEXT: entry:
+; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
+; ALL: bb:
+; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
+; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
+; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
+; ALL-NEXT: br label [[EXIT]]
+; ALL: exit:
+; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
+; ALL-NEXT: ret i32 [[SUM_1]]
 ;
 entry:
 br i1 %b, label %bb, label %exit
@@ -1734,25 +1458,15 @@ exit:
 
 ; Make sure we do not crash or infinite loop on ill-formed IR.
 define void @unreachable_block() {
-; CHECK-LABEL: @unreachable_block(
-; CHECK-NEXT: bb.0:
-; CHECK-NEXT: br label [[BB_1:%.*]]
-; CHECK: dead:
-; CHECK-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
-; CHECK-NEXT: br label [[BB_1]]
-; CHECK: bb.1:
-; CHECK-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
-; CHECK-NEXT: ret void
-;
-; STORE-LABEL: @unreachable_block(
-; STORE-NEXT: bb.0:
-; STORE-NEXT: br label [[BB_1:%.*]]
-; STORE: dead:
-; STORE-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
-; STORE-NEXT: br label [[BB_1]]
-; STORE: bb.1:
-; STORE-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
-; STORE-NEXT: ret void
+; ALL-LABEL: @unreachable_block(
+; ALL-NEXT: bb.0:
+; ALL-NEXT: br label [[BB_1:%.*]]
+; ALL: dead:
+; ALL-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
+; ALL-NEXT: br label [[BB_1]]
+; ALL: bb.1:
+; ALL-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
+; ALL-NEXT: ret void
 ;
 bb.0:
 br label %bb.1
@@ -1769,23 +1483,14 @@ bb.1:
 
 ; The FMF on the reduction should match the incoming insts.
 define float @fadd_v4f32_fmf(float* %p) {
-; CHECK-LABEL: @fadd_v4f32_fmf(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
-; CHECK-NEXT: ret float [[TMP3]]
-;
-; STORE-LABEL: @fadd_v4f32_fmf(
-; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
-; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
-; STORE-NEXT: ret float [[TMP3]]
+; ALL-LABEL: @fadd_v4f32_fmf(
+; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; ALL-NEXT: ret float [[TMP3]]
 ;
 %p1 = getelementptr inbounds float, float* %p, i64 1
 %p2 = getelementptr inbounds float, float* %p, i64 2
 %p3 = getelementptr inbounds float, float* %p, i64 3
@@ -1805,23 +1510,14 @@ define float @fadd_v4f32_fmf(float* %p) {
 
 ; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags.
 define float @fadd_v4f32_fmf_intersect(float* %p) {
-; CHECK-LABEL: @fadd_v4f32_fmf_intersect(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
-; CHECK-NEXT: ret float [[TMP3]]
-;
-; STORE-LABEL: @fadd_v4f32_fmf_intersect(
-; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
-; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
-; STORE-NEXT: ret float [[TMP3]]
+; ALL-LABEL: @fadd_v4f32_fmf_intersect(
+; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; ALL-NEXT: ret float [[TMP3]]
 ;
 %p1 = getelementptr inbounds float, float* %p, i64 1
 %p2 = getelementptr inbounds float, float* %p, i64 2