From: Thomas Lively Date: Fri, 19 Oct 2018 21:11:43 +0000 (+0000) Subject: [LoopVectorize] Loop vectorization for minimum and maximum X-Git-Tag: llvmorg-8.0.0-rc1~6239 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8a91cf1cc511d9f7a9a46b807982615cf548f911;p=platform%2Fupstream%2Fllvm.git [LoopVectorize] Loop vectorization for minimum and maximum Summary: Depends on D52766. Reviewers: aheejin, dschuff Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D52767 llvm-svn: 344816 --- diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index e14449b..5fd6fe0 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -54,6 +54,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::fabs: case Intrinsic::minnum: case Intrinsic::maxnum: + case Intrinsic::minimum: + case Intrinsic::maximum: case Intrinsic::copysign: case Intrinsic::floor: case Intrinsic::ceil: diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 178d602..203c443 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -1247,3 +1247,59 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } + +declare float @llvm.minimum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @minimum_f32( +;CHECK: llvm.minimum.v4f32 +;CHECK: ret void +define void @minimum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4 + %call = tail call float @llvm.minimum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.maximum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @maximum_f32( +;CHECK: llvm.maximum.v4f32 +;CHECK: ret void +define void @maximum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4 + %call = tail call float @llvm.maximum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll index 6c85ac3..7cebdff 100644 --- a/llvm/test/Transforms/Scalarizer/intrinsics.ll +++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll @@ -5,6 +5,8 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) ; Binary fp declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) ; Ternary fp declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) @@ -40,6 +42,28 @@ define <2 x float> @scalarize_minnum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ret <2 x float> %minnum } +; CHECK-LABEL: @scalarize_minimum_v2f32( +; CHECK: %minimum.i0 = call float @llvm.minimum.f32(float %x.i0, float %y.i0) +; CHECK: %minimum.i1 = call float @llvm.minimum.f32(float %x.i1, float %y.i1) +; CHECK: %minimum.upto0 = insertelement <2 x float> undef, float %minimum.i0, i32 0 +; CHECK: %minimum = insertelement <2 x float> %minimum.upto0, float %minimum.i1, i32 1 +; CHECK: ret <2 x float> %minimum +define <2 x float> @scalarize_minimum_v2f32(<2 x float> %x, <2 x float> %y) #0 { + %minimum = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> %y) + ret <2 x float> %minimum +} + +; CHECK-LABEL: @scalarize_maximum_v2f32( +; CHECK: %maximum.i0 = call float @llvm.maximum.f32(float %x.i0, float %y.i0) +; CHECK: %maximum.i1 = call float @llvm.maximum.f32(float %x.i1, float %y.i1) +; CHECK: %maximum.upto0 = insertelement <2 x float> undef, float %maximum.i0, i32 0 +; CHECK: %maximum = insertelement <2 x float> %maximum.upto0, float %maximum.i1, i32 1 +; CHECK: ret <2 x float> %maximum +define <2 x float> @scalarize_maximum_v2f32(<2 x float> %x, <2 x float> %y) #0 { + %maximum = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> %y) + ret <2 x float> %maximum +} + ; CHECK-LABEL: @scalarize_fma_v2f32( ; CHECK: %fma.i0 = call float @llvm.fma.f32(float %x.i0, float %y.i0, float %z.i0) ; CHECK: %fma.i1 = call float @llvm.fma.f32(float %x.i1, float %y.i1, float %z.i1)