From 4cb8cdab5e0002f4672c85afd467da8230dff476 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 31 Oct 2012 21:40:39 +0000 Subject: [PATCH] LoopVectorize: Preserve NSW, NUW and IsExact flags. llvm-svn: 167174 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 +++++- .../Transforms/LoopVectorize/X86/gcc-examples.ll | 2 +- llvm/test/Transforms/LoopVectorize/flags.ll | 53 ++++++++++++++++++++++ llvm/test/Transforms/LoopVectorize/gcc-examples.ll | 4 +- llvm/test/Transforms/LoopVectorize/increment.ll | 2 +- llvm/test/Transforms/LoopVectorize/non-const-n.ll | 2 +- llvm/test/Transforms/LoopVectorize/reduction.ll | 2 +- .../Transforms/LoopVectorize/start-non-zero.ll | 4 +- 8 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/flags.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 94e56a1..c9871e2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -849,8 +849,19 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); Value *A = getVectorValue(Inst->getOperand(0)); Value *B = getVectorValue(Inst->getOperand(1)); + // Use this vector value for all users of the original instruction. - WidenMap[Inst] = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + WidenMap[Inst] = V; + + // Update the NSW, NUW and Exact flags. 
+ BinaryOperator *VecOp = cast<BinaryOperator>(V); + if (isa<OverflowingBinaryOperator>(BinOp)) { + VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); + VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(VecOp)) + VecOp->setIsExact(BinOp->isExact()); break; } case Instruction::Select: { diff --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll index e7a63c9..574c529 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; Select VF = 8; ;CHECK: @example1 ;CHECK: load <8 x i32> -;CHECK: add <8 x i32> +;CHECK: add nsw <8 x i32> ;CHECK: store <8 x i32> ;CHECK: ret void define void @example1() nounwind uwtable ssp { diff --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll new file mode 100644 index 0000000..2f22a76 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/flags.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @flags1 +;CHECK: load <4 x i32> +;CHECK: mul nsw <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret i32 +define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = mul nsw i32 %3, 3 + store i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label 
%._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + + +;CHECK: @flags2 +;CHECK: load <4 x i32> +;CHECK: mul <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret i32 +define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = mul i32 %3, 3 + store i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll index d8942ac..fce29d2 100644 --- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll @@ -21,7 +21,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: @example1 ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example1() nounwind uwtable ssp { @@ -227,6 +227,8 @@ define i32 @example9() nounwind uwtable readonly ssp { } ;CHECK: @example10a +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: load <4 x i16> ;CHECK: add <4 x i16> ;CHECK: store <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/increment.ll b/llvm/test/Transforms/LoopVectorize/increment.ll index 069b7ea..71ea7689 100644 --- a/llvm/test/Transforms/LoopVectorize/increment.ll +++ b/llvm/test/Transforms/LoopVectorize/increment.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; } ;CHECK: @inc ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void 
@inc(i32 %n) nounwind uwtable noinline ssp { diff --git a/llvm/test/Transforms/LoopVectorize/non-const-n.ll b/llvm/test/Transforms/LoopVectorize/non-const-n.ll index 7727b0a..1a6c15e 100644 --- a/llvm/test/Transforms/LoopVectorize/non-const-n.ll +++ b/llvm/test/Transforms/LoopVectorize/non-const-n.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: shl i32 ;CHECK: zext i32 ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example1(i32 %n) nounwind uwtable ssp { diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index d19f7c1..c1848b3 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -66,7 +66,7 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap ;CHECK: @reduction_mix ;CHECK: phi <4 x i32> ;CHECK: load <4 x i32> -;CHECK: mul <4 x i32> +;CHECK: mul nsw <4 x i32> ;CHECK: ret i32 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { %1 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll index 257df05..5aa3bc0 100644 --- a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll +++ b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ;CHECK: @start_at_nonzero -;CHECK: mul <4 x i32> +;CHECK: mul nuw <4 x i32> ;CHECK: ret i32 define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp { entry: @@ -19,7 +19,7 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv 
%1 = load i32* %arrayidx, align 4, !tbaa !0 - %mul = mul nsw i32 %1, 333 + %mul = mul nuw i32 %1, 333 store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 %indvars.iv.next = add i64 %indvars.iv, 1 %2 = trunc i64 %indvars.iv.next to i32 -- 2.7.4