From 859366f93f2a1e443bb0a71da91d42382087b573 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 27 Oct 2012 04:11:32 +0000 Subject: [PATCH] 1. Fix a bug in getTypeConversion. When a *simple* type is split, we need to return the type of the split result. 2. Change the maximum vectorization width from 4 to 8. 3. A test for both. llvm-svn: 166864 --- llvm/include/llvm/Target/TargetLowering.h | 3 ++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- .../Transforms/LoopVectorize/X86/gcc-examples.ll | 62 ++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 9d0aeaa..13f80fd 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -1980,6 +1980,9 @@ public: ValueTypeActions.getTypeAction(NVT.getSimpleVT()) != TypePromoteInteger) && "Promote may not follow Expand or Promote"); + if (LA == TypeSplitVector) + NVT = EVT::getVectorVT(Context, VT.getVectorElementType(), + VT.getVectorNumElements() / 2); return LegalizeKind(LA, NVT); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1773812..be197db 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -317,7 +317,7 @@ public: /// Returns the most profitable vectorization factor for the loop that is /// smaller or equal to the VF argument. This method checks every power /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = 4); + unsigned findBestVectorizationFactor(unsigned VF = 8); private: /// Returns the expected execution cost. The unit of the cost does diff --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll new file mode 100644 index 0000000..e7a63c9 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; Select VF = 8; +;CHECK: @example1 +;CHECK: load <8 x i32> +;CHECK: add <8 x i32> +;CHECK: store <8 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +;