From aba97f4aba2b8dbf5f3d222855cccf1ad75b72a1 Mon Sep 17 00:00:00 2001 From: Suyog Sarda Date: Wed, 19 Nov 2014 16:07:38 +0000 Subject: [PATCH] Vectorize a reduction chain feeding into a 'return' statement. e.g. return (a[0]+b[0]) + (a[1]+b[1]) Differential Revision: http://reviews.llvm.org/D6227 llvm-svn: 222364 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 +++++++ llvm/test/Transforms/SLPVectorizer/X86/return.ll | 54 ++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/return.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 87b9874..44bfea1 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3696,6 +3696,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } } + // Try to vectorize horizontal reductions feeding into a return. + if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) + if (RI->getNumOperands() != 0) + if (BinaryOperator *BinOp = + dyn_cast<BinaryOperator>(RI->getOperand(0))) { + DEBUG(dbgs() << "SLP: Found a return to vectorize.\n"); + if (tryToVectorizePair(BinOp->getOperand(0), + BinOp->getOperand(1), R)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; + } + } + // Try to vectorize trees that start at compare instructions. 
if (CmpInst *CI = dyn_cast<CmpInst>(it)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/return.ll b/llvm/test/Transforms/SLPVectorizer/X86/return.ll new file mode 100644 index 0000000..1a81c235 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "x86_64--linux-gnu" + +@a = common global [4 x double] zeroinitializer, align 8 +@b = common global [4 x double] zeroinitializer, align 8 + +; double a[4], b[4]; +; double foo() { +; double sum =0; +; sum = (a[0]+b[0]) + (a[1]+b[1]); +; return sum; +; } + +; CHECK-LABEL: @return1 +; CHECK: %0 = load <2 x double>* +; CHECK: %1 = load <2 x double>* +; CHECK: %2 = fadd <2 x double> + +define double @return1() { +entry: + %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8 + %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8 + %add0 = fadd double %a0, %b0 + %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8 + %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8 + %add1 = fadd double %a1, %b1 + %add2 = fadd double %add0, %add1 + ret double %add2 +} + +; double hadd(double *x) { +; return ((x[0] + x[2]) + (x[1] + x[3])); +; } + +; CHECK-LABEL: @return2 +; CHECK: %1 = load <2 x double>* +; CHECK: %3 = load <2 x double>* %2 +; CHECK: %4 = fadd <2 x double> %1, %3 + +define double @return2(double* nocapture readonly %x) { +entry: + %x0 = load double* %x, align 4 + %arrayidx1 = getelementptr inbounds double* %x, i32 2 + %x2 = load double* %arrayidx1, align 4 + %add3 = fadd double %x0, %x2 + %arrayidx2 = getelementptr inbounds double* %x, i32 1 + %x1 = load double* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds double* %x, i32 3 + %x3 = load double* %arrayidx3, 
align 4 + %add4 = fadd double %x1, %x3 + %add5 = fadd double %add3, %add4 + ret double %add5 +} -- 2.7.4