[Scalarizer] Add UnaryOperator visitor to scalarization pass

author Cameron McInally <cameron.mcinally@nyu.edu>

Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)

committer Cameron McInally <cameron.mcinally@nyu.edu>

Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)
author Cameron McInally <cameron.mcinally@nyu.edu>
Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)
committer Cameron McInally <cameron.mcinally@nyu.edu>
Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp

index 0bd0fff1aa59b4183d0d77a1fb36e6324065ee66..515a64827730028d7bf0cf39d265937275c8f039 100644 (file)
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -124,6 +124,18 @@ struct ICmpSplitter {
    ICmpInst &ICI;
  };
  
+// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
+// a unary operator like UO called Name with operand X.
+struct UnarySplitter {
+  UnarySplitter(UnaryOperator &uo) : UO(uo) {}
+
+  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
+    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
+  }
+
+  UnaryOperator &UO;
+};
+
  // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
  // a binary operator like BO called Name with operands X and Y.
  struct BinarySplitter {
@@ -173,6 +185,7 @@ public:
    bool visitSelectInst(SelectInst &SI);
    bool visitICmpInst(ICmpInst &ICI);
    bool visitFCmpInst(FCmpInst &FCI);
+  bool visitUnaryOperator(UnaryOperator &UO);
    bool visitBinaryOperator(BinaryOperator &BO);
    bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
    bool visitCastInst(CastInst &CI);
@@ -192,6 +205,7 @@ private:
                         const DataLayout &DL);
    bool finish();
  
+  template<typename T> bool splitUnary(Instruction &, const T &);
    template<typename T> bool splitBinary(Instruction &, const T &);
  
    bool splitCall(CallInst &CI);
@@ -419,6 +433,26 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
    return true;
  }
  
+// Scalarize one-operand instruction I, using Split(Builder, X, Name)
+// to create an instruction like I with operand X and name Name.
+template<typename Splitter>
+bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
+  VectorType *VT = dyn_cast<VectorType>(I.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(&I);
+  Scatterer Op = scatter(&I, I.getOperand(0));
+  assert(Op.size() == NumElems && "Mismatched unary operation");
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+    Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
+  gather(&I, Res);
+  return true;
+}
+
  // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
  // to create an instruction like I with operands X and Y and name Name.
  template<typename Splitter>
@@ -551,6 +585,10 @@ bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
    return splitBinary(FCI, FCmpSplitter(FCI));
  }
  
+bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
+  return splitUnary(UO, UnarySplitter(UO));
+}
+
  bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
    return splitBinary(BO, BinarySplitter(BO));
  }
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll

index 29a82fd8090c4f121878e08d0327f3514283531b..577f0b19bb1ede7d17942ec5a9bc76b155116941 100644 (file)
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -444,6 +444,68 @@ exit:
    ret <4 x float> %next_acc
  }
  
+; Test unary operator scalarization.
+define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
+; CHECK-LABEL: @f15(
+; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
+; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
+; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
+; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
+; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
+; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
+; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
+; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
+; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
+; CHECK: %neg.i0 = fneg float %val.i0
+; CHECK: %neg.i1 = fneg float %val.i1
+; CHECK: %neg.i2 = fneg float %val.i2
+; CHECK: %neg.i3 = fneg float %val.i3
+; CHECK: %neg.upto0 = insertelement <4 x float> undef, float %neg.i0, i32 0
+; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
+; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
+; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
+; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
+; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
+; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
+; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
+; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
+; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
+; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
+; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
+; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
+; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
+; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
+; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
+; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
+; CHECK: store float %sel.i0, float* %ptr.i0, align 16
+; CHECK: store float %sel.i1, float* %ptr.i1, align 4
+; CHECK: store float %sel.i2, float* %ptr.i2, align 8
+; CHECK: store float %sel.i3, float* %ptr.i3, align 4
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
+  %val = load <4 x float> , <4 x float> *%ptr
+  %neg = fneg <4 x float> %val
+  %call = call <4 x float> @ext(<4 x float> %neg)
+  %cmp = fcmp ogt <4 x float> %call,
+  <float 1.0, float 2.0, float 3.0, float 4.0>
+  %sel = select <4 x i1> %cmp, <4 x float> %call,
+  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  store <4 x float> %sel, <4 x float> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit
+
+exit:
+  ret void
+}
+
  !0 = !{ !"root" }
  !1 = !{ !"set1", !0 }
  !2 = !{ !"set2", !0 }
author	Cameron McInally <cameron.mcinally@nyu.edu>
	Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)
committer	Cameron McInally <cameron.mcinally@nyu.edu>
	Tue, 4 Jun 2019 23:01:36 +0000 (23:01 +0000)
llvm/lib/Transforms/Scalar/Scalarizer.cpp		patch \| blob \| history
llvm/test/Transforms/Scalarizer/basic.ll		patch \| blob \| history