[SLPVectorizer][SVE] Bail out early for scalable vector.

author Huihui Zhang <huihuiz@quicinc.com>

Fri, 13 Mar 2020 18:23:01 +0000 (11:23 -0700)

committer Huihui Zhang <huihuiz@quicinc.com>

Fri, 13 Mar 2020 18:23:31 +0000 (11:23 -0700)
author Huihui Zhang <huihuiz@quicinc.com>
Fri, 13 Mar 2020 18:23:01 +0000 (11:23 -0700)
committer Huihui Zhang <huihuiz@quicinc.com>
Fri, 13 Mar 2020 18:23:31 +0000 (11:23 -0700)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 68d063d..e411c4e 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5899,25 +5899,15 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
    LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                      << VL.size() << ".\n");
  
-  // Check that all of the parts are scalar instructions of the same type,
+  // Check that all of the parts are instructions of the same type,
    // we permit an alternate opcode via InstructionsState.
    InstructionsState S = getSameOpcode(VL);
    if (!S.getOpcode())
      return false;
  
    Instruction *I0 = cast<Instruction>(S.OpValue);
-  unsigned Sz = R.getVectorElementSize(I0);
-  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
-  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2) {
-    R.getORE()->emit([&]() {
-      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
-             << "Cannot SLP vectorize list: vectorization factor "
-             << "less than 2 is not supported";
-    });
-    return false;
-  }
-
+  // Make sure invalid types (including vector type) are rejected before
+  // determining vectorization factor for scalar instructions.
    for (Value *V : VL) {
      Type *Ty = V->getType();
      if (!isValidElementType(Ty)) {
@@ -5935,6 +5925,18 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
      }
    }
  
+  unsigned Sz = R.getVectorElementSize(I0);
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2) {
+    R.getORE()->emit([&]() {
+      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+             << "Cannot SLP vectorize list: vectorization factor "
+             << "less than 2 is not supported";
+    });
+    return false;
+  }
+
    bool Changed = false;
    bool CandidateFound = false;
    int MinCost = SLPCostThreshold;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll

new file mode 100644 (file)

index 0000000..cfd2463
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test checks that the SLP vectorizer does not try to vectorize instructions that are already vectorized.
+; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
+
+define void @vector() {
+  %load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
+  %load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
+  %add = add <16 x i8> %load1, %load0
+  tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+  ret void
+}
+
+declare <16 x i8> @vector.load(<16 x i8>*, i32)
+declare void @vector.store(<16 x i8>, <16 x i8>*, i32)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll

new file mode 100644 (file)

index 0000000..70ce0dc
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test checks that we neither crash nor change the code.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:    ret void
+;
+  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %add = add <vscale x 16 x i8> %load1, %load0
+  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+  ret void
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)
author	Huihui Zhang <huihuiz@quicinc.com>
	Fri, 13 Mar 2020 18:23:01 +0000 (11:23 -0700)
committer	Huihui Zhang <huihuiz@quicinc.com>
	Fri, 13 Mar 2020 18:23:31 +0000 (11:23 -0700)
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp		patch | blob | history
llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll	[new file with mode: 0644]	patch | blob
llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll	[new file with mode: 0644]	patch | blob