[LV] Add -scalable-vectorization=<option> flag.

author Sander de Smalen <sander.desmalen@arm.com>

Thu, 8 Apr 2021 11:19:44 +0000 (12:19 +0100)

committer Sander de Smalen <sander.desmalen@arm.com>

Wed, 19 May 2021 09:40:56 +0000 (10:40 +0100)
author Sander de Smalen <sander.desmalen@arm.com>
Thu, 8 Apr 2021 11:19:44 +0000 (12:19 +0100)
committer Sander de Smalen <sander.desmalen@arm.com>
Wed, 19 May 2021 09:40:56 +0000 (10:40 +0100)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

index bed637e..a66925e 100644 (file)
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -96,6 +96,20 @@ public:
      FK_Enabled = 1,    ///< Forcing enabled.
    };
  
+  enum ScalableForceKind {
+    /// Not selected.
+    SK_Unspecified = -1,
+    /// Disables vectorization with scalable vectors.
+    SK_FixedWidthOnly = 0,
+    /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
+    /// scalable vectors when the cost-model is inconclusive. This is the
+    /// default when the scalable.enable hint is enabled through a pragma.
+    SK_PreferScalable = 1,
+    /// Vectorize loops using scalable vectors or fixed-width  vectors, but
+    /// favor fixed-width vectors when the cost is inconclusive.
+    SK_PreferFixedWidth = 2,
+  };
+
    LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
                       OptimizationRemarkEmitter &ORE);
  
@@ -109,7 +123,8 @@ public:
    void emitRemarkWithHints() const;
  
    ElementCount getWidth() const {
-    return ElementCount::get(Width.Value, isScalable());
+    return ElementCount::get(Width.Value,
+                             isScalableVectorizationExplicitlyEnabled());
    }
    unsigned getInterleave() const {
      if (Interleave.Value)
@@ -129,7 +144,16 @@ public:
      return (ForceKind)Force.Value;
    }
  
-  bool isScalable() const { return Scalable.Value; }
+  /// \return true if scalable vectorization has been explicitly enabled.
+  bool isScalableVectorizationExplicitlyEnabled() const {
+    return Scalable.Value == SK_PreferFixedWidth ||
+           Scalable.Value == SK_PreferScalable;
+  }
+
+  /// \return true if scalable vectorization has been explicitly disabled.
+  bool isScalableVectorizationDisabled() const {
+    return Scalable.Value == SK_FixedWidthOnly;
+  }
  
    /// If hints are provided that force vectorization, use the AlwaysPrint
    /// pass name to force the frontend to print the diagnostic.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

index 69d7554..59b2aa9 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -48,6 +48,23 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
      cl::desc("The maximum number of SCEV checks allowed with a "
               "vectorize(enable) pragma"));
  
+// FIXME: When scalable vectorization is stable enough, change the default
+// to SK_PreferFixedWidth.
+static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
+    "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
+    cl::Hidden,
+    cl::desc("Control whether the compiler can use scalable vectors to "
+             "vectorize a loop"),
+    cl::values(
+        clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
+                   "Scalable vectorization is disabled."),
+        clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
+                   "Scalable vectorization is available, but favor fixed-width "
+                   "vectorization when the cost is inconclusive."),
+        clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
+                   "Scalable vectorization is available and favored when the "
+                   "cost is inconclusive.")));
+
  /// Maximum vectorization interleave count.
  static const unsigned MaxInterleaveFactor = 16;
  
@@ -77,8 +94,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
        Force("vectorize.enable", FK_Undefined, HK_FORCE),
        IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
        Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
-      Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
-      ORE(ORE) {
+      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
+      TheLoop(L), ORE(ORE) {
    // Populate values with existing loop metadata.
    getHintsFromMetadata();
  
@@ -86,6 +103,16 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
    if (VectorizerParams::isInterleaveForced())
      Interleave.Value = VectorizerParams::VectorizationInterleave;
  
+  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
+    // If the width is set, but the metadata says nothing about the scalable
+    // property, then assume it concerns only a fixed-width UserVF.
+    // If width is not set, the flag takes precedence.
+    Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
+  else if (ScalableVectorization == SK_FixedWidthOnly)
+    // If the flag is set to disable any use of scalable vectors, override the
+    // loop hint.
+    Scalable.Value = SK_FixedWidthOnly;
+
    if (IsVectorized.Value != 1)
      // If the vectorization width and interleaving count are both 1 then
      // consider the loop to have been already vectorized because there's
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 5bccd7d..a4edca3 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5648,6 +5648,12 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
      return ElementCount::getScalable(0);
    }
  
+  if (Hints->isScalableVectorizationDisabled()) {
+    reportVectorizationInfo("Scalable vectorization is explicitly disabled",
+                            "ScalableVectorizationDisabled", ORE, TheLoop);
+    return ElementCount::getScalable(0);
+  }
+
    auto MaxScalableVF = ElementCount::getScalable(
        std::numeric_limits<ElementCount::ScalarTy>::max());
  
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

index 3eb0eef..d93e5b8 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
  
  ; This test currently fails when the LV calculates a maximums safe
  ; distance for scalable vectors, because the code to eliminate the tail is
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll

index 2e2754c..a88b960 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
  
  ; We vectorize this first order recurrence, with a set of insertelements for
  ; each unrolled part. Make sure these insertelements are generated in-order,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll

index f4114fe..7d6469f 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
@@ -1,5 +1,5 @@
  ; REQUIRES: asserts
-; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug < %s 2>%t | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
  ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
  
  target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

index 8fa057f..2bbc6df 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
  
  define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
  ; CHECK-LABEL: @vec_load
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

index 7b410dd..e45c535 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S 2>%t | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
  ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
  
  ; Reduction can be vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

index 69748f0..48c2fd9 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
  
  define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
  ; CHECK-LABEL: @fadd_strict
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll

index 6fe5464..2213fde 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
@@ -1,6 +1,8 @@
  ; REQUIRES: asserts
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on        < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off       < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
  
  ; Test that the MaxVF for the following loop, that has no dependence distances,
  ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
@@ -8,6 +10,8 @@
  define void @test0(i32* %a, i8* %b, i32* %c) {
  ; CHECK: LV: Checking a loop in "test0"
  ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
  ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
  entry:
    br label %loop
@@ -35,6 +39,8 @@ exit:
  define void @test1(i32* %a, i8* %b) {
  ; CHECK: LV: Checking a loop in "test1"
  ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
  ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
  entry:
    br label %loop
@@ -63,6 +69,8 @@ exit:
  define void @test2(i32* %a, i8* %b) {
  ; CHECK: LV: Checking a loop in "test2"
  ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
  ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
  entry:
    br label %loop
@@ -91,6 +99,8 @@ exit:
  define void @test3(i32* %a, i8* %b) {
  ; CHECK: LV: Checking a loop in "test3"
  ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
  ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
  entry:
    br label %loop
@@ -120,6 +130,8 @@ exit:
  define void @test4(i32* %a, i32* %b) {
  ; CHECK: LV: Checking a loop in "test4"
  ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
  ; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
  entry:
    br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

index 33d75e0..a29cf18 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -1,8 +1,8 @@
  ; REQUIRES: asserts
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s
  
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll

index ad6f42b..79cb349 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve < %s -S | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on < %s -S | FileCheck %s
  
  
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

index 2536c6c..3054f3a 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
  
  define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
  ; CHECK-LABEL: @cond_inv_load_i32i32i16
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll

index 56a53a5..d6c509f 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
  
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

index 58f683c..5738019 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
  
  define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
  ; CHECK-LABEL: @gather_nxv4i32_ind64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

index 192d181..fff09c8 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
  
  ; Test that we can add on the induction variable
  ;   for (long long i = 0; i < n; i++) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll

index a12ec7f..2c6741b 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -scalable-vectorization=on -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
  
  define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
  ; CHECK-LABEL: @invariant_load
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

index e804dd3..8327e09 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
  
  define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
  ; CHECK-LABEL: @stride7_i32(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll

index 9988bfd..a2864a1 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
  
  define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
  ; CHECK-LABEL: @mloadstore_f32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll

index 221a6a6..eb87f77 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
  
  target triple = "aarch64-unknown-linux-gnu"
  
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

index ab45cd8..cd7fa7e 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -10,7 +10,7 @@
  
  ; The test checks if the mask is being correctly created, reverted and used
  
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
  
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll

index c439270..002dadd 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -4,7 +4,7 @@
  ;  for (int i = N-1; i >= 0; --i)
  ;    a[i] = b[i] + 1.0;
  
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
  
  define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
  ; CHECK-LABEL: @vector_reverse_f64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

index 353ad64..2745e95 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s
  
  ; Ensure that we can vectorize loops such as:
  ;   int *ptr = c;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll

index 69f0073..57c612e 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -1,5 +1,5 @@
  ; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \
-; RUN:   -riscv-v-vector-bits-max=512 -S < %s 2>&1 \
+; RUN:   -riscv-v-vector-bits-max=512 -S -scalable-vectorization=on < %s 2>&1 \
  ; RUN:   | FileCheck %s
  
  ; void test(int *a, int *b, int N) {
diff --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll

index 1dc51ca..7b60bc7 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=on | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=preferred | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  
@@ -65,11 +66,37 @@ for.end:                                          ; preds = %for.body, %entry
    ret void
  }
  
+; Test that when only the width property is specified, and not the scalable.enable flag,
+; that the hint defaults to 'scalable.enable = false'.
+
+; CHECK-LABEL: @test4(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test4(i32* nocapture %a, i32 %n) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 42, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
  attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
-!0 = !{!0, !1}
+!0 = !{!0, !1, !5}
  !1 = !{!"llvm.loop.vectorize.width", i32 8}
  !2 = !{!2, !1, !3}
  !3 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}
  !4 = !{!4, !1, !5}
  !5 = !{!"llvm.loop.vectorize.scalable.enable", i32 0}
+!6 = !{!6, !7}
+!7 = !{!"llvm.loop.vectorize.width", i32 8}
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll

index 0a6925d..c42bb33 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
@@ -1,5 +1,5 @@
  ; REQUIRES: asserts
-; RUN: opt < %s  -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s  -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
  
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll

index bbcc553..0e660a9 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
  
  ; void recurrence_1(int *a, int *b, int n) {
  ;   for(int i = 0; i < n; i++)
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll

index 3d845e6..93715c2 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF1
-; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF2
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF1
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF2
  
  ; CHECKUF1: for.body.preheader:
  ; CHECKUF1-DAG: %wide.trip.count = zext i32 %N to i64
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll

index 03af7b3..efa7080 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -1,5 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -force-target-supports-scalable-vectors=true -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-target-supports-scalable-vectors=true -scalable-vectorization=on -S | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll

index f99d87f..6f5a591 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
@@ -1,5 +1,5 @@
  ; REQUIRES: asserts
-; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
author	Sander de Smalen <sander.desmalen@arm.com>
	Thu, 8 Apr 2021 11:19:44 +0000 (12:19 +0100)
committer	Sander de Smalen <sander.desmalen@arm.com>
	Wed, 19 May 2021 09:40:56 +0000 (10:40 +0100)
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/metadata-width.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll		patch \| blob \| history