--- /dev/null
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: llvm-lto -thinlto -o %t %t.o
+
+// Test to ensure the loop vectorize codegen option is passed down to the
+// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
+// automatically when O2/O3/Os is available for clang. Also check that
+// "-mllvm -vectorize-loops=false" will disable loop vectorization, overriding
+// the cc1 option.
+//
+// Check both the new and old PMs.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NOLPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-NOLPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O0-LPV
+// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+// O2-NOLPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+// Test to ensure the loop interleave codegen option is passed down to the
+// ThinLTO backend. The internal loop interleave codegen option will be
+// enabled automatically when O2/O3 is available for clang. Also check that
+// "-mllvm -interleave-loops=false" will disable the interleaving, overriding
+// the cc1 option.
+//
+// Only the new PM is exercised by the RUN lines in this section.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NoInterLeave
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
+// O2-InterLeave-COUNT-2: store <2 x double>
+// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
+// O2-NoInterLeave-COUNT-1: store <2 x double>
+// O2-NoInterLeave-NOT: store <2 x double>
+// O2-NoInterLeave: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+// Test input: a fixed-trip-count loop that stores a constant into a[i]. The
+// FileCheck patterns above look for loop-vectorization metadata and
+// <2 x double> stores in the IR produced from this function.
+void foo(double *a) {
+ for (int i = 0; i < 1000; i++)
+ a[i] = 10;
+}
// REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
// RUN: llvm-lto -thinlto -o %t %t.o
// Test to ensure the slp vectorize codegen option is passed down to the
// ThinLTO backend. -vectorize-slp is a cc1 option and will be added
-// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp
-// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization
-// currently. "-mllvm -vectorize-slp=false" is added here in the test to
-// ensure the slp vectorization is executed because the -vectorize-slp cc1
-// flag is passed down, not because "-mllvm -vectorize-slp" is enabled
-// by default.
+// automatically when O2/O3/Os/Oz is available for clang. Also check that
+// "-mllvm -vectorize-slp=false" will disable slp vectorization, overriding
+// the cc1 option.
//
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP
-// O2-SLP: Running pass: SLPVectorizerPass
-// O0-SLP-NOT: Running pass: SLPVectorizerPass
-
-// Test to ensure the loop vectorize codegen option is passed down to the
-// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
-// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is
-// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization
-// currently. "-mllvm -vectorize-loops=false" is added here in the test to
-// ensure the loop vectorization is executed because the -vectorize-loops cc1
-// flag is passed down, not because "-mllvm -vectorize-loops" is enabled
-// by default.
-//
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
-// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
-// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
-
-// Test to ensure the loop interleave codegen option is passed down to the
-// ThinLTO backend. The internal loop interleave codegen option will be
-// enabled automatically when O2/O3 is available for clang. Once the loop
-// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable
-// the interleave. currently. "-mllvm -interleave-loops=false" is added here
-// in the test to ensure the loop interleave is executed because the interleave
-// codegen flag is passed down, not because "-mllvm -interleave-loops" is
-// enabled by default.
+// Check both the new and old PMs.
//
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
-// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
-// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=SLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=SLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=NOSLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP
+// SLP: extractelement
+// NOSLP-NOT: extractelement
-void foo(double *a) {
- for (int i = 0; i < 1000; i++)
- a[i] = 10;
+// Test input: sums 7*A[2i] + 7*A[2i+1] for i in [0, n) and returns the total
+// converted to int. The SLP/NOSLP checks above key off whether
+// "extractelement" appears in the IR generated from this function.
+// NOTE(review): v1, v0 and m are never read in the body; presumably leftovers
+// from wherever this snippet was adapted -- confirm before removing.
+int foo(double *A, int n, int m) {
+ double sum = 0, v1 = 2, v0 = 3;
+ for (int i=0; i < n; ++i)
+ sum += 7*A[i*2] + 7*A[i*2+1];
+ return sum;
}
+
/// can be set in the PassBuilder when using a LLVM as a library.
PipelineTuningOptions();
- /// Tuning option to set loop interleaving on/off. Its default value is that
- /// of the flag: `-interleave-loops`.
+ /// Tuning option to set loop interleaving on/off, set based on opt level.
bool LoopInterleaving;
- /// Tuning option to enable/disable loop vectorization. Its default value is
- /// that of the flag: `-vectorize-loops`.
+ /// Tuning option to enable/disable loop vectorization, set based on opt
+ /// level.
bool LoopVectorization;
- /// Tuning option to enable/disable slp loop vectorization. Its default value
- /// is that of the flag: `vectorize-slp`.
+ /// Tuning option to enable/disable SLP vectorization, set based on opt
+ /// level.
bool SLPVectorization;
/// Tuning option to enable/disable loop unrolling. Its default value is true.
/// The LoopVectorize Pass.
struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
+private:
/// If false, consider all loops for interleaving.
/// If true, only loops that explicitly request interleaving are considered.
bool InterleaveOnlyWhenForced;
/// If true, only loops that explicitly request vectorization are considered.
bool VectorizeOnlyWhenForced;
- LoopVectorizePass(LoopVectorizeOptions Opts = {})
- : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced),
- VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced) {}
+public:
+ LoopVectorizePass(LoopVectorizeOptions Opts = {});
ScalarEvolution *SE;
LoopInfo *LI;
} // end namespace slpvectorizer
-extern cl::opt<bool> RunSLPVectorization;
-
struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
PipelineTuningOptions::PipelineTuningOptions() {
- LoopInterleaving = EnableLoopInterleaving;
- LoopVectorization = EnableLoopVectorization;
- SLPVectorization = RunSLPVectorization;
+ LoopInterleaving = true;
+ LoopVectorization = true;
+ SLPVectorization = false;
LoopUnrolling = true;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
Coroutines = false;
LibraryInfo = nullptr;
Inliner = nullptr;
DisableUnrollLoops = false;
- SLPVectorize = RunSLPVectorization;
- LoopVectorize = EnableLoopVectorization;
- LoopsInterleaved = EnableLoopInterleaving;
+ SLPVectorize = false;
+ LoopVectorize = true;
+ LoopsInterleaved = true;
RerollLoops = RunLoopRerolling;
NewGVN = RunNewGVN;
LicmMssaOptCap = SetLicmMssaOptCap;
+ /// Constructs the pass: both "only when forced" flags are handed to Impl
+ /// through its constructor, then initializeLoopVectorizePass is run on the
+ /// global PassRegistry.
explicit LoopVectorize(bool InterleaveOnlyWhenForced = false,
bool VectorizeOnlyWhenForced = false)
- : FunctionPass(ID) {
- Impl.InterleaveOnlyWhenForced = InterleaveOnlyWhenForced;
- Impl.VectorizeOnlyWhenForced = VectorizeOnlyWhenForced;
+ : FunctionPass(ID),
+ Impl({InterleaveOnlyWhenForced, VectorizeOnlyWhenForced}) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
return true;
}
+/// Combines the caller-supplied options with the EnableLoopInterleaving and
+/// EnableLoopVectorization settings: each "only when forced" member ends up
+/// true if the caller asked for it or the matching Enable* setting is false,
+/// so a false setting limits the pass to loops that explicitly request the
+/// transform.
+LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
+ : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced ||
+ !EnableLoopInterleaving),
+ VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced ||
+ !EnableLoopVectorization) {}
+
bool LoopVectorizePass::processLoop(Loop *L) {
assert((EnableVPlanNativePath || L->empty()) &&
"VPlan-native path is not enabled. Only process inner loops.");
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
-cl::opt<bool>
- llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
- cl::desc("Run the SLP vectorization passes"));
+// Hidden "vectorize-slp" flag that turns the SLP vectorization passes on or
+// off; it is on by default.
+static cl::opt<bool>
+    RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
+                        cl::desc("Run the SLP vectorization passes"));
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
LoopInfo *LI_, DominatorTree *DT_,
AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_) {
+ if (!RunSLPVectorization)
+ return false;
SE = SE_;
TTI = TTI_;
TLI = TLI_;
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP
-; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
+; RUN: opt < %s -O3 -vectorize-slp=false -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
cl::desc("Disable loop unrolling in all relevant passes"),
cl::init(false));
-static cl::opt<bool>
-DisableSLPVectorization("disable-slp-vectorization",
- cl::desc("Disable the slp vectorization pass"),
- cl::init(false));
-
static cl::opt<bool> EmitSummaryIndex("module-summary",
cl::desc("Emit module summary index"),
cl::init(false));
Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
DisableLoopUnrolling : OptLevel == 0;
- // Check if vectorization is explicitly disabled via -vectorize-loops=false.
- // The flag enables vectorization in the LoopVectorize pass, it is on by
- // default, and if it was disabled, leave it disabled here.
- // Another flag that exists: -loop-vectorize, controls adding the pass to the
- // pass manager. If set, the pass is added, and there is no additional check
- // here for it.
- if (Builder.LoopVectorize)
- Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
-
- // When #pragma vectorize is on for SLP, do the same as above
- Builder.SLPVectorize =
- DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;
+ Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
+
+ Builder.SLPVectorize = OptLevel > 1 && SizeLevel < 2;
if (TM)
TM->adjustPassManager(Builder);