[Passes][VectorCombine] enable early run generally and try load folds

author Sanjay Patel <spatel@rotateright.com>

Mon, 21 Nov 2022 18:16:56 +0000 (13:16 -0500)

committer Sanjay Patel <spatel@rotateright.com>

Mon, 21 Nov 2022 18:57:55 +0000 (13:57 -0500)
author Sanjay Patel <spatel@rotateright.com>
Mon, 21 Nov 2022 18:16:56 +0000 (13:16 -0500)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 21 Nov 2022 18:57:55 +0000 (13:57 -0500)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp

index 2183681..285645b 100644 (file)
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -615,10 +615,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
    // Delete small array after loop unroll.
    FPM.addPass(SROAPass());
  
-  // The matrix extension can introduce large vector operations early, which can
-  // benefit from running vector-combine early on.
-  if (EnableMatrix)
-    FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
+  // Try vectorization/scalarization transforms that are both improvements
+  // themselves and can allow further folds with GVN and InstCombine.
+  FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
  
    // Eliminate redundancies.
    FPM.addPass(MergedLoadStoreMotionPass());
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp

index 7bd9ee9..7769aaf 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1700,8 +1700,6 @@ bool VectorCombine::run() {
      Builder.SetInsertPoint(&I);
      if (!TryEarlyFoldsOnly) {
        if (isa<FixedVectorType>(I.getType())) {
-        MadeChange |= vectorizeLoadInsert(I);
-        MadeChange |= widenSubvectorLoad(I);
          MadeChange |= foldInsExtFNeg(I);
          MadeChange |= foldBitcastShuf(I);
          MadeChange |= foldShuffleOfBinops(I);
@@ -1713,6 +1711,8 @@ bool VectorCombine::run() {
        }
      }
      if (isa<FixedVectorType>(I.getType())) {
+      MadeChange |= vectorizeLoadInsert(I);
+      MadeChange |= widenSubvectorLoad(I);
        MadeChange |= scalarizeBinopOrCmp(I);
        MadeChange |= scalarizeLoadExtract(I);
      }
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll

index 303e164..ddb84d0 100644 (file)
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -185,7 +185,7 @@
  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
-; CHECK-MATRIX: Running pass: VectorCombinePass
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll

index 42d553c..997f707 100644 (file)
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -158,6 +158,7 @@
  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

index e67b23d..51a0b45 100644 (file)
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -119,6 +119,7 @@
  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

index 01204ec..0390d64 100644 (file)
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -128,6 +128,7 @@
  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

index ccd7345..b47d19a 100644 (file)
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -157,6 +157,7 @@
  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

index dcaf0ff..87d2109 100644 (file)
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -122,6 +122,7 @@
  ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass
  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  ; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll

index 1ed7229..d37638a 100644 (file)
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -12,20 +12,13 @@ $getAt = comdat any
  define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
  ; SSE-LABEL: @ConvertVectors_ByRef(
  ; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
-; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; SSE-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
-; SSE-NEXT:    ret <4 x float> [[TMP7]]
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; SSE-NEXT:    ret <4 x float> [[TMP3]]
  ;
  ; AVX-LABEL: @ConvertVectors_ByRef(
  ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2
-; AVX-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8
-; AVX-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2
-; AVX-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3
-; AVX-NEXT:    ret <4 x float> [[TMP6]]
+; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; AVX-NEXT:    ret <4 x float> [[TMP3]]
  ;
    %2 = alloca ptr, align 8
    %3 = alloca <4 x float>, align 16
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 21 Nov 2022 18:16:56 +0000 (13:16 -0500)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 21 Nov 2022 18:57:55 +0000 (13:57 -0500)
llvm/lib/Passes/PassBuilderPipelines.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/VectorCombine.cpp		patch | blob | history
llvm/test/Other/new-pm-defaults.ll		patch | blob | history
llvm/test/Other/new-pm-thinlto-defaults.ll		patch | blob | history
llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll		patch | blob | history
llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll		patch | blob | history
llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll		patch | blob | history
llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll		patch | blob | history
llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll		patch | blob | history