[VectorCombine] Switch to using a worklist.

author Florian Hahn <flo@fhahn.com>

Wed, 22 Sep 2021 08:30:53 +0000 (09:30 +0100)

committer Florian Hahn <flo@fhahn.com>

Wed, 22 Sep 2021 08:54:58 +0000 (09:54 +0100)
author Florian Hahn <flo@fhahn.com>
Wed, 22 Sep 2021 08:30:53 +0000 (09:30 +0100)
committer Florian Hahn <flo@fhahn.com>
Wed, 22 Sep 2021 08:54:58 +0000 (09:54 +0100)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp

index 8d01da9..dcf7b68 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -31,10 +31,12 @@
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Vectorize.h"
  
+#define DEBUG_TYPE "vector-combine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
  using namespace llvm;
  using namespace llvm::PatternMatch;
  
-#define DEBUG_TYPE "vector-combine"
  STATISTIC(NumVecLoad, "Number of vector loads formed");
  STATISTIC(NumVecCmp, "Number of vector compares formed");
  STATISTIC(NumVecBO, "Number of vector binops formed");
@@ -73,6 +75,7 @@ private:
    const DominatorTree &DT;
    AAResults &AA;
    AssumptionCache &AC;
+  InstructionWorklist Worklist;
  
    bool vectorizeLoadInsert(Instruction &I);
    ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
@@ -92,14 +95,26 @@ private:
    bool foldExtractedCmps(Instruction &I);
    bool foldSingleElementStore(Instruction &I);
    bool scalarizeLoadExtract(Instruction &I);
+
+  void replaceValue(Value &Old, Value &New) {
+    Old.replaceAllUsesWith(&New);
+    New.takeName(&Old);
+    if (auto *NewI = dyn_cast<Instruction>(&New)) {
+      Worklist.pushUsersToWorkList(*NewI);
+      Worklist.pushValue(NewI);
+    }
+    Worklist.pushValue(&Old);
+  }
+
+  void eraseInstruction(Instruction &I) {
+    for (Value *Op : I.operands())
+      Worklist.pushValue(Op);
+    Worklist.remove(&I);
+    I.eraseFromParent();
+  }
  };
  } // namespace
  
-static void replaceValue(Value &Old, Value &New) {
-  Old.replaceAllUsesWith(&New);
-  New.takeName(&Old);
-}
-
  bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
    // Match insert into fixed vector of scalar value.
    // TODO: Handle non-zero insert index.
@@ -501,6 +516,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
    else
      foldExtExtBinop(Ext0, Ext1, I);
  
+  Worklist.push(Ext0);
+  Worklist.push(Ext1);
    return true;
  }
  
@@ -928,8 +945,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
          DL);
      NSI->setAlignment(ScalarOpAlignment);
      replaceValue(I, *NSI);
-    // Need erasing the store manually.
-    I.eraseFromParent();
+    eraseInstruction(I);
      return true;
    }
  
@@ -939,11 +955,10 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
  /// Try to scalarize vector loads feeding extractelement instructions.
  bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
    Value *Ptr;
-  Value *Idx;
-  if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
+  if (!match(&I, m_Load(m_Value(Ptr))))
      return false;
  
-  auto *LI = cast<LoadInst>(I.getOperand(0));
+  auto *LI = cast<LoadInst>(&I);
    const DataLayout &DL = I.getModule()->getDataLayout();
    if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
      return false;
@@ -1039,6 +1054,16 @@ bool VectorCombine::run() {
      return false;
  
    bool MadeChange = false;
+  auto FoldInst = [this, &MadeChange](Instruction &I) {
+    Builder.SetInsertPoint(&I);
+    MadeChange |= vectorizeLoadInsert(I);
+    MadeChange |= foldExtractExtract(I);
+    MadeChange |= foldBitcastShuf(I);
+    MadeChange |= scalarizeBinopOrCmp(I);
+    MadeChange |= foldExtractedCmps(I);
+    MadeChange |= scalarizeLoadExtract(I);
+    MadeChange |= foldSingleElementStore(I);
+  };
    for (BasicBlock &BB : F) {
      // Ignore unreachable basic blocks.
      if (!DT.isReachableFromEntry(&BB))
@@ -1047,21 +1072,22 @@ bool VectorCombine::run() {
      for (Instruction &I : make_early_inc_range(BB)) {
        if (isa<DbgInfoIntrinsic>(I))
          continue;
-      Builder.SetInsertPoint(&I);
-      MadeChange |= vectorizeLoadInsert(I);
-      MadeChange |= foldExtractExtract(I);
-      MadeChange |= foldBitcastShuf(I);
-      MadeChange |= scalarizeBinopOrCmp(I);
-      MadeChange |= foldExtractedCmps(I);
-      MadeChange |= scalarizeLoadExtract(I);
-      MadeChange |= foldSingleElementStore(I);
+      FoldInst(I);
      }
    }
  
-  // We're done with transforms, so remove dead instructions.
-  if (MadeChange)
-    for (BasicBlock &BB : F)
-      SimplifyInstructionsInBlock(&BB);
+  while (!Worklist.isEmpty()) {
+    Instruction *I = Worklist.removeOne();
+    if (!I)
+      continue;
+
+    if (isInstructionTriviallyDead(I)) {
+      eraseInstruction(*I);
+      continue;
+    }
+
+    FoldInst(*I);
+  }
  
    return MadeChange;
  }
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

index 1089f54..8ba940f 100644 (file)
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -20,13 +20,14 @@ define void @matrix_extract_insert_scalar(i32 %i, i32 %k, i32 %j, [225 x double]
  ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 225
  ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP6]])
  ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <225 x double>, <225 x double>* [[TMP7]], align 8
-; CHECK-NEXT:    [[MATRIXEXT4:%.*]] = extractelement <225 x double> [[TMP8]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP5]]
+; CHECK-NEXT:    [[MATRIXEXT4:%.*]] = load double, double* [[TMP8]], align 8
  ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]]
-; CHECK-NEXT:    [[MATRIXEXT7:%.*]] = extractelement <225 x double> [[TMP8]], i64 [[TMP1]]
-; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP9]], align 8
+; CHECK-NEXT:    [[MATRIXEXT7:%.*]] = load double, double* [[TMP9]], align 8
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP10]], align 8
  ; CHECK-NEXT:    ret void
  ;
  entry:
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll

index f06c3f3..a1586c6 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
@@ -6,13 +6,14 @@ target triple = "arm64-apple-darwin"
  define void @load_extract_insert_store_const_idx(<225 x double>* %A) {
  ; CHECK-LABEL: @load_extract_insert_store_const_idx(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 0
+; CHECK-NEXT:    [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
  ; CHECK-NEXT:    [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 1
+; CHECK-NEXT:    [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
  ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP2]], align 8
  ; CHECK-NEXT:    ret void
  ;
  entry:
@@ -33,13 +34,14 @@ define void @load_extract_insert_store_var_idx_assume_valid(i64 %idx.1, i64 %idx
  ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_1]])
  ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i64 [[IDX_2:%.*]], 225
  ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_2]])
-; CHECK-NEXT:    [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]]
+; CHECK-NEXT:    [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
  ; CHECK-NEXT:    [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]]
+; CHECK-NEXT:    [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
  ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP2]], align 8
  ; CHECK-NEXT:    ret void
  ;
  entry:
@@ -69,13 +71,14 @@ define void @load_extract_insert_store_var_idx_assume_valid_in_dominating_block(
  ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_2]])
  ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
  ; CHECK:       loop:
-; CHECK-NEXT:    [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]]
+; CHECK-NEXT:    [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
  ; CHECK-NEXT:    [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]]
+; CHECK-NEXT:    [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
  ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP2]], align 8
  ; CHECK-NEXT:    [[C_2:%.*]] = call i1 @cond()
  ; CHECK-NEXT:    br i1 [[C_2]], label [[LOOP]], label [[EXIT]]
  ; CHECK:       exit:
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

index 472c78a..0e8d477 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -453,7 +453,9 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
  
  define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
  ; CHECK-LABEL: @PR34724(
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
  ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
  ; CHECK-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
  ; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

index abf5ab5..a08b2c0 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -453,7 +453,9 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
  
  define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
  ; CHECK-LABEL: @PR34724(
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
  ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
  ; CHECK-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
  ; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll

index 636ab23..c56c13e 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -465,7 +465,9 @@ define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
  ; CHECK-LABEL: @insert_store_ptr_strip(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
+; CHECK-NEXT:    [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
  ; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
  ; CHECK-NEXT:    ret void
  ;
author	Florian Hahn <flo@fhahn.com>
	Wed, 22 Sep 2021 08:30:53 +0000 (09:30 +0100)
committer	Florian Hahn <flo@fhahn.com>
	Wed, 22 Sep 2021 08:54:58 +0000 (09:54 +0100)
llvm/lib/Transforms/Vectorize/VectorCombine.cpp		patch \| blob \| history
llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/X86/extract-binop.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/load-insert-store.ll		patch \| blob \| history