From 403cfe5d707d0965218418b8e1e21caf5288fd65 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Fri, 28 May 2021 12:33:54 -0400
Subject: [PATCH] [PassManager] unify late simplifycfg options between regular
 and LTO pipelines

This is split off from D102002, and I think it is clear that the
difference in behavior was not intended. Options were added to
SimplifyCFG over time, but different chunks of the pass pipelines were
not kept in sync.
---
 llvm/lib/Passes/PassBuilder.cpp                | 35 ++++++++++-----------
 llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 36 ++++++++++------------
 .../PhaseOrdering/X86/simplifycfg-late.ll      | 34 ++++++--------------
 3 files changed, 41 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index b39481d..2e782fe 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1246,25 +1246,22 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     FPM.addPass(InstCombinePass());
   }
 
-  if (IsLTO) {
-    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
-  } else {
-    // Now that we've formed fast to execute loop structures, we do further
-    // optimizations. These are run afterward as they might block doing complex
-    // analyses and transforms such as what are needed for loop vectorization.
-
-    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-    // GVN, loop transforms, and others have already run, so it's now better to
-    // convert to more optimized IR using more aggressive simplify CFG options.
-    // The extra sinking transform can create larger basic blocks, so do this
-    // before SLP vectorization.
-    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
-                                    .forwardSwitchCondToPhi(true)
-                                    .convertSwitchToLookupTable(true)
-                                    .needCanonicalLoops(false)
-                                    .hoistCommonInsts(true)
-                                    .sinkCommonInsts(true)));
-  }
+  // Now that we've formed fast to execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as what are needed for loop vectorization.
+
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                  .forwardSwitchCondToPhi(true)
+                                  .convertSwitchToLookupTable(true)
+                                  .needCanonicalLoops(false)
+                                  .hoistCommonInsts(true)
+                                  .sinkCommonInsts(true)));
+
   if (IsLTO) {
     FPM.addPass(SCCPPass());
     FPM.addPass(InstCombinePass());
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index bdd2c2c..fd114b8 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -568,26 +568,22 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
     PM.add(createInstructionCombiningPass());
   }
 
-  if (IsLTO) {
-    PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
-                                           .hoistCommonInsts(true)));
-  } else {
-    // Now that we've formed fast to execute loop structures, we do further
-    // optimizations. These are run afterward as they might block doing complex
-    // analyses and transforms such as what are needed for loop vectorization.
-
-    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-    // GVN, loop transforms, and others have already run, so it's now better to
-    // convert to more optimized IR using more aggressive simplify CFG options.
-    // The extra sinking transform can create larger basic blocks, so do this
-    // before SLP vectorization.
-    PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
-                                           .forwardSwitchCondToPhi(true)
-                                           .convertSwitchToLookupTable(true)
-                                           .needCanonicalLoops(false)
-                                           .hoistCommonInsts(true)
-                                           .sinkCommonInsts(true)));
-  }
+  // Now that we've formed fast to execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as what are needed for loop vectorization.
+
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+                                         .forwardSwitchCondToPhi(true)
+                                         .convertSwitchToLookupTable(true)
+                                         .needCanonicalLoops(false)
+                                         .hoistCommonInsts(true)
+                                         .sinkCommonInsts(true)));
+
   if (IsLTO) {
     PM.add(createSCCPPass());                 // Propagate exposed constants
     PM.add(createInstructionCombiningPass()); // Clean up again
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll b/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
index 6a71445..3480e4c 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
@@ -9,32 +9,16 @@ target triple = "x86_64--"
 define i32 @f(i32 %c) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 42, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 43, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 44, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    i32 45, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    i32 46, label [[SW_BB4:%.*]]
-; CHECK-NEXT:    i32 47, label [[SW_BB5:%.*]]
-; CHECK-NEXT:    i32 48, label [[SW_BB6:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb4:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb5:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb6:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = add i32 [[C:%.*]], -42
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[SWITCH_TABLEIDX]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ 1, [[SW_BB6]] ], [ 62, [[SW_BB5]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    ret i32 15
 ;
 entry:
   switch i32 %c, label %sw.default [
-- 
2.7.4
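
Appendix (not part of the patch): a minimal standalone sketch of the late SimplifyCFG
configuration that both pipelines now share, for readers who want to construct it
outside of the pipeline-building code. The helper name buildLateSimplifyCFG and the
header paths are illustrative assumptions for an LLVM tree of this era; the option
setters themselves are exactly the ones shown in the diff above.

// Sketch only: builds a FunctionPassManager that runs SimplifyCFG with the
// aggressive late-pipeline options that addVectorPasses() now uses for both
// the regular and LTO pipelines. Header paths are assumptions, not from the patch.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"

using namespace llvm;

static FunctionPassManager buildLateSimplifyCFG() {
  FunctionPassManager FPM;
  // Same option set as in the patched PassBuilder/PassManagerBuilder code:
  // forward switch conditions to phis, allow lookup-table formation, relax
  // loop canonicalization, and hoist/sink common instructions.
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .forwardSwitchCondToPhi(true)
                                  .convertSwitchToLookupTable(true)
                                  .needCanonicalLoops(false)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));
  return FPM;
}

The builder-style setters on SimplifyCFGOptions are what make it practical to keep
this shared configuration identical between the legacy and new pass managers, which
is the point of the unification; the phase-ordering test above shows the observable
effect (the switch in @f now becomes a lookup table in the LTO run as well).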