From 88d8f10baf30b0df18eb542c426afc29b69f1313 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 10 May 2021 13:55:42 -0400 Subject: [PATCH] [PassManager] add helper function to hold set of vector passes (2nd try) This attempt is closer to no-functional-change-intended than the 1st attempt. As noted in D102002, there were at least 2 diffs that went unchecked in pass manager regression tests: different pass parameters (SimplifyCFG) and an extension point/callback. Those should be lifted from the original code blocks correctly now. --- llvm/include/llvm/Passes/PassBuilder.h | 3 + .../llvm/Transforms/IPO/PassManagerBuilder.h | 3 +- llvm/lib/Passes/PassBuilder.cpp | 239 +++++++++++---------- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 229 ++++++++++---------- 4 files changed, 249 insertions(+), 225 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 339a2b7..f8252b9 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -709,6 +709,9 @@ private: void addRequiredLTOPreLinkPasses(ModulePassManager &MPM); + void addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, + bool IsLTO); + static Optional<std::vector<PipelineElement>> parsePipelineText(StringRef Text); diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index a9928c3..46d9bee 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -218,7 +218,8 @@ private: void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); - + void addVectorPasses(legacy::PassManagerBase &PM, bool IsLTO); + public: /// populateFunctionPassManager - This fills in the function pass manager, /// which is expected to be run on each function immediately as it is diff --git 
a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e6554f6..6c2315c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1201,6 +1201,127 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, return MPM; } +/// TODO: Should LTO cause any differences to this set of passes? +void PassBuilder::addVectorPasses(OptimizationLevel Level, + FunctionPassManager &FPM, bool IsLTO) { + FPM.addPass(LoopVectorizePass( + LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); + + if (IsLTO) { + // The vectorizer may have significantly shortened a loop body; unroll + // again. Unroll small loops to hide loop backedge latency and saturate any + // parallel execution resources of an out-of-order processor. We also then + // need to clean up redundancies and loop invariant code. + // FIXME: It would be really good to use a loop-integrated instruction + // combiner for cleanup here so that the unrolling and LICM can be pipelined + // across the loop nests. + // We do UnrollAndJam in a separate LPM to ensure it happens before unroll + if (EnableUnrollAndJam && PTO.LoopUnrolling) + FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); + FPM.addPass(LoopUnrollPass(LoopUnrollOptions( + Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); + FPM.addPass(WarnMissedTransformationsPass()); + } + + if (!IsLTO) { + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + FPM.addPass(LoopLoadEliminationPass()); + } + // Cleanup after the loop optimization passes. + FPM.addPass(InstCombinePass()); + + if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. 
We want to track correlated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. + FPM.addPass(EarlyCSEPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(InstCombinePass()); + LoopPassManager LPM; + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == + OptimizationLevel::O3)); + FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM), EnableMSSALoopDependency, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + } + + if (IsLTO) { + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))); + } else { + // Now that we've formed fast to execute loop structures, we do further + // optimizations. These are run afterward as they might block doing complex + // analyses and transforms such as what are needed for loop vectorization. + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); + } + if (IsLTO) { + FPM.addPass(SCCPPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(BDCEPass()); + } + + // Optimize parallel scalar instruction chains into SIMD instructions. 
+ if (PTO.SLPVectorization) { + FPM.addPass(SLPVectorizerPass()); + if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { + FPM.addPass(EarlyCSEPass()); + } + } + // Enhance/cleanup vector code. + FPM.addPass(VectorCombinePass()); + + if (!IsLTO) { + FPM.addPass(InstCombinePass()); + // Unroll small loops to hide loop backedge latency and saturate any + // parallel execution resources of an out-of-order processor. We also then + // need to clean up redundancies and loop invariant code. + // FIXME: It would be really good to use a loop-integrated instruction + // combiner for cleanup here so that the unrolling and LICM can be pipelined + // across the loop nests. + // We do UnrollAndJam in a separate LPM to ensure it happens before unroll + if (EnableUnrollAndJam && PTO.LoopUnrolling) { + FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); + } + FPM.addPass(LoopUnrollPass(LoopUnrollOptions( + Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); + FPM.addPass(WarnMissedTransformationsPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true)); + } + + // Now that we've vectorized and unrolled loops, we may have more refined + // alignment information, try to re-derive it here. + FPM.addPass(AlignmentFromAssumptionsPass()); + + if (IsLTO) + FPM.addPass(InstCombinePass()); +} + ModulePassManager PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, bool LTOPreLink) { @@ -1295,91 +1416,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // from the TargetLibraryInfo. OptimizePM.addPass(InjectTLIMappings()); - // Now run the core loop vectorizer. 
- OptimizePM.addPass(LoopVectorizePass( - LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); - - // Eliminate loads by forwarding stores from the previous iteration to loads - // of the current iteration. - OptimizePM.addPass(LoopLoadEliminationPass()); - - // Cleanup after the loop optimization passes. - OptimizePM.addPass(InstCombinePass()); - - if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { - // At higher optimization levels, try to clean up any runtime overlap and - // alignment checks inserted by the vectorizer. We want to track correlated - // runtime checks for two inner loops in the same outer loop, fold any - // common computations, hoist loop-invariant aspects out of any outer loop, - // and unswitch the runtime checks if possible. Once hoisted, we may have - // dead (or speculatable) control flows or more combining opportunities. - OptimizePM.addPass(EarlyCSEPass()); - OptimizePM.addPass(CorrelatedValuePropagationPass()); - OptimizePM.addPass(InstCombinePass()); - LoopPassManager LPM; - LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == - OptimizationLevel::O3)); - OptimizePM.addPass( - RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), EnableMSSALoopDependency, - /*UseBlockFrequencyInfo=*/true)); - OptimizePM.addPass(SimplifyCFGPass()); - OptimizePM.addPass(InstCombinePass()); - } - - // Now that we've formed fast to execute loop structures, we do further - // optimizations. These are run afterward as they might block doing complex - // analyses and transforms such as what are needed for loop vectorization. - - // Cleanup after loop vectorization, etc. Simplification passes like CVP and - // GVN, loop transforms, and others have already run, so it's now better to - // convert to more optimized IR using more aggressive simplify CFG options. 
- // The extra sinking transform can create larger basic blocks, so do this - // before SLP vectorization. - OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); - - // Optimize parallel scalar instruction chains into SIMD instructions. - if (PTO.SLPVectorization) { - OptimizePM.addPass(SLPVectorizerPass()); - if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { - OptimizePM.addPass(EarlyCSEPass()); - } - } - - // Enhance/cleanup vector code. - OptimizePM.addPass(VectorCombinePass()); - OptimizePM.addPass(InstCombinePass()); - - // Unroll small loops to hide loop backedge latency and saturate any parallel - // execution resources of an out-of-order processor. We also then need to - // clean up redundancies and loop invariant code. - // FIXME: It would be really good to use a loop-integrated instruction - // combiner for cleanup here so that the unrolling and LICM can be pipelined - // across the loop nests. - // We do UnrollAndJam in a separate LPM to ensure it happens before unroll - if (EnableUnrollAndJam && PTO.LoopUnrolling) { - OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); - } - OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions( - Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll))); - OptimizePM.addPass(WarnMissedTransformationsPass()); - OptimizePM.addPass(InstCombinePass()); - OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true)); - - // Now that we've vectorized and unrolled loops, we may have more refined - // alignment information, try to re-derive it here. 
- OptimizePM.addPass(AlignmentFromAssumptionsPass()); + addVectorPasses(Level, OptimizePM, /* IsLTO */ false); // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff @@ -1825,39 +1862,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); MainFPM.addPass(LoopDistributePass()); - MainFPM.addPass(LoopVectorizePass( - LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); - // The vectorizer may have significantly shortened a loop body; unroll again. - MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions( - Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll))); - - MainFPM.addPass(WarnMissedTransformationsPass()); - - MainFPM.addPass(InstCombinePass()); - MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))); - MainFPM.addPass(SCCPPass()); - MainFPM.addPass(InstCombinePass()); - MainFPM.addPass(BDCEPass()); - - // More scalar chains could be vectorized due to more alias information - if (PTO.SLPVectorization) { - MainFPM.addPass(SLPVectorizerPass()); - if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { - MainFPM.addPass(EarlyCSEPass()); - } - } - - MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization. - - // After vectorization, assume intrinsics may tell us more about pointer - // alignments. - MainFPM.addPass(AlignmentFromAssumptionsPass()); - // FIXME: Conditionally run LoadCombine here, after it's ported - // (in case we still have this pass, given its questionable usefulness). 
+ addVectorPasses(Level, MainFPM, /* IsLTO */ true); - MainFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 87731e5..bdd2c2c 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -523,6 +523,124 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createControlHeightReductionLegacyPass()); } +/// FIXME: Should LTO cause any differences to this set of passes? +void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, + bool IsLTO) { + PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + + if (IsLTO) { + // The vectorizer may have significantly shortened a loop body; unroll + // again. Unroll small loops to hide loop backedge latency and saturate any + // parallel execution resources of an out-of-order processor. We also then + // need to clean up redundancies and loop invariant code. + // FIXME: It would be really good to use a loop-integrated instruction + // combiner for cleanup here so that the unrolling and LICM can be pipelined + // across the loop nests. + // We do UnrollAndJam in a separate LPM to ensure it happens before unroll + if (EnableUnrollAndJam && !DisableUnrollLoops) + PM.add(createLoopUnrollAndJamPass(OptLevel)); + PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + PM.add(createWarnMissedTransformationsPass()); + } + + if (!IsLTO) { + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + PM.add(createLoopLoadEliminationPass()); + } + // Cleanup after the loop optimization passes. 
+ PM.add(createInstructionCombiningPass()); + + if (OptLevel > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correlated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. + PM.add(createEarlyCSEPass()); + PM.add(createCorrelatedValuePropagationPass()); + PM.add(createInstructionCombiningPass()); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + PM.add(createCFGSimplificationPass()); + PM.add(createInstructionCombiningPass()); + } + + if (IsLTO) { + PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert + .hoistCommonInsts(true))); + } else { + // Now that we've formed fast to execute loop structures, we do further + // optimizations. These are run afterward as they might block doing complex + // analyses and transforms such as what are needed for loop vectorization. + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. 
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); + } + if (IsLTO) { + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + } + + // Optimize parallel scalar instruction chains into SIMD instructions. + if (SLPVectorize) { + PM.add(createSLPVectorizerPass()); + if (OptLevel > 1 && ExtraVectorizerPasses) + PM.add(createEarlyCSEPass()); + } + + // Enhance/cleanup vector code. + PM.add(createVectorCombinePass()); + + if (!IsLTO) { + addExtensionsToPM(EP_Peephole, PM); + PM.add(createInstructionCombiningPass()); + + if (EnableUnrollAndJam && !DisableUnrollLoops) { + // Unroll and Jam. We do this before unroll but need to be in a separate + // loop pass manager in order for the outer loop to be processed by + // unroll and jam before the inner loop is unrolled. + PM.add(createLoopUnrollAndJamPass(OptLevel)); + } + + // Unroll small loops + PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + + if (!DisableUnrollLoops) { + // LoopUnroll may generate some redundancy to clean up. + PM.add(createInstructionCombiningPass()); + + // Runtime unrolling will introduce runtime check in loop prologue. If the + // unrolled loop is an inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + PM.add(createWarnMissedTransformationsPass()); + } + + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. 
+ PM.add(createAlignmentFromAssumptionsPass()); + + if (IsLTO) + PM.add(createInstructionCombiningPass()); +} + void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link @@ -794,86 +912,7 @@ void PassManagerBuilder::populateModulePassManager( // llvm.loop.distribute=true or when -enable-loop-distribute is specified. MPM.add(createLoopDistributePass()); - MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); - - // Eliminate loads by forwarding stores from the previous iteration to loads - // of the current iteration. - MPM.add(createLoopLoadEliminationPass()); - - // FIXME: Because of #pragma vectorize enable, the passes below are always - // inserted in the pipeline, even when the vectorizer doesn't run (ex. when - // on -O1 and no #pragma is found). Would be good to have these two passes - // as function calls, so that we can only pass them when the vectorizer - // changed the code. - MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1 && ExtraVectorizerPasses) { - // At higher optimization levels, try to clean up any runtime overlap and - // alignment checks inserted by the vectorizer. We want to track correllated - // runtime checks for two inner loops in the same outer loop, fold any - // common computations, hoist loop-invariant aspects out of any outer loop, - // and unswitch the runtime checks if possible. Once hoisted, we may have - // dead (or speculatable) control flows or more combining opportunities. - MPM.add(createEarlyCSEPass()); - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createInstructionCombiningPass()); - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); - } - - // Cleanup after loop vectorization, etc. 
Simplification passes like CVP and - // GVN, loop transforms, and others have already run, so it's now better to - // convert to more optimized IR using more aggressive simplify CFG options. - // The extra sinking transform can create larger basic blocks, so do this - // before SLP vectorization. - MPM.add(createCFGSimplificationPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); - - if (SLPVectorize) { - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - if (OptLevel > 1 && ExtraVectorizerPasses) { - MPM.add(createEarlyCSEPass()); - } - } - - // Enhance/cleanup vector code. - MPM.add(createVectorCombinePass()); - - addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createInstructionCombiningPass()); - - if (EnableUnrollAndJam && !DisableUnrollLoops) { - // Unroll and Jam. We do this before unroll but need to be in a separate - // loop pass manager in order for the outer loop to be processed by - // unroll and jam before the inner loop is unrolled. - MPM.add(createLoopUnrollAndJamPass(OptLevel)); - } - - // Unroll small loops - MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, - ForgetAllSCEVInLoopUnroll)); - - if (!DisableUnrollLoops) { - // LoopUnroll may generate some redundency to cleanup. - MPM.add(createInstructionCombiningPass()); - - // Runtime unrolling will introduce runtime check in loop prologue. If the - // unrolled loop is a inner loop, then the prologue will be inside the - // outer loop. LICM pass can help to promote the runtime check out if the - // checked value is loop invariant. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - } - - MPM.add(createWarnMissedTransformationsPass()); - - // After vectorization and unrolling, assume intrinsics may tell us more - // about pointer alignments. 
- MPM.add(createAlignmentFromAssumptionsPass()); + addVectorPasses(MPM, /* IsLTO */ false); // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes @@ -1083,35 +1122,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); PM.add(createLoopDistributePass()); - PM.add(createLoopVectorizePass(true, !LoopVectorize)); - // The vectorizer may have significantly shortened a loop body; unroll again. - PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, - ForgetAllSCEVInLoopUnroll)); - - PM.add(createWarnMissedTransformationsPass()); - - // Now that we've optimized loops (in particular loop induction variables), - // we may have exposed more scalar opportunities. Run parts of the scalar - // optimizer again at this point. - PM.add(createInstructionCombiningPass()); // Initial cleanup - PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert - .hoistCommonInsts(true))); - PM.add(createSCCPPass()); // Propagate exposed constants - PM.add(createInstructionCombiningPass()); // Clean up again - PM.add(createBitTrackingDCEPass()); - - // More scalar chains could be vectorized due to more alias information - if (SLPVectorize) - PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - PM.add(createVectorCombinePass()); // Clean up partial vectorization. - - // After vectorization, assume intrinsics may tell us more about pointer - // alignments. - PM.add(createAlignmentFromAssumptionsPass()); - // Cleanup and simplify the code after the scalar optimizations. - PM.add(createInstructionCombiningPass()); + addVectorPasses(PM, /* IsLTO */ true); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); -- 2.7.4