From e29874eaa04d24b8c67776bf5729474d671a58f6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 17 Apr 2019 04:55:24 +0000 Subject: [PATCH] Revert "Add basic loop fusion pass." Per request. This reverts commit r358543/ab70da07286e618016e78247e4a24fcb84077fda. llvm-svn: 358553 --- llvm/include/llvm/InitializePasses.h | 1 - llvm/include/llvm/Transforms/Scalar.h | 6 - llvm/include/llvm/Transforms/Scalar/LoopFuse.h | 30 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 - llvm/lib/Transforms/Scalar/LoopFuse.cpp | 1212 ------------------------ llvm/lib/Transforms/Scalar/Scalar.cpp | 1 - llvm/test/Transforms/LoopFusion/cannot_fuse.ll | 371 -------- llvm/test/Transforms/LoopFusion/four_loops.ll | 136 --- llvm/test/Transforms/LoopFusion/inner_loops.ll | 86 -- llvm/test/Transforms/LoopFusion/loop_nest.ll | 120 --- llvm/test/Transforms/LoopFusion/simple.ll | 317 ------- 13 files changed, 2283 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Scalar/LoopFuse.h delete mode 100644 llvm/lib/Transforms/Scalar/LoopFuse.cpp delete mode 100644 llvm/test/Transforms/LoopFusion/cannot_fuse.ll delete mode 100644 llvm/test/Transforms/LoopFusion/four_loops.ll delete mode 100644 llvm/test/Transforms/LoopFusion/inner_loops.ll delete mode 100644 llvm/test/Transforms/LoopFusion/loop_nest.ll delete mode 100644 llvm/test/Transforms/LoopFusion/simple.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e0ea208..c1ed435 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -219,7 +219,6 @@ void initializeLoopDeletionLegacyPassPass(PassRegistry&); void initializeLoopDistributeLegacyPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); void initializeLoopGuardWideningLegacyPassPass(PassRegistry&); -void initializeLoopFuseLegacyPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 8e86827..402ee6f 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -460,12 +460,6 @@ FunctionPass *createLoopDistributePass(); //===----------------------------------------------------------------------===// // -// LoopFuse - Fuse loops. -// -FunctionPass *createLoopFusePass(); - -//===----------------------------------------------------------------------===// -// // LoopLoadElimination - Perform loop-aware load elimination. // FunctionPass *createLoopLoadEliminationPass(); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopFuse.h b/llvm/include/llvm/Transforms/Scalar/LoopFuse.h deleted file mode 100644 index d3a02db..0000000 --- a/llvm/include/llvm/Transforms/Scalar/LoopFuse.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- LoopFuse.h - Loop Fusion Pass ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the Loop Fusion pass. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H -#define LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -class Function; - -class LoopFusePass : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 074abeb..c5fd682 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -122,7 +122,6 @@ #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" -#include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopLoadElimination.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index aa75af9..781d6d8 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -197,7 +197,6 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) -FUNCTION_PASS("loop-fuse", LoopFusePass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index e6f8901..9c33971 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -28,7 +28,6 @@ add_llvm_library(LLVMScalarOpts LoopDeletion.cpp LoopDataPrefetch.cpp LoopDistribute.cpp - LoopFuse.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp LoopInterchange.cpp diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp deleted file mode 100644 index 1d2394b..0000000 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ /dev/null @@ -1,1212 +0,0 @@ -//===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the loop fusion pass. -/// The implementation is largely based on the following document: -/// -/// Code Transformations to Augment the Scope of Loop Fusion in a -/// Production Compiler -/// Christopher Mark Barton -/// MSc Thesis -/// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf -/// -/// The general approach taken is to collect sets of control flow equivalent -/// loops and test whether they can be fused. The necessary conditions for -/// fusion are: -/// 1. The loops must be adjacent (there cannot be any statements between -/// the two loops). -/// 2. The loops must be conforming (they must execute the same number of -/// iterations). -/// 3. The loops must be control flow equivalent (if one loop executes, the -/// other is guaranteed to execute). -/// 4. There cannot be any negative distance dependencies between the loops. -/// If all of these conditions are satisfied, it is safe to fuse the loops. -/// -/// This implementation creates FusionCandidates that represent the loop and the -/// necessary information needed by fusion. It then operates on the fusion -/// candidates, first confirming that the candidate is eligible for fusion. The -/// candidates are then collected into control flow equivalent sets, sorted in -/// dominance order. Each set of control flow equivalent candidates is then -/// traversed, attempting to fuse pairs of candidates in the set. If all -/// requirements for fusion are met, the two candidates are fused, creating a -/// new (fused) candidate which is then added back into the set to consider for -/// additional fusion. -/// -/// This implementation currently does not make any modifications to remove -/// conditions for fusion. Code transformations to make loops conform to each of -/// the conditions for fusion are discussed in more detail in the document -/// above. These can be added to the current implementation in the future. -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/LoopFuse.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "loop-fusion" - -STATISTIC(FuseCounter, "Count number of loop fusions performed"); -STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion"); -STATISTIC(InvalidPreheader, "Loop has invalid preheader"); -STATISTIC(InvalidHeader, "Loop has invalid header"); -STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks"); -STATISTIC(InvalidExitBlock, "Loop has invalid exit block"); -STATISTIC(InvalidLatch, "Loop has invalid latch"); -STATISTIC(InvalidLoop, "Loop is invalid"); -STATISTIC(AddressTakenBB, "Basic block has address taken"); -STATISTIC(MayThrowException, "Loop may throw an exception"); -STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access"); -STATISTIC(NotSimplifiedForm, "Loop is not in simplified form"); -STATISTIC(InvalidDependencies, "Dependencies prevent fusion"); -STATISTIC(InvalidTripCount, - "Loop does not have invariant backedge taken count"); -STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop"); -STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same"); -STATISTIC(NonAdjacent, "Candidates are not adjacent"); -STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader"); - -enum FusionDependenceAnalysisChoice { - FUSION_DEPENDENCE_ANALYSIS_SCEV, - FUSION_DEPENDENCE_ANALYSIS_DA, - FUSION_DEPENDENCE_ANALYSIS_ALL, -}; - -static cl::opt FusionDependenceAnalysis( - "loop-fusion-dependence-analysis", - cl::desc("Which dependence analysis should loop fusion use?"), - cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev", - "Use the scalar evolution interface"), - clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da", - "Use the dependence analysis interface"), - clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all", - "Use all available analyses")), - cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore); - -#ifndef NDEBUG -static cl::opt - VerboseFusionDebugging("loop-fusion-verbose-debug", - cl::desc("Enable verbose debugging for Loop Fusion"), - cl::Hidden, cl::init(false), cl::ZeroOrMore); -#endif - -/// This class is used to represent a candidate for loop fusion. When it is -/// constructed, it checks the conditions for loop fusion to ensure that it -/// represents a valid candidate. It caches several parts of a loop that are -/// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead -/// of continually querying the underlying Loop to retrieve these values. It is -/// assumed these will not change throughout loop fusion. -/// -/// The invalidate method should be used to indicate that the FusionCandidate is -/// no longer a valid candidate for fusion. Similarly, the isValid() method can -/// be used to ensure that the FusionCandidate is still valid for fusion. -struct FusionCandidate { - /// Cache of parts of the loop used throughout loop fusion. These should not - /// need to change throughout the analysis and transformation. - /// These parts are cached to avoid repeatedly looking up in the Loop class. - - /// Preheader of the loop this candidate represents - BasicBlock *Preheader; - /// Header of the loop this candidate represents - BasicBlock *Header; - /// Blocks in the loop that exit the loop - BasicBlock *ExitingBlock; - /// The successor block of this loop (where the exiting blocks go to) - BasicBlock *ExitBlock; - /// Latch of the loop - BasicBlock *Latch; - /// The loop that this fusion candidate represents - Loop *L; - /// Vector of instructions in this loop that read from memory - SmallVector MemReads; - /// Vector of instructions in this loop that write to memory - SmallVector MemWrites; - /// Are all of the members of this fusion candidate still valid - bool Valid; - - /// Dominator and PostDominator trees are needed for the - /// FusionCandidateCompare function, required by FusionCandidateSet to - /// determine where the FusionCandidate should be inserted into the set. These - /// are used to establish ordering of the FusionCandidates based on dominance. - const DominatorTree *DT; - const PostDominatorTree *PDT; - - FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT) - : Preheader(L->getLoopPreheader()), Header(L->getHeader()), - ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), - Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) { - - // Walk over all blocks in the loop and check for conditions that may - // prevent fusion. For each block, walk over all instructions and collect - // the memory reads and writes If any instructions that prevent fusion are - // found, invalidate this object and return. - for (BasicBlock *BB : L->blocks()) { - if (BB->hasAddressTaken()) { - AddressTakenBB++; - invalidate(); - return; - } - - for (Instruction &I : *BB) { - if (I.mayThrow()) { - MayThrowException++; - invalidate(); - return; - } - if (StoreInst *SI = dyn_cast(&I)) { - if (SI->isVolatile()) { - ContainsVolatileAccess++; - invalidate(); - return; - } - } - if (LoadInst *LI = dyn_cast(&I)) { - if (LI->isVolatile()) { - ContainsVolatileAccess++; - invalidate(); - return; - } - } - if (I.mayWriteToMemory()) - MemWrites.push_back(&I); - if (I.mayReadFromMemory()) - MemReads.push_back(&I); - } - } - } - - /// Check if all members of the class are valid. - bool isValid() const { - return Preheader && Header && ExitingBlock && ExitBlock && Latch && L && - !L->isInvalid() && Valid; - } - - /// Verify that all members are in sync with the Loop object. - void verify() const { - assert(isValid() && "Candidate is not valid!!"); - assert(!L->isInvalid() && "Loop is invalid!"); - assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync"); - assert(Header == L->getHeader() && "Header is out of sync"); - assert(ExitingBlock == L->getExitingBlock() && - "Exiting Blocks is out of sync"); - assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync"); - assert(Latch == L->getLoopLatch() && "Latch is out of sync"); - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { - dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr") - << "\n" - << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n" - << "\tExitingBB: " - << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n" - << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr") - << "\n" - << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"; - } -#endif - -private: - // This is only used internally for now, to clear the MemWrites and MemReads - // list and setting Valid to false. I can't envision other uses of this right - // now, since once FusionCandidates are put into the FusionCandidateSet they - // are immutable. Thus, any time we need to change/update a FusionCandidate, - // we must create a new one and insert it into the FusionCandidateSet to - // ensure the FusionCandidateSet remains ordered correctly. - void invalidate() { - MemWrites.clear(); - MemReads.clear(); - Valid = false; - } -}; - -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FusionCandidate &FC) { - if (FC.isValid()) - OS << FC.Preheader->getName(); - else - OS << ""; - - return OS; -} - -struct FusionCandidateCompare { - /// Comparison functor to sort two Control Flow Equivalent fusion candidates - /// into dominance order. - /// If LHS dominates RHS and RHS post-dominates LHS, return true; - /// IF RHS dominates LHS and LHS post-dominates RHS, return false; - bool operator()(const FusionCandidate &LHS, - const FusionCandidate &RHS) const { - const DominatorTree *DT = LHS.DT; - const PostDominatorTree *PDT = LHS.PDT; - - assert(DT && PDT && "Expecting valid dominator tree"); - - if (DT->dominates(LHS.Preheader, RHS.Preheader)) { - // Verify RHS Postdominates LHS - assert(PDT->dominates(RHS.Preheader, LHS.Preheader)); - return true; - } - - if (DT->dominates(RHS.Preheader, LHS.Preheader)) { - // RHS dominates LHS - // Verify LHS post-dominates RHS - assert(PDT->dominates(LHS.Preheader, RHS.Preheader)); - return false; - } - // If LHS does not dominate RHS and RHS does not dominate LHS then there is - // no dominance relationship between the two FusionCandidates. Thus, they - // should not be in the same set together. - llvm_unreachable( - "No dominance relationship between these fusion candidates!"); - } -}; - -namespace { -using LoopVector = SmallVector; - -// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance -// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0 -// dominates FC1 and FC1 post-dominates FC0. -// std::set was chosen because we want a sorted data structure with stable -// iterators. A subsequent patch to loop fusion will enable fusing non-ajdacent -// loops by moving intervening code around. When this intervening code contains -// loops, those loops will be moved also. The corresponding FusionCandidates -// will also need to be moved accordingly. As this is done, having stable -// iterators will simplify the logic. Similarly, having an efficient insert that -// keeps the FusionCandidateSet sorted will also simplify the implementation. -using FusionCandidateSet = std::set; -using FusionCandidateCollection = SmallVector; -} // namespace - -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FusionCandidateSet &CandSet) { - for (auto IT : CandSet) - OS << IT << "\n"; - - return OS; -} - -static void -printFusionCandidates(const FusionCandidateCollection &FusionCandidates) { - LLVM_DEBUG(dbgs() << "Fusion Candidates: \n"); - for (const auto &CandidateSet : FusionCandidates) { - LLVM_DEBUG({ - dbgs() << "*** Fusion Candidate Set ***\n"; - dbgs() << CandidateSet; - dbgs() << "****************************\n"; - }); - } -} - -/// Collect all loops in function at the same nest level, starting at the -/// outermost level. -/// -/// This data structure collects all loops at the same nest level for a -/// given function (specified by the LoopInfo object). It starts at the -/// outermost level. -struct LoopDepthTree { - using LoopsOnLevelTy = SmallVector; - using iterator = LoopsOnLevelTy::iterator; - using const_iterator = LoopsOnLevelTy::const_iterator; - - LoopDepthTree(LoopInfo &LI) : Depth(1) { - if (!LI.empty()) - LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend())); - } - - /// Test whether a given loop has been removed from the function, and thus is - /// no longer valid. - bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); } - - /// Record that a given loop has been removed from the function and is no - /// longer valid. - void removeLoop(const Loop *L) { RemovedLoops.insert(L); } - - /// Descend the tree to the next (inner) nesting level - void descend() { - LoopsOnLevelTy LoopsOnNextLevel; - - for (const LoopVector &LV : *this) - for (Loop *L : LV) - if (!isRemovedLoop(L) && L->begin() != L->end()) - LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end())); - - LoopsOnLevel = LoopsOnNextLevel; - RemovedLoops.clear(); - Depth++; - } - - bool empty() const { return size() == 0; } - size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); } - unsigned getDepth() const { return Depth; } - - iterator begin() { return LoopsOnLevel.begin(); } - iterator end() { return LoopsOnLevel.end(); } - const_iterator begin() const { return LoopsOnLevel.begin(); } - const_iterator end() const { return LoopsOnLevel.end(); } - -private: - /// Set of loops that have been removed from the function and are no longer - /// valid. - SmallPtrSet RemovedLoops; - - /// Depth of the current level, starting at 1 (outermost loops). - unsigned Depth; - - /// Vector of loops at the current depth level that have the same parent loop - LoopsOnLevelTy LoopsOnLevel; -}; - -#ifndef NDEBUG -static void printLoopVector(const LoopVector &LV) { - dbgs() << "****************************\n"; - for (auto L : LV) - printLoop(*L, dbgs()); - dbgs() << "****************************\n"; -} -#endif - -static void reportLoopFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) - << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName())) - << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); -} - -struct LoopFuser { -private: - // Sets of control flow equivalent fusion candidates for a given nest level. - FusionCandidateCollection FusionCandidates; - - LoopDepthTree LDT; - DomTreeUpdater DTU; - - LoopInfo &LI; - DominatorTree &DT; - DependenceInfo &DI; - ScalarEvolution &SE; - PostDominatorTree &PDT; - OptimizationRemarkEmitter &ORE; - -public: - LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI, - ScalarEvolution &SE, PostDominatorTree &PDT, - OptimizationRemarkEmitter &ORE, const DataLayout &DL) - : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI), - DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {} - - /// This is the main entry point for loop fusion. It will traverse the - /// specified function and collect candidate loops to fuse, starting at the - /// outermost nesting level and working inwards. - bool fuseLoops(Function &F) { -#ifndef NDEBUG - if (VerboseFusionDebugging) { - LI.print(dbgs()); - } -#endif - - LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName() - << "\n"); - bool Changed = false; - - while (!LDT.empty()) { - LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth " - << LDT.getDepth() << "\n";); - - for (const LoopVector &LV : LDT) { - assert(LV.size() > 0 && "Empty loop set was build!"); - - // Skip singleton loop sets as they do not offer fusion opportunities on - // this level. - if (LV.size() == 1) - continue; -#ifndef NDEBUG - if (VerboseFusionDebugging) { - LLVM_DEBUG({ - dbgs() << " Visit loop set (#" << LV.size() << "):\n"; - printLoopVector(LV); - }); - } -#endif - - collectFusionCandidates(LV); - Changed |= fuseCandidates(); - } - - // Finished analyzing candidates at this level. - // Descend to the next level and clear all of the candidates currently - // collected. Note that it will not be possible to fuse any of the - // existing candidates with new candidates because the new candidates will - // be at a different nest level and thus not be control flow equivalent - // with all of the candidates collected so far. - LLVM_DEBUG(dbgs() << "Descend one level!\n"); - LDT.descend(); - FusionCandidates.clear(); - } - - if (Changed) - LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump();); - -#ifndef NDEBUG - assert(DT.verify()); - assert(PDT.verify()); - LI.verify(DT); - SE.verify(); -#endif - - LLVM_DEBUG(dbgs() << "Loop Fusion complete\n"); - return Changed; - } - -private: - /// Determine if two fusion candidates are control flow equivalent. - /// - /// Two fusion candidates are control flow equivalent if when one executes, - /// the other is guaranteed to execute. This is determined using dominators - /// and post-dominators: if A dominates B and B post-dominates A then A and B - /// are control-flow equivalent. - bool isControlFlowEquivalent(const FusionCandidate &FC0, - const FusionCandidate &FC1) const { - assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders"); - - if (DT.dominates(FC0.Preheader, FC1.Preheader)) - return PDT.dominates(FC1.Preheader, FC0.Preheader); - - if (DT.dominates(FC1.Preheader, FC0.Preheader)) - return PDT.dominates(FC0.Preheader, FC1.Preheader); - - return false; - } - - /// Determine if a fusion candidate (representing a loop) is eligible for - /// fusion. Note that this only checks whether a single loop can be fused - it - /// does not check whether it is *legal* to fuse two loops together. - bool eligibleForFusion(const FusionCandidate &FC) const { - if (!FC.isValid()) { - LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n"); - if (!FC.Preheader) - InvalidPreheader++; - if (!FC.Header) - InvalidHeader++; - if (!FC.ExitingBlock) - InvalidExitingBlock++; - if (!FC.ExitBlock) - InvalidExitBlock++; - if (!FC.Latch) - InvalidLatch++; - if (FC.L->isInvalid()) - InvalidLoop++; - - return false; - } - - // Require ScalarEvolution to be able to determine a trip count. - if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " trip count not computable!\n"); - InvalidTripCount++; - return false; - } - - if (!FC.L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " is not in simplified form!\n"); - NotSimplifiedForm++; - return false; - } - - return true; - } - - /// Iterate over all loops in the given loop set and identify the loops that - /// are eligible for fusion. Place all eligible fusion candidates into Control - /// Flow Equivalent sets, sorted by dominance. - void collectFusionCandidates(const LoopVector &LV) { - for (Loop *L : LV) { - FusionCandidate CurrCand(L, &DT, &PDT); - if (!eligibleForFusion(CurrCand)) - continue; - - // Go through each list in FusionCandidates and determine if L is control - // flow equivalent with the first loop in that list. If it is, append LV. - // If not, go to the next list. - // If no suitable list is found, start another list and add it to - // FusionCandidates. - bool FoundSet = false; - - for (auto &CurrCandSet : FusionCandidates) { - if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) { - CurrCandSet.insert(CurrCand); - FoundSet = true; -#ifndef NDEBUG - if (VerboseFusionDebugging) - LLVM_DEBUG(dbgs() << "Adding " << CurrCand - << " to existing candidate set\n"); -#endif - break; - } - } - if (!FoundSet) { - // No set was found. Create a new set and add to FusionCandidates -#ifndef NDEBUG - if (VerboseFusionDebugging) - LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n"); -#endif - FusionCandidateSet NewCandSet; - NewCandSet.insert(CurrCand); - FusionCandidates.push_back(NewCandSet); - } - NumFusionCandidates++; - } - } - - /// Determine if it is beneficial to fuse two loops. - /// - /// For now, this method simply returns true because we want to fuse as much - /// as possible (primarily to test the pass). This method will evolve, over - /// time, to add heuristics for profitability of fusion. - bool isBeneficialFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1) { - return true; - } - - /// Determine if two fusion candidates have the same trip count (i.e., they - /// execute the same number of iterations). - /// - /// Note that for now this method simply returns a boolean value because there - /// are no mechanisms in loop fusion to handle different trip counts. In the - /// future, this behaviour can be extended to adjust one of the loops to make - /// the trip counts equal (e.g., loop peeling). When this is added, this - /// interface may need to change to return more information than just a - /// boolean value. - bool identicalTripCounts(const FusionCandidate &FC0, - const FusionCandidate &FC1) const { - const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L); - if (isa(TripCount0)) { - UncomputableTripCount++; - LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!"); - return false; - } - - const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L); - if (isa(TripCount1)) { - UncomputableTripCount++; - LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!"); - return false; - } - LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & " - << *TripCount1 << " are " - << (TripCount0 == TripCount1 ? "identical" : "different") - << "\n"); - - return (TripCount0 == TripCount1); - } - - /// Walk each set of control flow equivalent fusion candidates and attempt to - /// fuse them. This does a single linear traversal of all candidates in the - /// set. The conditions for legal fusion are checked at this point. If a pair - /// of fusion candidates passes all legality checks, they are fused together - /// and a new fusion candidate is created and added to the FusionCandidateSet. - /// The original fusion candidates are then removed, as they are no longer - /// valid. - bool fuseCandidates() { - bool Fused = false; - LLVM_DEBUG(printFusionCandidates(FusionCandidates)); - for (auto &CandidateSet : FusionCandidates) { - if (CandidateSet.size() < 2) - continue; - - LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n" - << CandidateSet << "\n"); - - for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) { - assert(!LDT.isRemovedLoop(FC0->L) && - "Should not have removed loops in CandidateSet!"); - auto FC1 = FC0; - for (++FC1; FC1 != CandidateSet.end(); ++FC1) { - assert(!LDT.isRemovedLoop(FC1->L) && - "Should not have removed loops in CandidateSet!"); - - LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump(); - dbgs() << " with\n"; FC1->dump(); dbgs() << "\n"); - - FC0->verify(); - FC1->verify(); - - if (!identicalTripCounts(*FC0, *FC1)) { - LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " - "counts. Not fusing.\n"); - NonEqualTripCount++; - continue; - } - - if (!isAdjacent(*FC0, *FC1)) { - LLVM_DEBUG(dbgs() - << "Fusion candidates are not adjacent. Not fusing.\n"); - NonAdjacent++; - continue; - } - - // For now we skip fusing if the second candidate has any instructions - // in the preheader. This is done because we currently do not have the - // safety checks to determine if it is save to move the preheader of - // the second candidate past the body of the first candidate. Once - // these checks are added, this condition can be removed. - if (!isEmptyPreheader(*FC1)) { - LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty " - "preheader. Not fusing.\n"); - NonEmptyPreheader++; - continue; - } - - if (!dependencesAllowFusion(*FC0, *FC1)) { - LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n"); - continue; - } - - bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1); - LLVM_DEBUG(dbgs() - << "\tFusion appears to be " - << (BeneficialToFuse ? "" : "un") << "profitable!\n"); - if (!BeneficialToFuse) - continue; - - // All analysis has completed and has determined that fusion is legal - // and profitable. At this point, start transforming the code and - // perform fusion. - - LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and " - << *FC1 << "\n"); - - // Report fusion to the Optimization Remarks. - // Note this needs to be done *before* performFusion because - // performFusion will change the original loops, making it not - // possible to identify them after fusion is complete. - reportLoopFusion(*FC0, *FC1, ORE); - - FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT); - FusedCand.verify(); - assert(eligibleForFusion(FusedCand) && - "Fused candidate should be eligible for fusion!"); - - // Notify the loop-depth-tree that these loops are not valid objects - // anymore. - LDT.removeLoop(FC1->L); - - CandidateSet.erase(FC0); - CandidateSet.erase(FC1); - - auto InsertPos = CandidateSet.insert(FusedCand); - - assert(InsertPos.second && - "Unable to insert TargetCandidate in CandidateSet!"); - - // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations - // of the FC1 loop will attempt to fuse the new (fused) loop with the - // remaining candidates in the current candidate set. - FC0 = FC1 = InsertPos.first; - - LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet - << "\n"); - - Fused = true; - } - } - } - return Fused; - } - - /// Rewrite all additive recurrences in a SCEV to use a new loop. - class AddRecLoopReplacer : public SCEVRewriteVisitor { - public: - AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL, - bool UseMax = true) - : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL), - NewL(NewL) {} - - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - const Loop *ExprL = Expr->getLoop(); - SmallVector Operands; - if (ExprL == &OldL) { - Operands.append(Expr->op_begin(), Expr->op_end()); - return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags()); - } - - if (OldL.contains(ExprL)) { - bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE)); - if (!UseMax || !Pos || !Expr->isAffine()) { - Valid = false; - return Expr; - } - return visit(Expr->getStart()); - } - - for (const SCEV *Op : Expr->operands()) - Operands.push_back(visit(Op)); - return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags()); - } - - bool wasValidSCEV() const { return Valid; } - - private: - bool Valid, UseMax; - const Loop &OldL, &NewL; - }; - - /// Return false if the access functions of \p I0 and \p I1 could cause - /// a negative dependence. - bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0, - Instruction &I1, bool EqualIsInvalid) { - Value *Ptr0 = getLoadStorePointerOperand(&I0); - Value *Ptr1 = getLoadStorePointerOperand(&I1); - if (!Ptr0 || !Ptr1) - return false; - - const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0); - const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1); -#ifndef NDEBUG - if (VerboseFusionDebugging) - LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs " - << *SCEVPtr1 << "\n"); -#endif - AddRecLoopReplacer Rewriter(SE, L0, L1); - SCEVPtr0 = Rewriter.visit(SCEVPtr0); -#ifndef NDEBUG - if (VerboseFusionDebugging) - LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0 - << " [Valid: " << Rewriter.wasValidSCEV() << "]\n"); -#endif - if (!Rewriter.wasValidSCEV()) - return false; - - // TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by - // L0) and the other is not. We could check if it is monotone and test - // the beginning and end value instead. - - BasicBlock *L0Header = L0.getHeader(); - auto HasNonLinearDominanceRelation = [&](const SCEV *S) { - const SCEVAddRecExpr *AddRec = dyn_cast(S); - if (!AddRec) - return false; - return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) && - !DT.dominates(AddRec->getLoop()->getHeader(), L0Header); - }; - if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation)) - return false; - - ICmpInst::Predicate Pred = - EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE; - bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1); -#ifndef NDEBUG - if (VerboseFusionDebugging) - LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0 - << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1 - << "\n"); -#endif - return IsAlwaysGE; - } - - /// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in - /// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses - /// specified by @p DepChoice are used to determine this. - bool dependencesAllowFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, Instruction &I0, - Instruction &I1, bool AnyDep, - FusionDependenceAnalysisChoice DepChoice) { -#ifndef NDEBUG - if (VerboseFusionDebugging) { - LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : " - << DepChoice << "\n"); - } -#endif - switch (DepChoice) { - case FUSION_DEPENDENCE_ANALYSIS_SCEV: - return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep); - case FUSION_DEPENDENCE_ANALYSIS_DA: { - auto DepResult = DI.depends(&I0, &I1, true); - if (!DepResult) - return true; -#ifndef NDEBUG - if (VerboseFusionDebugging) { - LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs()); - dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: " - << (DepResult->isOrdered() ? "true" : "false") - << "]\n"); - LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels() - << "\n"); - } -#endif - - if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor()) - LLVM_DEBUG( - dbgs() << "TODO: Implement pred/succ dependence handling!\n"); - - // TODO: Can we actually use the dependence info analysis here? - return false; - } - - case FUSION_DEPENDENCE_ANALYSIS_ALL: - return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep, - FUSION_DEPENDENCE_ANALYSIS_SCEV) || - dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep, - FUSION_DEPENDENCE_ANALYSIS_DA); - } - - llvm_unreachable("Unknown fusion dependence analysis choice!"); - } - - /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused. - bool dependencesAllowFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1) { - LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1 - << "\n"); - assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth()); - assert(DT.dominates(FC0.Preheader, FC1.Preheader)); - - for (Instruction *WriteL0 : FC0.MemWrites) { - for (Instruction *WriteL1 : FC1.MemWrites) - if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1, - /* AnyDep */ false, - FusionDependenceAnalysis)) { - InvalidDependencies++; - return false; - } - for (Instruction *ReadL1 : FC1.MemReads) - if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1, - /* AnyDep */ false, - FusionDependenceAnalysis)) { - InvalidDependencies++; - return false; - } - } - - for (Instruction *WriteL1 : FC1.MemWrites) { - for (Instruction *WriteL0 : FC0.MemWrites) - if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1, - /* AnyDep */ false, - FusionDependenceAnalysis)) { - InvalidDependencies++; - return false; - } - for (Instruction *ReadL0 : FC0.MemReads) - if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1, - /* AnyDep */ false, - FusionDependenceAnalysis)) { - InvalidDependencies++; - return false; - } - } - - // Walk through all uses in FC1. For each use, find the reaching def. If the - // def is located in FC0 then it is is not safe to fuse. - for (BasicBlock *BB : FC1.L->blocks()) - for (Instruction &I : *BB) - for (auto &Op : I.operands()) - if (Instruction *Def = dyn_cast(Op)) - if (FC0.L->contains(Def->getParent())) { - InvalidDependencies++; - return false; - } - - return true; - } - - /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this - /// case, there is no code in between the two fusion candidates, thus making - /// them adjacent. - bool isAdjacent(const FusionCandidate &FC0, - const FusionCandidate &FC1) const { - return FC0.ExitBlock == FC1.Preheader; - } - - bool isEmptyPreheader(const FusionCandidate &FC) const { - return FC.Preheader->size() == 1; - } - - /// Fuse two fusion candidates, creating a new fused loop. - /// - /// This method contains the mechanics of fusing two loops, represented by \p - /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1 - /// postdominates \p FC0 (making them control flow equivalent). It also - /// assumes that the other conditions for fusion have been met: adjacent, - /// identical trip counts, and no negative distance dependencies exist that - /// would prevent fusion. Thus, there is no checking for these conditions in - /// this method. - /// - /// Fusion is performed by rewiring the CFG to update successor blocks of the - /// components of tho loop. Specifically, the following changes are done: - /// - /// 1. The preheader of \p FC1 is removed as it is no longer necessary - /// (because it is currently only a single statement block). - /// 2. The latch of \p FC0 is modified to jump to the header of \p FC1. - /// 3. The latch of \p FC1 i modified to jump to the header of \p FC0. - /// 4. All blocks from \p FC1 are removed from FC1 and added to FC0. - /// - /// All of these modifications are done with dominator tree updates, thus - /// keeping the dominator (and post dominator) information up-to-date. - /// - /// This can be improved in the future by actually merging blocks during - /// fusion. For example, the preheader of \p FC1 can be merged with the - /// preheader of \p FC0. This would allow loops with more than a single - /// statement in the preheader to be fused. Similarly, the latch blocks of the - /// two loops could also be fused into a single block. This will require - /// analysis to prove it is safe to move the contents of the block past - /// existing code, which currently has not been implemented. - Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) { - assert(FC0.isValid() && FC1.isValid() && - "Expecting valid fusion candidates"); - - LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump(); - dbgs() << "Fusion Candidate 1: \n"; FC1.dump();); - - assert(FC1.Preheader == FC0.ExitBlock); - assert(FC1.Preheader->size() == 1 && - FC1.Preheader->getSingleSuccessor() == FC1.Header); - - // Remember the phi nodes originally in the header of FC0 in order to rewire - // them later. However, this is only necessary if the new loop carried - // values might not dominate the exiting branch. While we do not generally - // test if this is the case but simply insert intermediate phi nodes, we - // need to make sure these intermediate phi nodes have different - // predecessors. To this end, we filter the special case where the exiting - // block is the latch block of the first loop. Nothing needs to be done - // anyway as all loop carried values dominate the latch and thereby also the - // exiting branch. - SmallVector OriginalFC0PHIs; - if (FC0.ExitingBlock != FC0.Latch) - for (PHINode &PHI : FC0.Header->phis()) - OriginalFC0PHIs.push_back(&PHI); - - // Replace incoming blocks for header PHIs first. - FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader); - FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch); - - // Then modify the control flow and update DT and PDT. - SmallVector TreeUpdates; - - // The old exiting block of the first loop (FC0) has to jump to the header - // of the second as we need to execute the code in the second header block - // regardless of the trip count. That is, if the trip count is 0, so the - // back edge is never taken, we still have to execute both loop headers, - // especially (but not only!) if the second is a do-while style loop. - // However, doing so might invalidate the phi nodes of the first loop as - // the new values do only need to dominate their latch and not the exiting - // predicate. To remedy this potential problem we always introduce phi - // nodes in the header of the second loop later that select the loop carried - // value, if the second header was reached through an old latch of the - // first, or undef otherwise. This is sound as exiting the first implies the - // second will exit too, __without__ taking the back-edge. [Their - // trip-counts are equal after all. - // KB: Would this sequence be simpler to just just make FC0.ExitingBlock go - // to FC1.Header? I think this is basically what the three sequences are - // trying to accomplish; however, doing this directly in the CFG may mean - // the DT/PDT becomes invalid - FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader, - FC1.Header); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader)); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); - - // The pre-header of L1 is not necessary anymore. - assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader)); - FC1.Preheader->getTerminator()->eraseFromParent(); - new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC1.Preheader, FC1.Header)); - - // Moves the phi nodes from the second to the first loops header block. - while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { - if (SE.isSCEVable(PHI->getType())) - SE.forgetValue(PHI); - if (PHI->hasNUsesOrMore(1)) - PHI->moveBefore(&*FC0.Header->getFirstInsertionPt()); - else - PHI->eraseFromParent(); - } - - // Introduce new phi nodes in the second loop header to ensure - // exiting the first and jumping to the header of the second does not break - // the SSA property of the phis originally in the first loop. See also the - // comment above. - Instruction *L1HeaderIP = &FC1.Header->front(); - for (PHINode *LCPHI : OriginalFC0PHIs) { - int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); - assert(L1LatchBBIdx >= 0 && - "Expected loop carried value to be rewired at this point!"); - - Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); - - PHINode *L1HeaderPHI = PHINode::Create( - LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP); - L1HeaderPHI->addIncoming(LCV, FC0.Latch); - L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()), - FC0.ExitingBlock); - - LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); - } - - // Replace latch terminator destinations. - FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); - FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - - // If FC0.Latch and FC0.ExitingBlock are the same then we have already - // performed the updates above. - if (FC0.Latch != FC0.ExitingBlock) - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.Latch, FC1.Header)); - - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC0.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, - FC1.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC1.Latch, FC1.Header)); - - // Update DT/PDT - DTU.applyUpdates(TreeUpdates); - - LI.removeBlock(FC1.Preheader); - DTU.deleteBB(FC1.Preheader); - DTU.flush(); - - // Is there a way to keep SE up-to-date so we don't need to forget the loops - // and rebuild the information in subsequent passes of fusion? - SE.forgetLoop(FC1.L); - SE.forgetLoop(FC0.L); - - // Merge the loops. - SmallVector Blocks(FC1.L->block_begin(), - FC1.L->block_end()); - for (BasicBlock *BB : Blocks) { - FC0.L->addBlockEntry(BB); - FC1.L->removeBlockFromLoop(BB); - if (LI.getLoopFor(BB) != FC1.L) - continue; - LI.changeLoopFor(BB, FC0.L); - } - while (!FC1.L->empty()) { - const auto &ChildLoopIt = FC1.L->begin(); - Loop *ChildLoop = *ChildLoopIt; - FC1.L->removeChildLoop(ChildLoopIt); - FC0.L->addChildLoop(ChildLoop); - } - - // Delete the now empty loop L1. - LI.erase(FC1.L); - -#ifndef NDEBUG - assert(!verifyFunction(*FC0.Header->getParent(), &errs())); - assert(DT.verify(DominatorTree::VerificationLevel::Fast)); - assert(PDT.verify()); - LI.verify(DT); - SE.verify(); -#endif - - FuseCounter++; - - LLVM_DEBUG(dbgs() << "Fusion done:\n"); - - return FC0.L; - } -}; - -struct LoopFuseLegacy : public FunctionPass { - - static char ID; - - LoopFuseLegacy() : FunctionPass(ID) { - initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LoopSimplifyID); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - auto &LI = getAnalysis().getLoopInfo(); - auto &DT = getAnalysis().getDomTree(); - auto &DI = getAnalysis().getDI(); - auto &SE = getAnalysis().getSE(); - auto &PDT = getAnalysis().getPostDomTree(); - auto &ORE = getAnalysis().getORE(); - - const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL); - return LF.fuseLoops(F); - } -}; - -PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) { - auto &LI = AM.getResult(F); - auto &DT = AM.getResult(F); - auto &DI = AM.getResult(F); - auto &SE = AM.getResult(F); - auto &PDT = AM.getResult(F); - auto &ORE = AM.getResult(F); - - const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL); - bool Changed = LF.fuseLoops(F); - if (!Changed) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserve(); - PA.preserve(); - PA.preserve(); - PA.preserve(); - return PA; -} - -char LoopFuseLegacy::ID = 0; - -INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, - false) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false) - -FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); } diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 2584cf0..c91ffda 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -62,7 +62,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeJumpThreadingPass(Registry); initializeLegacyLICMPassPass(Registry); initializeLegacyLoopSinkPassPass(Registry); - initializeLoopFuseLegacyPass(Registry); initializeLoopDataPrefetchLegacyPassPass(Registry); initializeLoopDeletionLegacyPassPass(Registry); initializeLoopAccessLegacyAnalysisPass(Registry); diff --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll deleted file mode 100644 index d742a5b..0000000 --- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll +++ /dev/null @@ -1,371 +0,0 @@ -; RUN: opt -S -loop-fusion -debug-only=loop-fusion -disable-output < %s 2>&1 | FileCheck %s -; REQUIRES: asserts - -@B = common global [1024 x i32] zeroinitializer, align 16 - -; CHECK that the two candidates for fusion are placed into separate candidate -; sets because they are not control flow equivalent. - -; CHECK: Performing Loop Fusion on function non_cfe -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK: bb -; CHECK: **************************** -; CHECK: *** Fusion Candidate Set *** -; CHECK: bb20.preheader -; CHECK: **************************** -; CHECK: Loop Fusion complete -define void @non_cfe(i32* noalias %arg) { -bb: - br label %bb5 - -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb16 - -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 - %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - store i32 %tmp12, i32* %tmp13, align 4 - br label %bb14 - -bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 - -bb16: ; preds = %bb5 - %tmp17 = load i32, i32* %arg, align 4 - %tmp18 = icmp slt i32 %tmp17, 0 - br i1 %tmp18, label %bb20, label %bb33 - -bb20: ; preds = %bb30, %bb16 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb30 ], [ 0, %bb16 ] - %.0 = phi i32 [ 0, %bb16 ], [ %tmp31, %bb30 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb22, label %bb33 - -bb22: ; preds = %bb20 - %tmp23 = add nsw i32 %.0, -3 - %tmp24 = add nuw nsw i64 %indvars.iv, 3 - %tmp25 = trunc i64 %tmp24 to i32 - %tmp26 = mul nsw i32 %tmp23, %tmp25 - %tmp27 = trunc i64 %indvars.iv to i32 - %tmp28 = srem i32 %tmp26, %tmp27 - %tmp29 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp28, i32* %tmp29, align 4 - br label %bb30 - -bb30: ; preds = %bb22 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp31 = add nuw nsw i32 %.0, 1 - br label %bb20 - -bb33: ; preds = %bb20, %bb16 - ret void -} - -; Check that fusion detects the two canddates are not adjacent (the exit block -; of the first candidate is not the preheader of the second candidate). - -; CHECK: Performing Loop Fusion on function non_adjacent -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]] -; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]] -; CHECK-NEXT: **************************** -; CHECK: Attempting fusion on Candidate Set: -; CHECK-NEXT: [[LOOP1PREHEADER]] -; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Fusion candidates are not adjacent. Not fusing. -; CHECK: Loop Fusion complete -define void @non_adjacent(i32* noalias %arg) { -bb: - br label %bb3 - -bb3: ; preds = %bb11, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] - %exitcond2 = icmp ne i64 %.01, 100 - br i1 %exitcond2, label %bb5, label %bb4 - -bb4: ; preds = %bb3 - br label %bb13 - -bb5: ; preds = %bb3 - %tmp = add nsw i64 %.01, -3 - %tmp6 = add nuw nsw i64 %.01, 3 - %tmp7 = mul nsw i64 %tmp, %tmp6 - %tmp8 = srem i64 %tmp7, %.01 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01 - store i32 %tmp9, i32* %tmp10, align 4 - br label %bb11 - -bb11: ; preds = %bb5 - %tmp12 = add nuw nsw i64 %.01, 1 - br label %bb3 - -bb13: ; preds = %bb4 - br label %bb14 - -bb14: ; preds = %bb23, %bb13 - %.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] - %exitcond = icmp ne i64 %.0, 100 - br i1 %exitcond, label %bb16, label %bb15 - -bb15: ; preds = %bb14 - br label %bb25 - -bb16: ; preds = %bb14 - %tmp17 = add nsw i64 %.0, -3 - %tmp18 = add nuw nsw i64 %.0, 3 - %tmp19 = mul nsw i64 %tmp17, %tmp18 - %tmp20 = srem i64 %tmp19, %.0 - %tmp21 = trunc i64 %tmp20 to i32 - %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0 - store i32 %tmp21, i32* %tmp22, align 4 - br label %bb23 - -bb23: ; preds = %bb16 - %tmp24 = add nuw nsw i64 %.0, 1 - br label %bb14 - -bb25: ; preds = %bb15 - ret void -} - -; Check that the different bounds are detected and prevent fusion. - -; CHECK: Performing Loop Fusion on function different_bounds -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]] -; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]] -; CHECK-NEXT: **************************** -; CHECK: Attempting fusion on Candidate Set: -; CHECK-NEXT: [[LOOP1PREHEADER]] -; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Fusion candidates do not have identical trip counts. Not fusing. -; CHECK: Loop Fusion complete -define void @different_bounds(i32* noalias %arg) { -bb: - br label %bb3 - -bb3: ; preds = %bb11, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] - %exitcond2 = icmp ne i64 %.01, 100 - br i1 %exitcond2, label %bb5, label %bb4 - -bb4: ; preds = %bb3 - br label %bb13 - -bb5: ; preds = %bb3 - %tmp = add nsw i64 %.01, -3 - %tmp6 = add nuw nsw i64 %.01, 3 - %tmp7 = mul nsw i64 %tmp, %tmp6 - %tmp8 = srem i64 %tmp7, %.01 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01 - store i32 %tmp9, i32* %tmp10, align 4 - br label %bb11 - -bb11: ; preds = %bb5 - %tmp12 = add nuw nsw i64 %.01, 1 - br label %bb3 - -bb13: ; preds = %bb4 - br label %bb14 - -bb14: ; preds = %bb23, %bb13 - %.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] - %exitcond = icmp ne i64 %.0, 200 - br i1 %exitcond, label %bb16, label %bb15 - -bb15: ; preds = %bb14 - br label %bb25 - -bb16: ; preds = %bb14 - %tmp17 = add nsw i64 %.0, -3 - %tmp18 = add nuw nsw i64 %.0, 3 - %tmp19 = mul nsw i64 %tmp17, %tmp18 - %tmp20 = srem i64 %tmp19, %.0 - %tmp21 = trunc i64 %tmp20 to i32 - %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0 - store i32 %tmp21, i32* %tmp22, align 4 - br label %bb23 - -bb23: ; preds = %bb16 - %tmp24 = add nuw nsw i64 %.0, 1 - br label %bb14 - -bb25: ; preds = %bb15 - ret void -} - -; Check that the negative dependence between the two candidates is identified -; and prevents fusion. - -; CHECK: Performing Loop Fusion on function negative_dependence -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]] -; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]] -; CHECK-NEXT: **************************** -; CHECK: Attempting fusion on Candidate Set: -; CHECK-NEXT: [[LOOP1PREHEADER]] -; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Memory dependencies do not allow fusion! -; CHECK: Loop Fusion complete -define void @negative_dependence(i32* noalias %arg) { -bb: - br label %bb5 - -bb5: ; preds = %bb9, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb11 - -bb7: ; preds = %bb5 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp8 = trunc i64 %indvars.iv2 to i32 - store i32 %tmp8, i32* %tmp, align 4 - br label %bb9 - -bb9: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 - -bb11: ; preds = %bb18, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb13, label %bb19 - -bb13: ; preds = %bb11 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv.next - %tmp15 = load i32, i32* %tmp14, align 4 - %tmp16 = shl nsw i32 %tmp15, 1 - %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp16, i32* %tmp17, align 4 - br label %bb18 - -bb18: ; preds = %bb13 - br label %bb11 - -bb19: ; preds = %bb11 - ret void -} - -; Check for values defined in Loop 0 and used in Loop 1. -; It is not safe to fuse in this case, because the second loop has -; a use of %.01.lcssa which is defined in the body of loop 0. The -; first loop must execute completely in order to compute the correct -; value of %.01.lcssa to be used in the second loop. - -; CHECK: Performing Loop Fusion on function sumTest -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]] -; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]] -; CHECK-NEXT: **************************** -; CHECK: Attempting fusion on Candidate Set: -; CHECK-NEXT: [[LOOP1PREHEADER]] -; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Memory dependencies do not allow fusion! -; CHECK: Loop Fusion complete -define i32 @sumTest(i32* noalias %arg) { -bb: - br label %bb6 - -bb6: ; preds = %bb9, %bb - %indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb9 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ] - %exitcond5 = icmp ne i64 %indvars.iv3, 100 - br i1 %exitcond5, label %bb9, label %bb13 - -bb9: ; preds = %bb6 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv3 - %tmp10 = load i32, i32* %tmp, align 4 - %tmp11 = add nsw i32 %.01, %tmp10 - %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 - br label %bb6 - -bb13: ; preds = %bb20, %bb6 - %.01.lcssa = phi i32 [ %.01, %bb6 ], [ %.01.lcssa, %bb20 ] - %indvars.iv = phi i64 [ %indvars.iv.next, %bb20 ], [ 0, %bb6 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb15, label %bb14 - -bb14: ; preds = %bb13 - br label %bb21 - -bb15: ; preds = %bb13 - %tmp16 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv - %tmp17 = load i32, i32* %tmp16, align 4 - %tmp18 = sdiv i32 %tmp17, %.01.lcssa - %tmp19 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp18, i32* %tmp19, align 4 - br label %bb20 - -bb20: ; preds = %bb15 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb13 - -bb21: ; preds = %bb14 - ret i32 %.01.lcssa -} - -; Similar to sumTest above. The first loop computes %add and must -; complete before it is used in the second loop. Thus, these two loops -; also cannot be fused. - -; CHECK: Performing Loop Fusion on function test -; CHECK: Fusion Candidates: -; CHECK: *** Fusion Candidate Set *** -; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]] -; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]] -; CHECK-NEXT: **************************** -; CHECK: Attempting fusion on Candidate Set: -; CHECK-NEXT: [[LOOP1PREHEADER]] -; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Memory dependencies do not allow fusion! -; CHECK: Loop Fusion complete -define float @test(float* nocapture %a, i32 %n) { -entry: - %conv = zext i32 %n to i64 - %cmp32 = icmp eq i32 %n, 0 - br i1 %cmp32, label %for.cond.cleanup7, label %for.body - -for.body: ; preds = %for.body, %entry - %i.034 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %sum1.033 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ] - %idxprom = trunc i64 %i.034 to i32 - %arrayidx = getelementptr inbounds float, float* %a, i32 %idxprom - %0 = load float, float* %arrayidx, align 4 - %add = fadd float %sum1.033, %0 - %inc = add nuw nsw i64 %i.034, 1 - %cmp = icmp ult i64 %inc, %conv - br i1 %cmp, label %for.body, label %for.body8 - -for.body8: ; preds = %for.body, %for.body8 - %i2.031 = phi i64 [ %inc14, %for.body8 ], [ 0, %for.body ] - %idxprom9 = trunc i64 %i2.031 to i32 - %arrayidx10 = getelementptr inbounds float, float* %a, i32 %idxprom9 - %1 = load float, float* %arrayidx10, align 4 - %div = fdiv float %1, %add - store float %div, float* %arrayidx10, align 4 - %inc14 = add nuw nsw i64 %i2.031, 1 - %cmp5 = icmp ult i64 %inc14, %conv - br i1 %cmp5, label %for.body8, label %for.cond.cleanup7 - -for.cond.cleanup7: ; preds = %for.body8, %entry - %sum1.0.lcssa36 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body8 ] - ret float %sum1.0.lcssa36 -} diff --git a/llvm/test/Transforms/LoopFusion/four_loops.ll b/llvm/test/Transforms/LoopFusion/four_loops.ll deleted file mode 100644 index e03de65..0000000 --- a/llvm/test/Transforms/LoopFusion/four_loops.ll +++ /dev/null @@ -1,136 +0,0 @@ -; RUN: opt -S -loop-fusion < %s | FileCheck %s - -@A = common global [1024 x i32] zeroinitializer, align 16 -@B = common global [1024 x i32] zeroinitializer, align 16 -@C = common global [1024 x i32] zeroinitializer, align 16 -@D = common global [1024 x i32] zeroinitializer, align 16 - -; CHECK: void @dep_free -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %exitcond12, label %[[LOOP1BODY:bb[0-9]+]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %exitcond9, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP3PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2HEADER]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP3PREHEADER]] -; CHECK: [[LOOP3PREHEADER]] -; CHECK: br i1 %exitcond6, label %[[LOOP3HEADER:bb[0-9]+]], label %[[LOOP4PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP3HEADER]] -; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] -; CHECK: [[LOOP3LATCH]] -; CHECK: br label %[[LOOP4PREHEADER]] -; CHECK: [[LOOP4PREHEADER]] -; CHECK: br i1 %exitcond, label %[[LOOP4HEADER:bb[0-9]+]], label %[[LOOP4EXIT:bb[0-9]+]] -; CHECK: [[LOOP4EXIT]] -; CHECK: br label %[[FUNCEXIT:bb[0-9]+]] -; CHECK: [[LOOP4HEADER]] -; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] -; CHECK: [[LOOP4LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: [[FUNCEXIT]] -; CHECK: ret void -define void @dep_free() { -bb: - br label %bb13 - -bb13: ; preds = %bb22, %bb - %indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb22 ], [ 0, %bb ] - %.0 = phi i32 [ 0, %bb ], [ %tmp23, %bb22 ] - %exitcond12 = icmp ne i64 %indvars.iv10, 100 - br i1 %exitcond12, label %bb15, label %bb25 - -bb15: ; preds = %bb13 - %tmp = add nsw i32 %.0, -3 - %tmp16 = add nuw nsw i64 %indvars.iv10, 3 - %tmp17 = trunc i64 %tmp16 to i32 - %tmp18 = mul nsw i32 %tmp, %tmp17 - %tmp19 = trunc i64 %indvars.iv10 to i32 - %tmp20 = srem i32 %tmp18, %tmp19 - %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv10 - store i32 %tmp20, i32* %tmp21, align 4 - br label %bb22 - -bb22: ; preds = %bb15 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1 - %tmp23 = add nuw nsw i32 %.0, 1 - br label %bb13 - -bb25: ; preds = %bb35, %bb13 - %indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb35 ], [ 0, %bb13 ] - %.01 = phi i32 [ 0, %bb13 ], [ %tmp36, %bb35 ] - %exitcond9 = icmp ne i64 %indvars.iv7, 100 - br i1 %exitcond9, label %bb27, label %bb38 - -bb27: ; preds = %bb25 - %tmp28 = add nsw i32 %.01, -3 - %tmp29 = add nuw nsw i64 %indvars.iv7, 3 - %tmp30 = trunc i64 %tmp29 to i32 - %tmp31 = mul nsw i32 %tmp28, %tmp30 - %tmp32 = trunc i64 %indvars.iv7 to i32 - %tmp33 = srem i32 %tmp31, %tmp32 - %tmp34 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv7 - store i32 %tmp33, i32* %tmp34, align 4 - br label %bb35 - -bb35: ; preds = %bb27 - %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 - %tmp36 = add nuw nsw i32 %.01, 1 - br label %bb25 - -bb38: ; preds = %bb48, %bb25 - %indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb48 ], [ 0, %bb25 ] - %.02 = phi i32 [ 0, %bb25 ], [ %tmp49, %bb48 ] - %exitcond6 = icmp ne i64 %indvars.iv4, 100 - br i1 %exitcond6, label %bb40, label %bb51 - -bb40: ; preds = %bb38 - %tmp41 = add nsw i32 %.02, -3 - %tmp42 = add nuw nsw i64 %indvars.iv4, 3 - %tmp43 = trunc i64 %tmp42 to i32 - %tmp44 = mul nsw i32 %tmp41, %tmp43 - %tmp45 = trunc i64 %indvars.iv4 to i32 - %tmp46 = srem i32 %tmp44, %tmp45 - %tmp47 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv4 - store i32 %tmp46, i32* %tmp47, align 4 - br label %bb48 - -bb48: ; preds = %bb40 - %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 - %tmp49 = add nuw nsw i32 %.02, 1 - br label %bb38 - -bb51: ; preds = %bb61, %bb38 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb61 ], [ 0, %bb38 ] - %.03 = phi i32 [ 0, %bb38 ], [ %tmp62, %bb61 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb53, label %bb52 - -bb52: ; preds = %bb51 - br label %bb63 - -bb53: ; preds = %bb51 - %tmp54 = add nsw i32 %.03, -3 - %tmp55 = add nuw nsw i64 %indvars.iv, 3 - %tmp56 = trunc i64 %tmp55 to i32 - %tmp57 = mul nsw i32 %tmp54, %tmp56 - %tmp58 = trunc i64 %indvars.iv to i32 - %tmp59 = srem i32 %tmp57, %tmp58 - %tmp60 = getelementptr inbounds [1024 x i32], [1024 x i32]* @D, i64 0, i64 %indvars.iv - store i32 %tmp59, i32* %tmp60, align 4 - br label %bb61 - -bb61: ; preds = %bb53 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp62 = add nuw nsw i32 %.03, 1 - br label %bb51 - -bb63: ; preds = %bb52 - ret void -} diff --git a/llvm/test/Transforms/LoopFusion/inner_loops.ll b/llvm/test/Transforms/LoopFusion/inner_loops.ll deleted file mode 100644 index 2b14ec9..0000000 --- a/llvm/test/Transforms/LoopFusion/inner_loops.ll +++ /dev/null @@ -1,86 +0,0 @@ -; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s - -@A = common global [1024 x [1024 x i32]] zeroinitializer, align 16 -@B = common global [1024 x [1024 x i32]] zeroinitializer, align 16 - -; CHECK: void @dep_free -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void - -define void @dep_free() { -bb: - br label %bb9 - -bb9: ; preds = %bb35, %bb - %indvars.iv6 = phi i64 [ %indvars.iv.next7, %bb35 ], [ 0, %bb ] - %.0 = phi i32 [ 0, %bb ], [ %tmp36, %bb35 ] - %exitcond8 = icmp ne i64 %indvars.iv6, 100 - br i1 %exitcond8, label %bb11, label %bb10 - -bb10: ; preds = %bb9 - br label %bb37 - -bb11: ; preds = %bb9 - br label %bb12 - -bb12: ; preds = %bb21, %bb11 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb21 ], [ 0, %bb11 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb14, label %bb23 - -bb14: ; preds = %bb12 - %tmp = add nsw i32 %.0, -3 - %tmp15 = add nuw nsw i64 %indvars.iv6, 3 - %tmp16 = trunc i64 %tmp15 to i32 - %tmp17 = mul nsw i32 %tmp, %tmp16 - %tmp18 = trunc i64 %indvars.iv6 to i32 - %tmp19 = srem i32 %tmp17, %tmp18 - %tmp20 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv6, i64 %indvars.iv - store i32 %tmp19, i32* %tmp20, align 4 - br label %bb21 - -bb21: ; preds = %bb14 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb12 - -bb23: ; preds = %bb33, %bb12 - %indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb33 ], [ 0, %bb12 ] - %exitcond5 = icmp ne i64 %indvars.iv3, 100 - br i1 %exitcond5, label %bb25, label %bb35 - -bb25: ; preds = %bb23 - %tmp26 = add nsw i32 %.0, -3 - %tmp27 = add nuw nsw i64 %indvars.iv6, 3 - %tmp28 = trunc i64 %tmp27 to i32 - %tmp29 = mul nsw i32 %tmp26, %tmp28 - %tmp30 = trunc i64 %indvars.iv6 to i32 - %tmp31 = srem i32 %tmp29, %tmp30 - %tmp32 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv6, i64 %indvars.iv3 - store i32 %tmp31, i32* %tmp32, align 4 - br label %bb33 - -bb33: ; preds = %bb25 - %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 - br label %bb23 - -bb35: ; preds = %bb23 - %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1 - %tmp36 = add nuw nsw i32 %.0, 1 - br label %bb9 - -bb37: ; preds = %bb10 - ret void -} diff --git a/llvm/test/Transforms/LoopFusion/loop_nest.ll b/llvm/test/Transforms/LoopFusion/loop_nest.ll deleted file mode 100644 index d6cf214..0000000 --- a/llvm/test/Transforms/LoopFusion/loop_nest.ll +++ /dev/null @@ -1,120 +0,0 @@ -; RUN: opt -S -loop-fusion < %s | FileCheck %s -; -; int A[1024][1024]; -; int B[1024][1024]; -; -; #define EXPENSIVE_PURE_COMPUTATION(i) ((i - 3) * (i + 3) % i) -; -; void dep_free() { -; -; for (int i = 0; i < 100; i++) -; for (int j = 0; j < 100; j++) -; A[i][j] = EXPENSIVE_PURE_COMPUTATION(i); -; -; for (int i = 0; i < 100; i++) -; for (int j = 0; j < 100; j++) -; B[i][j] = EXPENSIVE_PURE_COMPUTATION(i); -; } -; -@A = common global [1024 x [1024 x i32]] zeroinitializer, align 16 -@B = common global [1024 x [1024 x i32]] zeroinitializer, align 16 - -; CHECK: void @dep_free -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %exitcond12, label %[[LOOP3PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2HEADER:bb[0-9]+]] -; CHECK: [[LOOP3PREHEADER]] -; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]] -; CHECK: [[LOOP3HEADER]] -; CHECK: br i1 %exitcond9, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]] -; CHECK: [[LOOP2HEADER]] -; CHECK: br i1 %exitcond6, label %[[LOOP4PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2EXITBLOCK:bb[0-9]+]] -; CHECK: [[LOOP4PREHEADER]] -; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]] -; CHECK: [[LOOP2EXITBLOCK]] -; CHECK-NEXT: br label %[[FUNCEXIT:bb[0-9]+]] -; CHECK: [[LOOP4HEADER]] -; CHECK: br i1 %exitcond, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER:bb[0-9]+]] -; CHECK: [[FUNCEXIT]] -; CHECK: ret void - -; TODO: The current version of loop fusion does not allow the inner loops to be -; fused because they are not control flow equivalent and adjacent. These are -; limitations that can be addressed in future improvements to fusion. -define void @dep_free() { -bb: - br label %bb13 - -bb13: ; preds = %bb27, %bb - %indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb27 ], [ 0, %bb ] - %.0 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ] - %exitcond12 = icmp ne i64 %indvars.iv10, 100 - br i1 %exitcond12, label %bb16, label %bb30 - -bb16: ; preds = %bb25, %bb13 - %indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb25 ], [ 0, %bb13 ] - %exitcond9 = icmp ne i64 %indvars.iv7, 100 - br i1 %exitcond9, label %bb18, label %bb27 - -bb18: ; preds = %bb16 - %tmp = add nsw i32 %.0, -3 - %tmp19 = add nuw nsw i64 %indvars.iv10, 3 - %tmp20 = trunc i64 %tmp19 to i32 - %tmp21 = mul nsw i32 %tmp, %tmp20 - %tmp22 = trunc i64 %indvars.iv10 to i32 - %tmp23 = srem i32 %tmp21, %tmp22 - %tmp24 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv10, i64 %indvars.iv7 - store i32 %tmp23, i32* %tmp24, align 4 - br label %bb25 - -bb25: ; preds = %bb18 - %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 - br label %bb16 - -bb27: ; preds = %bb16 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1 - %tmp28 = add nuw nsw i32 %.0, 1 - br label %bb13 - -bb30: ; preds = %bb45, %bb13 - %indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb45 ], [ 0, %bb13 ] - %.02 = phi i32 [ 0, %bb13 ], [ %tmp46, %bb45 ] - %exitcond6 = icmp ne i64 %indvars.iv4, 100 - br i1 %exitcond6, label %bb33, label %bb31 - -bb31: ; preds = %bb30 - br label %bb47 - -bb33: ; preds = %bb43, %bb30 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb43 ], [ 0, %bb30 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb35, label %bb45 - -bb35: ; preds = %bb33 - %tmp36 = add nsw i32 %.02, -3 - %tmp37 = add nuw nsw i64 %indvars.iv4, 3 - %tmp38 = trunc i64 %tmp37 to i32 - %tmp39 = mul nsw i32 %tmp36, %tmp38 - %tmp40 = trunc i64 %indvars.iv4 to i32 - %tmp41 = srem i32 %tmp39, %tmp40 - %tmp42 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv - store i32 %tmp41, i32* %tmp42, align 4 - br label %bb43 - -bb43: ; preds = %bb35 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb33 - -bb45: ; preds = %bb33 - %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 - %tmp46 = add nuw nsw i32 %.02, 1 - br label %bb30 - -bb47: ; preds = %bb31 - ret void -} diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll deleted file mode 100644 index 9d2db2b..0000000 --- a/llvm/test/Transforms/LoopFusion/simple.ll +++ /dev/null @@ -1,317 +0,0 @@ -; RUN: opt -S -loop-fusion < %s | FileCheck %s - -@B = common global [1024 x i32] zeroinitializer, align 16 - -; CHECK: void @dep_free -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void -define void @dep_free(i32* noalias %arg) { -bb: - br label %bb5 - -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb17 - -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 - %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - store i32 %tmp12, i32* %tmp13, align 4 - br label %bb14 - -bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 - -bb17: ; preds = %bb27, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ] - %.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb19, label %bb18 - -bb18: ; preds = %bb17 - br label %bb29 - -bb19: ; preds = %bb17 - %tmp20 = add nsw i32 %.0, -3 - %tmp21 = add nuw nsw i64 %indvars.iv, 3 - %tmp22 = trunc i64 %tmp21 to i32 - %tmp23 = mul nsw i32 %tmp20, %tmp22 - %tmp24 = trunc i64 %indvars.iv to i32 - %tmp25 = srem i32 %tmp23, %tmp24 - %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp25, i32* %tmp26, align 4 - br label %bb27 - -bb27: ; preds = %bb19 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp28 = add nuw nsw i32 %.0, 1 - br label %bb17 - -bb29: ; preds = %bb18 - ret void -} - -; CHECK: void @dep_free_parametric -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void -define void @dep_free_parametric(i32* noalias %arg, i64 %arg2) { -bb: - br label %bb3 - -bb3: ; preds = %bb12, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp13, %bb12 ] - %tmp = icmp slt i64 %.01, %arg2 - br i1 %tmp, label %bb5, label %bb15 - -bb5: ; preds = %bb3 - %tmp6 = add nsw i64 %.01, -3 - %tmp7 = add nuw nsw i64 %.01, 3 - %tmp8 = mul nsw i64 %tmp6, %tmp7 - %tmp9 = srem i64 %tmp8, %.01 - %tmp10 = trunc i64 %tmp9 to i32 - %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %.01 - store i32 %tmp10, i32* %tmp11, align 4 - br label %bb12 - -bb12: ; preds = %bb5 - %tmp13 = add nuw nsw i64 %.01, 1 - br label %bb3 - -bb15: ; preds = %bb25, %bb3 - %.0 = phi i64 [ 0, %bb3 ], [ %tmp26, %bb25 ] - %tmp16 = icmp slt i64 %.0, %arg2 - br i1 %tmp16, label %bb18, label %bb17 - -bb17: ; preds = %bb15 - br label %bb27 - -bb18: ; preds = %bb15 - %tmp19 = add nsw i64 %.0, -3 - %tmp20 = add nuw nsw i64 %.0, 3 - %tmp21 = mul nsw i64 %tmp19, %tmp20 - %tmp22 = srem i64 %tmp21, %.0 - %tmp23 = trunc i64 %tmp22 to i32 - %tmp24 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0 - store i32 %tmp23, i32* %tmp24, align 4 - br label %bb25 - -bb25: ; preds = %bb18 - %tmp26 = add nuw nsw i64 %.0, 1 - br label %bb15 - -bb27: ; preds = %bb17 - ret void -} - -; CHECK: void @raw_only -; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void -define void @raw_only(i32* noalias %arg) { -bb: - br label %bb5 - -bb5: ; preds = %bb9, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb11 - -bb7: ; preds = %bb5 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp8 = trunc i64 %indvars.iv2 to i32 - store i32 %tmp8, i32* %tmp, align 4 - br label %bb9 - -bb9: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 - -bb11: ; preds = %bb18, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb13, label %bb19 - -bb13: ; preds = %bb11 - %tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv - %tmp15 = load i32, i32* %tmp14, align 4 - %tmp16 = shl nsw i32 %tmp15, 1 - %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp16, i32* %tmp17, align 4 - br label %bb18 - -bb18: ; preds = %bb13 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb11 - -bb19: ; preds = %bb11 - ret void -} - -; CHECK: void @raw_only_parametric -; CHECK-NEXT: bb: -; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void -define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) { -bb: - br label %bb5 - -bb5: ; preds = %bb11, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb11 ], [ 0, %bb ] - %tmp = sext i32 %arg4 to i64 - %tmp6 = icmp slt i64 %indvars.iv2, %tmp - br i1 %tmp6, label %bb8, label %bb14 - -bb8: ; preds = %bb5 - %tmp9 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp10 = trunc i64 %indvars.iv2 to i32 - store i32 %tmp10, i32* %tmp9, align 4 - br label %bb11 - -bb11: ; preds = %bb8 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 - -bb14: ; preds = %bb22, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb22 ], [ 0, %bb5 ] - %tmp13 = sext i32 %arg4 to i64 - %tmp15 = icmp slt i64 %indvars.iv, %tmp13 - br i1 %tmp15, label %bb17, label %bb23 - -bb17: ; preds = %bb14 - %tmp18 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv - %tmp19 = load i32, i32* %tmp18, align 4 - %tmp20 = shl nsw i32 %tmp19, 1 - %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv - store i32 %tmp20, i32* %tmp21, align 4 - br label %bb22 - -bb22: ; preds = %bb17 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb14 - -bb23: ; preds = %bb14 - ret void -} - -; CHECK: void @forward_dep -; CHECK-NEXT: bb: -; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] -; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: ret void -define void @forward_dep(i32* noalias %arg) { -bb: - br label %bb5 - -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb17 - -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 - %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - store i32 %tmp12, i32* %tmp13, align 4 - br label %bb14 - -bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 - -bb17: ; preds = %bb25, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb25 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb19, label %bb26 - -bb19: ; preds = %bb17 - %tmp20 = add nsw i64 %indvars.iv, -3 - %tmp21 = getelementptr inbounds i32, i32* %arg, i64 %tmp20 - %tmp22 = load i32, i32* %tmp21, align 4 - %tmp23 = mul nsw i32 %tmp22, 3 - %tmp24 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv - store i32 %tmp23, i32* %tmp24, align 4 - br label %bb25 - -bb25: ; preds = %bb19 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb17 - -bb26: ; preds = %bb17 - ret void -} -- 2.7.4