void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
+void initializeIROutlinerLegacyPassPass(PassRegistry&);
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&);
void initializeIRTranslatorPass(PassRegistry&);
void initializeIVUsersWrapperPassPass(PassRegistry&);
ModulePass *createHotColdSplittingPass();
//===----------------------------------------------------------------------===//
+/// createIROutlinerPass - This pass finds similar code regions and factors
+/// those regions out into functions.
+ModulePass *createIROutlinerPass();
+
+//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///
ModulePass *createPartialInliningPass();
--- /dev/null
+//===- IROutliner.h - Extract similar IR regions into functions ------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// The interface file for the IROutliner which is used by the IROutliner Pass.
+//
+// The outliner uses the IRSimilarityIdentifier to identify the similar regions
+// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
+// whether it will provide code size reduction. Each region is extracted using
+// the code extractor. These extracted functions are consolidated into a single
+// function and called from the extracted call site.
+//
+// For example:
+// \code
+// %1 = add i32 %a, %b
+// %2 = add i32 %b, %a
+// %3 = add i32 %b, %a
+// %4 = add i32 %a, %b
+// \endcode
+// would become function
+// \code
+// define internal void outlined_ir_function(i32 %0, i32 %1) {
+// %1 = add i32 %0, %1
+// %2 = add i32 %1, %0
+// ret void
+// }
+// \endcode
+// with calls:
+// \code
+// call void outlined_ir_function(i32 %a, i32 %b)
+// call void outlined_ir_function(i32 %b, i32 %a)
+// \endcode
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
+#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <set>
+
+struct OutlinableGroup;
+
+namespace llvm {
+using namespace IRSimilarity;
+
+class Module;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+
+/// The OutlinableRegion holds all the information for a specific region, or
+/// sequence of instructions. This includes what values need to be hoisted to
+/// arguments from the extracted function, inputs and outputs to the region, and
+/// mapping from the extracted function arguments to overall function arguments.
+struct OutlinableRegion {
+ /// Describes the region of code.
+ IRSimilarityCandidate *Candidate;
+
+ /// If this region is outlined, the front and back IRInstructionData could
+ /// potentially become invalidated if the only new instruction is a call.
+ /// This ensures that we replace in the instruction in the IRInstructionData.
+ IRInstructionData *NewFront = nullptr;
+ IRInstructionData *NewBack = nullptr;
+
+ /// Used to create an outlined function.
+ CodeExtractor *CE = nullptr;
+
+ /// The call site of the extracted region.
+ CallInst *Call = nullptr;
+
+ /// The function for the extracted region.
+ Function *ExtractedFunction = nullptr;
+
+ /// Flag for whether we have split out the IRSimilarityCanidate. That is,
+ /// make the region contained the IRSimilarityCandidate its own BasicBlock.
+ bool CandidateSplit = false;
+
+ /// Flag for whether we should not consider this region for extraction.
+ bool IgnoreRegion = false;
+
+ /// The BasicBlock that is before the start of the region BasicBlock,
+ /// only defined when the region has been split.
+ BasicBlock *PrevBB = nullptr;
+
+ /// The BasicBlock that contains the starting instruction of the region.
+ BasicBlock *StartBB = nullptr;
+
+ /// The BasicBlock that contains the ending instruction of the region.
+ BasicBlock *EndBB = nullptr;
+
+ /// The BasicBlock that is after the start of the region BasicBlock,
+ /// only defined when the region has been split.
+ BasicBlock *FollowBB = nullptr;
+
+ /// The Outlinable Group that contains this region and structurally similar
+ /// regions to this region.
+ OutlinableGroup *Parent = nullptr;
+
+ OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
+ : Candidate(&C), Parent(&Group) {
+ StartBB = C.getStartBB();
+ EndBB = C.getEndBB();
+ }
+
+ /// For the contained region, split the parent BasicBlock at the starting and
+ /// ending instructions of the contained IRSimilarityCandidate.
+ void splitCandidate();
+
+ /// For the contained region, reattach the BasicBlock at the starting and
+ /// ending instructions of the contained IRSimilarityCandidate, or if the
+ /// function has been extracted, the start and end of the BasicBlock
+ /// containing the called function.
+ void reattachCandidate();
+};
+
+/// This class is a pass that identifies similarity in a Module, extracts
+/// instances of the similarity, and then consolidating the similar regions
+/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
+/// to identify the similar regions of code, and then extracts the similar
+/// sections into a single function. See the above for an example as to
+/// how code is extracted and consolidated into a single function.
+class IROutliner {
+public:
+ IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
+ function_ref<IRSimilarityIdentifier &(Module &)> GIRSI)
+ : getTTI(GTTI), getIRSI(GIRSI) {}
+ bool run(Module &M);
+
+private:
+ /// Find repeated similar code sequences in \p M and outline them into new
+ /// Functions.
+ ///
+ /// \param [in] M - The module to outline from.
+ /// \returns The number of Functions created.
+ unsigned doOutline(Module &M);
+
+ /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
+ /// instructions contained in a previously outlined region and put the
+ /// remaining regions in \p CurrentGroup.
+ ///
+ /// \param [in] CandidateVec - List of similarity candidates for regions with
+ /// the same similarity structure.
+ /// \param [in,out] CurrentGroup - Contains the potential sections to
+ /// be outlined.
+ void
+ pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
+ OutlinableGroup &CurrentGroup);
+
+ /// Extract \p Region into its own function.
+ ///
+ /// \param [in] Region - The region to be extracted into its own function.
+ /// \returns True if it was successfully outlined.
+ bool extractSection(OutlinableRegion &Region);
+
+ /// The set of outlined Instructions, identified by their location in the
+ /// sequential ordering of instructions in a Module.
+ DenseSet<unsigned> Outlined;
+
+ /// TargetTransformInfo lambda for target specific information.
+ function_ref<TargetTransformInfo &(Function &)> getTTI;
+
+ /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
+ function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
+
+ /// The memory allocator used to allocate the CodeExtractors.
+ SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
+
+ /// The memory allocator used to allocate the OutlinableRegions.
+ SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
+
+ /// The memory allocator used to allocate new IRInstructionData.
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+};
+
+/// Pass to outline similar regions.
+class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
extern cl::opt<bool> EnableGVNHoist;
extern cl::opt<bool> EnableGVNSink;
extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
extern cl::opt<bool> EnableOrderFileInstrumentation;
extern cl::opt<bool> EnableCHR;
extern cl::opt<bool> EnableUnrollAndJam;
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
+ // Search the code for similar regions of code. If enough similar regions can
+ // be found where extracting the regions into their own function will decrease
+ // the size of the program, we extract the regions, a deduplicate the
+ // structurally similar regions.
+ if (EnableIROutliner)
+ MPM.addPass(IROutlinerPass());
+
// Merge functions if requested.
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
+MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
MODULE_PASS("loop-extract", LoopExtractorPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
GlobalSplit.cpp
HotColdSplitting.cpp
IPO.cpp
+ IROutliner.cpp
InferFunctionAttrs.cpp
InlineSimple.cpp
Inliner.cpp
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
initializeHotColdSplittingLegacyPassPass(Registry);
+ initializeIROutlinerLegacyPassPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);
--- /dev/null
+//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+// Implementation for the IROutliner which is used by the IROutliner Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include <map>
+#include <set>
+#include <vector>
+
+#define DEBUG_TYPE "iroutliner"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+/// The OutlinableGroup holds all the overarching information for outlining
+/// a set of regions that are structurally similar to one another, such as the
+/// types of the overall function, the output blocks, the sets of stores needed
+/// and a list of the different regions. This information is used in the
+/// deduplication of extracted regions with the same structure.
+struct OutlinableGroup {
+ /// The sections that could be outlined
+ std::vector<OutlinableRegion *> Regions;
+
+ /// Flag for whether we should not consider this group of OutlinableRegions
+ /// for extraction.
+ bool IgnoreGroup = false;
+};
+
+/// Move the contents of \p SourceBB to before the last instruction of \p
+/// TargetBB.
+/// \param SourceBB - the BasicBlock to pull Instructions from.
+/// \param TargetBB - the BasicBlock to put Instruction into.
+static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
+ BasicBlock::iterator BBCurr, BBEnd, BBNext;
+ for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
+ BBCurr = BBNext) {
+ BBNext = std::next(BBCurr);
+ BBCurr->moveBefore(TargetBB, TargetBB.end());
+ }
+}
+
+void OutlinableRegion::splitCandidate() {
+ assert(!CandidateSplit && "Candidate already split!");
+
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ Instruction *EndInst = (*Candidate->end()).Inst;
+ assert(StartInst && EndInst && "Expected a start and end instruction?");
+ StartBB = StartInst->getParent();
+ PrevBB = StartBB;
+
+ // The basic block gets split like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // region1 br block_to_outline
+ // region2 block_to_outline:
+ // region3 -> region1
+ // region4 region2
+ // inst3 region3
+ // inst4 region4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+
+ std::string OriginalName = PrevBB->getName().str();
+
+ StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
+
+ // This is the case for the inner block since we do not have to include
+ // multiple blocks.
+ EndBB = StartBB;
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+
+ CandidateSplit = true;
+}
+
+void OutlinableRegion::reattachCandidate() {
+ assert(CandidateSplit && "Candidate is not split!");
+
+ // The basic block gets reattached like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // br block_to_outline region1
+ // block_to_outline: -> region2
+ // region1 region3
+ // region2 region4
+ // region3 inst3
+ // region4 inst4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+ assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
+ assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
+
+ // StartBB should only have one predecessor since we put an unconditional
+ // branch at the end of PrevBB when we split the BasicBlock.
+ PrevBB = StartBB->getSinglePredecessor();
+ assert(PrevBB != nullptr &&
+ "No Predecessor for the region start basic block!");
+
+ assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
+ PrevBB->getTerminator()->eraseFromParent();
+ EndBB->getTerminator()->eraseFromParent();
+
+ moveBBContents(*StartBB, *PrevBB);
+
+ BasicBlock *PlacementBB = PrevBB;
+ if (StartBB != EndBB)
+ PlacementBB = EndBB;
+ moveBBContents(*FollowBB, *PlacementBB);
+
+ PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
+ PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ StartBB->eraseFromParent();
+ FollowBB->eraseFromParent();
+
+ // Make sure to save changes back to the StartBB.
+ StartBB = PrevBB;
+ EndBB = nullptr;
+ PrevBB = nullptr;
+ FollowBB = nullptr;
+
+ CandidateSplit = false;
+}
+
+void IROutliner::pruneIncompatibleRegions(
+ std::vector<IRSimilarityCandidate> &CandidateVec,
+ OutlinableGroup &CurrentGroup) {
+ bool PreviouslyOutlined;
+
+ // Sort from beginning to end, so the IRSimilarityCandidates are in order.
+ stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
+ const IRSimilarityCandidate &RHS) {
+ return LHS.getStartIdx() < RHS.getStartIdx();
+ });
+
+ unsigned CurrentEndIdx = 0;
+ for (IRSimilarityCandidate &IRSC : CandidateVec) {
+ PreviouslyOutlined = false;
+ unsigned StartIdx = IRSC.getStartIdx();
+ unsigned EndIdx = IRSC.getEndIdx();
+
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.find(Idx) != Outlined.end()) {
+ PreviouslyOutlined = true;
+ break;
+ }
+
+ if (PreviouslyOutlined)
+ continue;
+
+ // TODO: If in the future we can outline across BasicBlocks, we will need to
+ // check all BasicBlocks contained in the region.
+ if (IRSC.getStartBB()->hasAddressTaken())
+ continue;
+
+ // Greedily prune out any regions that will overlap with already chosen
+ // regions.
+ if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
+ continue;
+
+ OutlinableRegion *OS = new (RegionAllocator.Allocate())
+ OutlinableRegion(IRSC, CurrentGroup);
+ CurrentGroup.Regions.push_back(OS);
+
+ CurrentEndIdx = EndIdx;
+ }
+}
+
+bool IROutliner::extractSection(OutlinableRegion &Region) {
+ assert(Region.StartBB != nullptr &&
+ "StartBB for the OutlinableRegion is nullptr!");
+ assert(Region.FollowBB != nullptr &&
+ "StartBB for the OutlinableRegion is nullptr!");
+ Function *OrigF = Region.StartBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OrigF);
+ Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+
+ // If the extraction was successful, find the BasicBlock, and reassign the
+ // OutlinableRegion blocks
+ if (!Region.ExtractedFunction) {
+ LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
+ << "\n");
+ Region.reattachCandidate();
+ return false;
+ }
+
+ BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+ Region.StartBB = RewrittenBB;
+ Region.EndBB = RewrittenBB;
+
+ // The sequences of outlinable regions has now changed. We must fix the
+ // IRInstructionDataList for consistency. Although they may not be illegal
+ // instructions, they should not be compared with anything else as they
+ // should not be outlined in this round. So marking these as illegal is
+ // allowed.
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ Region.NewFront = new (InstDataAllocator.Allocate())
+ IRInstructionData(*RewrittenBB->begin(), false, *IDL);
+ Region.NewBack = new (InstDataAllocator.Allocate())
+ IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), false, *IDL);
+
+ // Insert the first IRInstructionData of the new region in front of the
+ // first IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->begin(), *Region.NewFront);
+ // Insert the first IRInstructionData of the new region after the
+ // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *Region.NewBack);
+ // Remove the IRInstructionData from the IRSimilarityCandidate.
+ IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
+
+ assert(RewrittenBB != nullptr &&
+ "Could not find a predecessor after extraction!");
+
+ // Iterate over the new set of instructions to find the new call
+ // instruction.
+ for (Instruction &I : *RewrittenBB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (Region.ExtractedFunction == CI->getCalledFunction()) {
+ Region.Call = CI;
+ break;
+ }
+ Region.reattachCandidate();
+ return true;
+}
+
+unsigned IROutliner::doOutline(Module &M) {
+ // Find the possibile similarity sections.
+ IRSimilarityIdentifier &Identifier = getIRSI(M);
+ SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
+
+ // Sort them by size of extracted sections
+ unsigned OutlinedFunctionNum = 0;
+ // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
+ // to sort them by the potential number of instructions to be outlined
+ if (SimilarityCandidates.size() > 1)
+ llvm::stable_sort(SimilarityCandidates,
+ [](const std::vector<IRSimilarityCandidate> &LHS,
+ const std::vector<IRSimilarityCandidate> &RHS) {
+ return LHS[0].getLength() * LHS.size() >
+ RHS[0].getLength() * RHS.size();
+ });
+
+ // Iterate over the possible sets of similarity.
+ for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
+ OutlinableGroup CurrentGroup;
+
+ // Remove entries that were previously outlined
+ pruneIncompatibleRegions(CandidateVec, CurrentGroup);
+
+ // We pruned the number of regions to 0 to 1, meaning that it's not worth
+ // trying to outlined since there is no compatible similar instance of this
+ // code.
+ if (CurrentGroup.Regions.size() < 2)
+ continue;
+
+ // Create a CodeExtractor for each outlinable region.
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ // Break the outlinable region out of its parent BasicBlock into its own
+ // BasicBlocks (see function implementation).
+ OS->splitCandidate();
+ std::vector<BasicBlock *> BE = {OS->StartBB};
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
+ }
+
+ // Create functions out of all the sections, and mark them as outlined
+ std::vector<OutlinableRegion *> OutlinedRegions;
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ OutlinedFunctionNum++;
+ bool FunctionOutlined = extractSection(*OS);
+ if (FunctionOutlined) {
+ unsigned StartIdx = OS->Candidate->getStartIdx();
+ unsigned EndIdx = OS->Candidate->getEndIdx();
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ Outlined.insert(Idx);
+
+ OutlinedRegions.push_back(OS);
+ }
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ }
+
+ return OutlinedFunctionNum;
+}
+
+bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
+
+// Pass Manager Boilerplate
+class IROutlinerLegacyPass : public ModulePass {
+public:
+ static char ID;
+ IROutlinerLegacyPass() : ModulePass(ID) {
+ initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<IRSimilarityIdentifierWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+bool IROutlinerLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
+ return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
+ };
+
+ return IROutliner(GTTI, GIRSI).run(M);
+}
+
+PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ std::function<TargetTransformInfo &(Function &)> GTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
+ [&AM](Module &M) -> IRSimilarityIdentifier & {
+ return AM.getResult<IRSimilarityAnalysis>(M);
+ };
+
+ if (IROutliner(GTTI, GIRSI).run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char IROutlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+
+ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
+ cl::desc("Enable ir outliner pass"));
+
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
MPM.add(createHotColdSplittingPass());
+ if (EnableIROutliner)
+ MPM.add(createIROutlinerPass());
+
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test makes sure we are extracting the found similarity sections
+; correctly at the call site.
+
+define void @extract1() {
+; CHECK-LABEL: @extract1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @extract1.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+define void @extract2() {
+; CHECK-LABEL: @extract2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @extract2.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+define void @extract_outs1() #0 {
+; CHECK-LABEL: @extract_outs1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
+; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
+; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
+; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %output = alloca i32, align 4
+ %result = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ %0 = load i32, i32* %a, align 4
+ %1 = load i32, i32* %b, align 4
+ %add = add i32 %0, %1
+ store i32 %add, i32* %output, align 4
+ %2 = load i32, i32* %output, align 4
+ %3 = load i32, i32* %output, align 4
+ %mul = mul i32 %2, %add
+ store i32 %mul, i32* %result, align 4
+ ret void
+}
+
+define void @extract_outs2() #0 {
+; CHECK-LABEL: @extract_outs2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
+; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
+; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %output = alloca i32, align 4
+ %result = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ %0 = load i32, i32* %a, align 4
+ %1 = load i32, i32* %b, align 4
+ %add = add i32 %0, %1
+ store i32 %add, i32* %output, align 4
+ %2 = load i32, i32* %output, align 4
+ %mul = mul i32 %2, %add
+ store i32 %mul, i32* %result, align 4
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test shows that we do not outline from basic blocks with their address
+; taken.
+
+@ba1 = constant i8* blockaddress (@dontoutline, %new_block)
+
+define void @outline_1() {
+; CHECK-LABEL: @outline_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+define void @outline_2() {
+; CHECK-LABEL: @outline_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @[[FUNCTION_2:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+define void @dontoutline() {
+; CHECK-LABEL: @dontoutline(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label [[NEW_BLOCK:%.*]]
+; CHECK: new_block:
+; CHECK-NEXT: store i32 2, i32* [[A]], align 4
+; CHECK-NEXT: store i32 3, i32* [[B]], align 4
+; CHECK-NEXT: store i32 4, i32* [[C]], align 4
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ br label %new_block
+new_block:
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_2]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This is a negative case to show that when we have the same set of
+; instructions, but in a different order, they are not outlined in the same way.
+; In this case, the arguments passed into the function are in a different order.
+
+define void @outline_constants1() {
+; CHECK-LABEL: @outline_constants1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 2, i32* [[A]], align 4
+; CHECK-NEXT: store i32 3, i32* [[B]], align 4
+; CHECK-NEXT: store i32 4, i32* [[C]], align 4
+; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[C]], i32* [[B]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %cl = load i32, i32* %c
+ %bl = load i32, i32* %b
+ ret void
+}
+
+define void @outline_constants2() {
+; CHECK-LABEL: @outline_constants2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 2, i32* [[A]], align 4
+; CHECK-NEXT: store i32 3, i32* [[B]], align 4
+; CHECK-NEXT: store i32 4, i32* [[C]], align 4
+; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+
+; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test looks at the constants in the regions, and if it they are the
+; same it outlines them as constants rather than elevating them to arguments.
+
+define void @outline_constants1() {
+; CHECK-LABEL: @outline_constants1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+define void @outline_constants2() {
+; CHECK-LABEL: @outline_constants2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+
+; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+@global1 = global i32 1, align 4
+@global2 = global i32 2, align 4
+
+; This test looks at the globals in the regions, and if it they are the
+; same it outlines the region without elevating the globals to arguments.
+
+define void @outline_globals1() {
+; CHECK-LABEL: @outline_globals1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @[[FUNCTION_0:.*]]()
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* @global1
+ %1 = load i32, i32* @global2
+ %2 = add i32 %0, %1
+ ret void
+}
+
+define void @outline_globals2() {
+; CHECK-LABEL: @outline_globals2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @[[FUNCTION_1:.*]]()
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* @global1
+ %1 = load i32, i32* @global2
+ %2 = add i32 %0, %1
+ ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]]()
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
+
+
+; CHECK: define internal void @[[FUNCTION_1]]()
+; CHECK: entry_to_outline:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
+