From c52bcf3a9b2d3cd60e62f38218979b781ccc9d8a Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Thu, 3 Sep 2020 12:20:47 -0500 Subject: [PATCH] [IRSim][IROutliner] Limit to extracting regions that only require inputs. Extracted regions can have both inputs and outputs. In addition, the CodeExtractor removes inputs that are only used in llvm.assumes, and sunken allocas (values are used entirely in the extracted region as denoted by lifetime intrinsics). We also cannot combine sections that have different constants in the same structural location, and these constants will have to be elevated to arguments. This patch limits the extracted regions to those that only require inputs, and do not have any other special cases. We test that we do not outline the wrong constants in: test/Transforms/IROutliner/outlining-different-constants.ll test/Transforms/IROutliner/outlining-different-globals.ll test/Transforms/IROutliner/outlining-constants-vs-registers.ll We test that we correctly outline in: test/Transforms/IROutliner/outlining-same-globals.ll test/Transforms/IROutliner/outlining-same-constants.ll test/Transforms/IROutliner/outlining-different-structure.ll Reviewers: paquette, plofti Differential Revision: https://reviews.llvm.org/D86977 --- llvm/include/llvm/Transforms/IPO/IROutliner.h | 22 +- llvm/lib/Transforms/IPO/IROutliner.cpp | 224 ++++++++++++++++++++- llvm/test/Transforms/IROutliner/extraction.ll | 46 ++--- llvm/test/Transforms/IROutliner/illegal-assumes.ll | 22 +- llvm/test/Transforms/IROutliner/illegal-memcpy.ll | 40 +--- llvm/test/Transforms/IROutliner/illegal-memmove.ll | 40 +--- llvm/test/Transforms/IROutliner/illegal-vaarg.ll | 30 ++- .../IROutliner/outlining-constants-vs-registers.ll | 78 +++++++ .../IROutliner/outlining-different-constants.ll | 62 ++++++ .../IROutliner/outlining-different-globals.ll | 40 ++++ .../IROutliner/outlining-different-structure.ll | 6 - .../IROutliner/outlining-same-constants.ll | 9 - 12 files changed, 472 insertions(+), 147 
deletions(-) create mode 100644 llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-different-constants.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-different-globals.ll diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h index b49970e..25e42e1 100644 --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -154,6 +154,13 @@ private: pruneIncompatibleRegions(std::vector &CandidateVec, OutlinableGroup &CurrentGroup); + /// Identify the needed extracted inputs in a section, and add to the overall + /// function if needed. + /// + /// \param [in] M - The module to outline from. + /// \param [in,out] Region - The region to be extracted + void findAddInputsOutputs(Module &M, OutlinableRegion &Region); + /// Extract \p Region into its own function. /// /// \param [in] Region - The region to be extracted into its own function. @@ -182,8 +189,7 @@ private: /// Custom InstVisitor to classify different instructions for whether it can /// be analyzed for similarity. This is needed as there may be instruction we /// can identify as having similarity, but are more complicated to outline. - struct InstructionAllowed - : public InstVisitor { + struct InstructionAllowed : public InstVisitor { InstructionAllowed() {} // TODO: Determine a scheme to resolve when the label is similar enough. @@ -203,13 +209,9 @@ private: // DebugInfo should be included in the regions, but should not be // analyzed for similarity as it has no bearing on the outcome of the // program. 
- bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { - return true; - } + bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle GetElementPtrInsts - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - return false; - } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { return false; } // TODO: Handle specific intrinsics individually from those that can be // handled. bool IntrinsicInst(IntrinsicInst &II) { return false; } @@ -226,9 +228,7 @@ private: bool visitCallBrInst(CallBrInst &CBI) { return false; } // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } - bool visitInstruction(Instruction &I) { - return true; - } + bool visitInstruction(Instruction &I) { return true; } }; /// A InstVisitor used to exclude certain instructions from being outlined. diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 9985d9a..7a1fdd4 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -41,6 +41,13 @@ struct OutlinableGroup { /// Flag for whether we should not consider this group of OutlinableRegions /// for extraction. bool IgnoreGroup = false; + + /// For the \ref Regions, we look at every Value. If it is a constant, + /// we check whether it is the same in Region. + /// + /// \param [in,out] NotSame contains the global value numbers where the + /// constant is not always the same, and must be passed in as an argument. + void findSameConstants(DenseSet &NotSame); }; /// Move the contents of \p SourceBB to before the last instruction of \p @@ -144,6 +151,198 @@ void OutlinableRegion::reattachCandidate() { CandidateSplit = false; } +/// Find whether \p V matches the Constants previously found for the \p GVN. +/// +/// \param V - The value to check for consistency. +/// \param GVN - The global value number assigned to \p V. +/// \param GVNToConstant - The mapping of global value number to Constants. 
+/// \returns true if the Value matches the Constant mapped to by \p GVN and false if +/// \p V is a Constant but does not match. +/// \returns None if \p V is not a Constant. +static Optional +constantMatches(Value *V, unsigned GVN, + DenseMap &GVNToConstant) { + // See if we have a constant + Constant *CST = dyn_cast(V); + if (!CST) + return None; + + // Holds a mapping from a global value number to a Constant. + DenseMap::iterator GVNToConstantIt; + bool Inserted; + + // If we have a constant, try to make a new entry in the GVNToConstant. + std::tie(GVNToConstantIt, Inserted) = + GVNToConstant.insert(std::make_pair(GVN, CST)); + // If it was found and is not equal, it is not the same. We do not + // handle this case yet, and exit early. + if (Inserted || (GVNToConstantIt->second == CST)) + return true; + + return false; +} + +/// Find whether \p Region matches the global value numbering to Constant mapping +/// found so far. +/// +/// \param Region - The OutlinableRegion we are checking for constants +/// \param NotSame - The set of global value numbers that do not have the same +/// constant in each region. +/// \returns true if all Constants are the same in every use of a Constant in \p +/// Region and false if not +static bool +collectRegionsConstants(OutlinableRegion &Region, + DenseMap &GVNToConstant, + DenseSet &NotSame) { + IRSimilarityCandidate &C = *Region.Candidate; + for (IRInstructionData &ID : C) { + + // Iterate over the operands in an instruction. If the global value number, + // assigned by the IRSimilarityCandidate, has been seen before, we check if + // the number has been found to be not the same value in each instance. + for (Value *V : ID.OperVals) { + Optional GVNOpt = C.getGVN(V); + assert(GVNOpt.hasValue() && "Expected a GVN for operand?"); + unsigned GVN = GVNOpt.getValue(); + + // If this global value has been found to not be the same, it could have + // just been a register, check that it is not a constant value. 
+ if (NotSame.find(GVN) != NotSame.end()) { + if (isa(V)) + return false; + continue; + } + + // If it has been the same so far, we check whether the + // associated Constant value matches the previous instances of the same + // global value number. If the global value does not map to a Constant, + // it is considered to not be the same value. + Optional ConstantMatches = constantMatches(V, GVN, GVNToConstant); + if (ConstantMatches.hasValue()) { + if (ConstantMatches.getValue()) + continue; + else + return false; + } + + // While this value is a register, it might not have been previously, + // make sure we don't already have a constant mapped to this global value + // number. + if (GVNToConstant.find(GVN) != GVNToConstant.end()) + return false; + + NotSame.insert(GVN); + } + } + + return true; +} + +void OutlinableGroup::findSameConstants(DenseSet &NotSame) { + DenseMap GVNToConstant; + + for (OutlinableRegion *Region : Regions) + if (!collectRegionsConstants(*Region, GVNToConstant, NotSame)) { + IgnoreGroup = true; + return; + } +} + +/// Find the GVN for the inputs that have been found by the CodeExtractor, +/// excluding the ones that will be removed by llvm.assumes as these will be +/// removed by the CodeExtractor. +/// +/// \param [in] C - The IRSimilarityCandidate containing the region we are +/// analyzing. +/// \param [in] CurrentInputs - The set of inputs found by the +/// CodeExtractor. +/// \param [out] EndInputNumbers - The global value numbers for the extracted +/// arguments. +static void mapInputsToGVNs(IRSimilarityCandidate &C, + SetVector &CurrentInputs, + std::vector &EndInputNumbers) { + // Get the global value number for each input. 
+ for (Value *Input : CurrentInputs) { + assert(Input && "Have a nullptr as an input"); + assert(C.getGVN(Input).hasValue() && + "Could not find a numbering for the given input"); + EndInputNumbers.push_back(C.getGVN(Input).getValue()); + } +} + +/// Find the input GVNs and the output values for a region of Instructions. +/// Using the code extractor, we collect the inputs to the extracted function. +/// +/// The \p Region can be identified as needing to be ignored in this function. +/// It should be checked whether it should be ignored after a call to this +/// function. +/// +/// \param [in,out] Region - The region of code to be analyzed. +/// \param [out] InputGVNs - The global value numbers for the extracted arguments. +/// \param [out] ArgInputs - The values of the inputs to the extracted function. +static void getCodeExtractorArguments(OutlinableRegion &Region, + std::vector &InputGVNs, + SetVector &ArgInputs) { + IRSimilarityCandidate &C = *Region.Candidate; + + // OverallInputs are the inputs to the region found by the CodeExtractor, + // SinkCands and HoistCands are used by the CodeExtractor to find sunken + // allocas of values whose lifetimes are contained completely within the + // outlined region. Outputs are values used outside of the outlined region + // found by the CodeExtractor. + SetVector OverallInputs, SinkCands, HoistCands, Outputs; + + // Use the code extractor to get the inputs and outputs, without sunken + // allocas or removing llvm.assumes. + CodeExtractor *CE = Region.CE; + CE->findInputsOutputs(OverallInputs, Outputs, SinkCands); + assert(Region.StartBB && "Region must have a start BasicBlock!"); + Function *OrigF = Region.StartBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OrigF); + BasicBlock *Dummy = nullptr; + + // The region may be ineligible due to VarArgs in the parent function. In this + // case we ignore the region. 
+ if (!CE->isEligible()) { + Region.IgnoreRegion = true; + return; + } + + // Find if any values are going to be sunk into the function when extracted + CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy); + CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); + + // TODO: Support regions with output values. Outputs add an extra layer of + // resolution that adds too much complexity at this stage. + if (Outputs.size() > 0) { + Region.IgnoreRegion = true; + return; + } + + // TODO: Support regions with sunken allocas: values whose lifetimes are + // contained completely within the outlined region. These are not guaranteed + // to be the same in every region, so we must elevate them all to arguments + // when they appear. If these values are not equal, it means there is some + // Input in OverallInputs that was removed for ArgInputs. + if (ArgInputs.size() != OverallInputs.size()) { + Region.IgnoreRegion = true; + return; + } + + mapInputsToGVNs(C, OverallInputs, InputGVNs); +} + +void IROutliner::findAddInputsOutputs( + Module &M, OutlinableRegion &Region) { + std::vector Inputs; + SetVector ArgInputs; + + getCodeExtractorArguments(Region, Inputs, ArgInputs); + + if (Region.IgnoreRegion) + return; +} + void IROutliner::pruneIncompatibleRegions( std::vector &CandidateVec, OutlinableGroup &CurrentGroup) { @@ -271,6 +470,7 @@ unsigned IROutliner::doOutline(Module &M) { RHS[0].getLength() * RHS.size(); }); + DenseSet NotSame; // Iterate over the possible sets of similarity. for (SimilarityGroup &CandidateVec : SimilarityCandidates) { OutlinableGroup CurrentGroup; @@ -284,7 +484,18 @@ unsigned IROutliner::doOutline(Module &M) { if (CurrentGroup.Regions.size() < 2) continue; - // Create a CodeExtractor for each outlinable region. + // Determine if there are any values that are the same constant throughout + // each section in the set. 
+ NotSame.clear(); + CurrentGroup.findSameConstants(NotSame); + + if (CurrentGroup.IgnoreGroup) + continue; + + // Create a CodeExtractor for each outlinable region. Identify inputs and + // outputs for each section using the code extractor and create the argument + // types for the Aggregate Outlining Function. + std::vector OutlinedRegions; for (OutlinableRegion *OS : CurrentGroup.Regions) { // Break the outlinable region out of its parent BasicBlock into its own // BasicBlocks (see function implementation). @@ -293,10 +504,17 @@ unsigned IROutliner::doOutline(Module &M) { OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); + findAddInputsOutputs(M, *OS); + if (!OS->IgnoreRegion) + OutlinedRegions.push_back(OS); + else + OS->reattachCandidate(); } - // Create functions out of all the sections, and mark them as outlined - std::vector OutlinedRegions; + CurrentGroup.Regions = std::move(OutlinedRegions); + + // Create functions out of all the sections, and mark them as outlined. + OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { OutlinedFunctionNum++; bool FunctionOutlined = extractSection(*OS); diff --git a/llvm/test/Transforms/IROutliner/extraction.ll b/llvm/test/Transforms/IROutliner/extraction.ll index ec0f0ea..34c784d 100644 --- a/llvm/test/Transforms/IROutliner/extraction.ll +++ b/llvm/test/Transforms/IROutliner/extraction.ll @@ -48,26 +48,24 @@ entry: ret void } +; There are potential ouptuts in this sections, but we do not extract sections +; with outputs right now, since they cannot be consolidated. 
define void @extract_outs1() #0 { ; CHECK-LABEL: @extract_outs1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4 -; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs1.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: @@ -88,25 +86,23 @@ entry: ret void } +; There are potential ouptuts in this sections, but we do not extract sections +; with outputs right now, since they 
cannot be consolidated. define void @extract_outs2() #0 { ; CHECK-LABEL: @extract_outs2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs2.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll index 3bd49b4..c94c886 100644 --- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ 
-7,19 +7,15 @@ define void @outline_assumes() { ; CHECK-LABEL: @outline_assumes( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes.outlined.5(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] +; CHECK-NEXT: store i1 true, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL]], [[DL]] ; CHECK-NEXT: call void @outline_assumes.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void @outline_assumes.outlined.1(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; @@ -44,18 +40,14 @@ entry: define void @outline_assumes2() { ; CHECK-LABEL: @outline_assumes2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes2.outlined.6(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: store i1 false, i1* [[D]], align 4 
+; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 ; CHECK-NEXT: call void @outline_assumes2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void @outline_assumes2.outlined.2(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll index 68241e0..ebae28d 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -9,22 +9,12 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: 
call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ entry: define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll index 98782cb..740fcfb 100644 --- 
a/llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -9,22 +9,12 @@ declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ entry: define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; 
CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll index 7e194fe..fdf03d4 100644 --- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -11,20 +11,17 @@ declare void @llvm.va_end(i8*) define i32 @func1(i32 %a, double %b, i8* %v, ...) 
nounwind { ; CHECK-LABEL: @func1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func1.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 ; CHECK-NEXT: ret i32 [[TMP]] @@ -49,20 +46,17 @@ entry: define i32 @func2(i32 %a, double %b, i8* %v, ...) 
nounwind { ; CHECK-LABEL: @func2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func2.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll b/llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll new file mode 100644 index 0000000..7f57ecd --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at instances of constants in the different 
regions. If there +; is a register in the same place as a constant in a similar region of code, we +; do not outline those regions. + +; The first function tests that we do not outline with the register is +; seen first, and the second function checks that we do not outline when the +; constant is seen first. + +define void @function_registers_first(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: @function_registers_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[A]], align 4 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[C]], align 4 +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 %0, i32* %a, align 4 + store i32 %1, i32* %b, align 4 + store i32 %2, i32* %c, align 4 + ret void +next: + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + ret void +} + +define void @function_with_constants_first() { +; CHECK-LABEL: @function_with_constants_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 2, [[AL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[BL]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 4, [[CL]] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[AL]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 
[[TMP1]], [[BL]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], [[CL]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 2, %al + %1 = add i32 3, %bl + %2 = add i32 4, %cl + ret void +next: + %3 = add i32 %0, %al + %4 = add i32 %1, %bl + %5 = add i32 %2, %cl + ret void +} diff --git a/llvm/test/Transforms/IROutliner/outlining-different-constants.ll b/llvm/test/Transforms/IROutliner/outlining-different-constants.ll new file mode 100644 index 0000000..edcfe3c --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-different-constants.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the constants in the regions, and if they are +; different, it does not outline them as they cannot be consolidated into +; the same function.
+ +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 3, i32* [[A]], align 4 +; CHECK-NEXT: store i32 4, i32* [[B]], align 4 +; CHECK-NEXT: store i32 5, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 3, i32* %a, align 4 + store i32 4, i32* %b, align 4 + store i32 5, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-different-globals.ll b/llvm/test/Transforms/IROutliner/outlining-different-globals.ll new 
file mode 100644 index 0000000..84134fc --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-different-globals.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the globals in the regions, and makes sure they are not +; outlined if they are different values. + +@global1 = global i32 1, align 4 +@global2 = global i32 2, align 4 +@global3 = global i32 3, align 4 +@global4 = global i32 4, align 4 + +define void @outline_globals1() { +; CHECK-LABEL: @outline_globals1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global1, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global2, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global1 + %1 = load i32, i32* @global2 + %2 = add i32 %0, %1 + ret void +} + +define void @outline_globals2() { +; CHECK-LABEL: @outline_globals2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global3, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global4, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global3 + %1 = load i32, i32* @global4 + %2 = add i32 %0, %1 + ret void +} diff --git a/llvm/test/Transforms/IROutliner/outlining-different-structure.ll b/llvm/test/Transforms/IROutliner/outlining-different-structure.ll index bb4af26..35a742e 100644 --- a/llvm/test/Transforms/IROutliner/outlining-different-structure.ll +++ b/llvm/test/Transforms/IROutliner/outlining-different-structure.ll @@ -60,9 +60,3 @@ entry: ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: 
entry_to_outline: -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-same-constants.ll b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll index e6228ab..4020463 100644 --- a/llvm/test/Transforms/IROutliner/outlining-same-constants.ll +++ b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll @@ -56,12 +56,3 @@ entry: ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 -; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 -; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 -- 2.7.4