From: Sean Fertile
Date: Fri, 20 Apr 2018 19:56:26 +0000 (+0000)
Subject: [PartialInlining] Fix Crash from holding a reference to a destructed ORE.
X-Git-Tag: llvmorg-7.0.0-rc1~7769
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=18f17333ddb1d371cc4810cd16fa24040095ddc3;p=platform%2Fupstream%2Fllvm.git

[PartialInlining] Fix Crash from holding a reference to a destructed ORE.

The callback used to create an ORE for the legacy PI pass caches the allocated
object in a unique_ptr in the runOnModule function, and returns a reference to
that object. Under certain circumstances we can end up holding onto that
reference after the ORE's destruction. Rather than allowing the new and legacy
passes to create the ORE object in different ways, create the ORE at the point
of use.

Differential Revision: https://reviews.llvm.org/D43219

llvm-svn: 330473
---

diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index a16269b..36bd6de 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -202,10 +202,8 @@ struct PartialInlinerImpl {
       std::function<AssumptionCache &(Function &)> *GetAC,
       std::function<TargetTransformInfo &(Function &)> *GTTI,
       Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
-      ProfileSummaryInfo *ProfSI,
-      std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
-      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
-        GetORE(GORE) {}
+      ProfileSummaryInfo *ProfSI)
+      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
 
   bool run(Module &M);
   // Main part of the transformation that calls helper functions to find
@@ -271,7 +269,6 @@ private:
   std::function<TargetTransformInfo &(Function &)> *GetTTI;
   Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
   ProfileSummaryInfo *PSI;
-  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
 
   // Return the frequency of the OutlininingBB relative to F's entry point.
   // The result is no larger than 1 and is represented using BP.
@@ -282,7 +279,8 @@ private:
   // Return true if the callee of CS should be partially inlined with
   // profit.
   bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
-                           BlockFrequency WeightedOutliningRcost);
+                           BlockFrequency WeightedOutliningRcost,
+                           OptimizationRemarkEmitter &ORE);
 
   // Try to inline DuplicateFunction (cloned from F with call to
   // the OutlinedFunction into its callers. Return true
@@ -337,7 +335,7 @@ private:
   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
 
   std::unique_ptr<FunctionOutliningMultiRegionInfo>
-  computeOutliningColdRegionsInfo(Function *F);
+  computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
 };
 
 struct PartialInlinerLegacyPass : public ModulePass {
@@ -362,7 +360,6 @@ struct PartialInlinerLegacyPass : public ModulePass {
         &getAnalysis<TargetTransformInfoWrapperPass>();
     ProfileSummaryInfo *PSI =
         getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-    std::unique_ptr<OptimizationRemarkEmitter> UPORE;
 
     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
         [&ACT](Function &F) -> AssumptionCache & {
@@ -374,14 +371,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
       return TTIWP->getTTI(F);
     };
 
-    std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
-        [&UPORE](Function &F) -> OptimizationRemarkEmitter & {
-      UPORE.reset(new OptimizationRemarkEmitter(&F));
-      return *UPORE.get();
-    };
-
-    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
-                              &GetORE)
+    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
         .run(M);
   }
 };
@@ -389,7 +379,8 @@
 } // end anonymous namespace
 
 std::unique_ptr<FunctionOutliningMultiRegionInfo>
-PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
+PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
+                                                    OptimizationRemarkEmitter &ORE) {
   BasicBlock *EntryBlock = &F->front();
 
   DominatorTree DT(*F);
@@ -403,8 +394,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
   } else
     BFI = &(*GetBFI)(*F);
 
-  auto &ORE = (*GetORE)(*F);
-
   // Return if we don't have profiling information.
   if (!PSI->hasInstrumentationProfile())
     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
@@ -766,7 +755,8 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
 
 bool PartialInlinerImpl::shouldPartialInline(
     CallSite CS, FunctionCloner &Cloner,
-    BlockFrequency WeightedOutliningRcost) {
+    BlockFrequency WeightedOutliningRcost,
+    OptimizationRemarkEmitter &ORE) {
   using namespace ore;
 
   Instruction *Call = CS.getInstruction();
@@ -778,7 +768,6 @@ bool PartialInlinerImpl::shouldPartialInline(
 
   Function *Caller = CS.getCaller();
   auto &CalleeTTI = (*GetTTI)(*Callee);
-  auto &ORE = (*GetORE)(*Caller);
   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                 *GetAssumptionCache, GetBFI, PSI, &ORE);
 
@@ -1270,14 +1259,14 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
   if (F->user_begin() == F->user_end())
     return {false, nullptr};
 
-  auto &ORE = (*GetORE)(*F);
+  OptimizationRemarkEmitter ORE(F);
 
   // Only try to outline cold regions if we have a profile summary, which
   // implies we have profiling information.
   if (PSI->hasProfileSummary() && F->hasProfileData() &&
       !DisableMultiRegionPartialInline) {
     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
-        computeOutliningColdRegionsInfo(F);
+        computeOutliningColdRegionsInfo(F, ORE);
     if (OMRI) {
       FunctionCloner Cloner(F, OMRI.get(), ORE);
 
@@ -1357,11 +1346,11 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
   // inlining the function with outlining (The inliner uses the size increase to
   // model the cost of inlining a callee).
   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
-    auto &ORE = (*GetORE)(*Cloner.OrigFunc);
+    OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
     DebugLoc DLoc;
     BasicBlock *Block;
     std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
-    ORE.emit([&]() {
+    OrigFuncORE.emit([&]() {
       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                         DLoc, Block)
              << ore::NV("Function", Cloner.OrigFunc)
@@ -1394,11 +1383,10 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
 
     if (IsLimitReached())
       continue;
-
-    if (!shouldPartialInline(CS, Cloner, WeightedRcost))
+    OptimizationRemarkEmitter CallerORE(CS.getCaller());
+    if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
       continue;
 
-    auto &ORE = (*GetORE)(*CS.getCaller());
     // Construct remark before doing the inlining, as after successful inlining
     // the callsite is removed.
     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
@@ -1413,7 +1401,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
                             : nullptr)))
       continue;
 
-    ORE.emit(OR);
+    CallerORE.emit(OR);
 
     // Now update the entry count:
     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
@@ -1436,8 +1424,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
   if (CalleeEntryCount)
     Cloner.OrigFunc->setEntryCount(
        CalleeEntryCount.setCount(CalleeEntryCountV));
-  auto &ORE = (*GetORE)(*Cloner.OrigFunc);
-  ORE.emit([&]() {
+  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
+  OrigFuncORE.emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
            << "Partially inlined into at least one caller";
   });
@@ -1519,14 +1507,9 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
     return FAM.getResult<TargetIRAnalysis>(F);
   };
 
-  std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
-      [&FAM](Function &F) -> OptimizationRemarkEmitter & {
-    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  };
-
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
 
-  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
+  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
           .run(M))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineORECrash.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineORECrash.ll
new file mode 100644
index 0000000..c7f1494
--- /dev/null
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineORECrash.ll
@@ -0,0 +1,170 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -inline-threshold=0 -disable-output
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%0 = type { i32 (...)**, %1, %1, %3, %3, %3, i8, float, %4*, %5*, %5*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%1 = type { %2, %3 }
+%2 = type { [3 x %3] }
+%3 = type { [4 x float] }
+%4 = type <{ i8*, i16, i16, [4 x i8], i8*, i32, %3, %3, [4 x i8] }>
+%5 = type { i32 (...)**, i32, i8* }
+%6 = type <{ %7, [4 x i8], %19*, %20*, %30, %35, %3, float, i8, i8, i8, i8, %37, i32, [4 x i8] }>
+%7 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17 }>
+%8 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8 }>
+%9 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %0**, i8, [7 x i8] }>
+%11 = type { i32 (...)** }
+%12 = type { float, i32, i32, float, i8, %15*, i8, i8, i8, float, i8, float, %13* }
+%13 = type opaque
+%14 = type { i32 (...)** }
+%15 = type { i32 (...)** }
+%16 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17, [4 x i8] }>
+%17 = type { %18 }
+%18 = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%19 = type { i32 (...)** }
+%20 = type <{ i32 (...)**, %21, %25, %9, i8, [7 x i8] }>
+%21 = type { %22 }
+%22 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %24*, i8, [7 x i8] }>
+%24 = type { i32, i32 }
+%25 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %27**, i8, [7 x i8] }>
+%27 = type { i32, [4 x i8], [4 x %29], i8*, i8*, i32, float, float, i32 }
+%29 = type <{ %3, %3, %3, %3, %3, float, float, float, i32, i32, i32, i32, [4 x i8], i8*, float, i8, [3 x i8], float, float, i32, %3, %3, [4 x i8] }>
+%30 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %32**, i8, [7 x i8] }>
+%32 = type { i32 (...)**, i32, i32, i32, i8, %33*, %33*, float, float, %3, %3, %3 }
+%33 = type <{ %0, %2, %3, %3, float, %3, %3, %3, %3, %3, %3, %3, float, float, i8, [3 x i8], float, float, float, float, float, float, %34*, %30, i32, i32, i32, [4 x i8] }>
+%34 = type { i32 (...)** }
+%35 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %33**, i8, [7 x i8] }>
+%37 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %39**, i8, [7 x i8] }>
+%39 = type { i32 (...)** }
+%40 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8, [7 x i8] }>
+
+@gDisableDeactivation = external local_unnamed_addr global i8, align 1
+@0 = external dso_local unnamed_addr constant [29 x i8], align 1
+@1 = external dso_local unnamed_addr constant [14 x i8], align 1
+@2 = external dso_local unnamed_addr constant [22 x i8], align 1
+@gDeactivationTime = external local_unnamed_addr global float, align 4
+
+declare void @_ZN15CProfileManager12Stop_ProfileEv() local_unnamed_addr
+
+declare void @_ZN15CProfileManager13Start_ProfileEPKc(i8*) local_unnamed_addr
+
+declare void @_ZN17btCollisionObject18setActivationStateEi(%0*, i32 signext) local_unnamed_addr
+
+declare hidden void @__clang_call_terminate(i8*) local_unnamed_addr
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0
+
+define void @_ZN23btDiscreteDynamicsWorld28internalSingleStepSimulationEf(%6*, float) unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 {
+  invoke void null(%6* nonnull %0, float %1)
+          to label %5 unwind label %3
+
+;
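As a standalone illustration of the lifetime bug described in the commit message, consider the sketch below. The names (Emitter, GetEmitter, Cache) are illustrative only, not LLVM's API: a factory lambda that caches the object it hands out in a single unique_ptr destroys the previously returned object on every new request, so a caller still holding the old reference is left dangling. Constructing the emitter at the point of use, as this patch does, gives it an obvious, well-defined scope.

  // Minimal sketch of the dangling-reference pattern (illustrative names,
  // not LLVM's API): the lambda caches its result in one unique_ptr, so a
  // second call destroys the object an earlier caller may still reference.
  #include <iostream>
  #include <memory>
  #include <string>
  #include <utility>

  struct Emitter {                       // stand-in for OptimizationRemarkEmitter
    std::string Name;
    explicit Emitter(std::string N) : Name(std::move(N)) {}
    void emit(const std::string &Msg) const {
      std::cout << Name << ": " << Msg << '\n';
    }
  };

  int main() {
    std::unique_ptr<Emitter> Cache;      // owned by the "pass", shared by callers
    auto GetEmitter = [&Cache](const std::string &F) -> Emitter & {
      Cache.reset(new Emitter(F));       // destroys the previously returned object
      return *Cache;
    };

    Emitter &First = GetEmitter("foo");  // reference into Cache
    GetEmitter("bar");                   // First now dangles
    // First.emit("boom");               // would be a use-after-free

    // The safer pattern the patch adopts: construct the emitter where it is
    // needed, with a clear local lifetime, instead of sharing a cached one
    // through a callback.
    Emitter Local("foo");
    Local.emit("remark with a well-defined lifetime");
    return 0;
  }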