From 8045bf9d0dc5be3a8b8d075fdfe23828f4b7d70e Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas
Date: Wed, 23 Mar 2022 14:51:16 +0000
Subject: [PATCH] [FuncSpec] Support function specialization across multiple
 arguments.

The current implementation of Function Specialization does not allow
specializing more than one argument per function call, which is a limitation
I am lifting with this patch.

My main challenge was to choose the most suitable ADT for storing the
specializations. We need an associative container for binding all the actual
arguments of a specialization to the function call. We also need a consistent
iteration order across executions. Lastly, we want to be able to sort the
entries by Gain and reject the least profitable ones. MapVector almost fits
the bill, but not quite: erasing elements is expensive, and running
stable_sort on it desynchronizes the indices of its underlying vector. I am
therefore using the underlying vector directly after calculating the Gain.
(A standalone sketch of this container workflow is appended after the patch.)

Differential Revision: https://reviews.llvm.org/D119880
---
 llvm/include/llvm/Transforms/Utils/SCCPSolver.h    |  15 +-
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 174 ++++++++++---------
 llvm/lib/Transforms/Utils/SCCPSolver.cpp           |  23 ++-
 .../function-specialization4.ll                    |   4 +-
 .../specialize-multiple-arguments.ll               | 185 +++++++++++++++++++++
 5 files changed, 306 insertions(+), 95 deletions(-)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll

diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index fb94b1d..17bd072 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -151,13 +151,14 @@ public:
   /// Return a reference to the set of argument tracked functions.
   SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
 
-  /// Mark the constant argument of a new function specialization. \p F points
-  /// to the cloned function and \p Arg represents the constant argument as a
-  /// pair of {formal,actual} values (the formal argument is associated with the
-  /// original function definition). All other arguments of the specialization
-  /// inherit the lattice state of their corresponding values in the original
-  /// function.
-  void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
+  /// Mark the constant arguments of a new function specialization. \p F points
+  /// to the cloned function and \p Args contains a list of constant arguments
+  /// represented as pairs of {formal,actual} values (the formal argument is
+  /// associated with the original function definition). All other arguments of
+  /// the specialization inherit the lattice state of their corresponding values
+  /// in the original function.
+  void markArgInFuncSpecialization(Function *F,
+                                   const SmallVectorImpl<ArgInfo> &Args);
 
   /// Mark all of the blocks in function \p F non-executable.
Clients can used /// this method to erase a function from the module (e.g., if it has been diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 8faca67..c9775e0 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -99,8 +99,13 @@ static cl::opt SpecializeOnAddresses( "func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values")); -// TODO: This needs checking to see the impact on compile-times, which is why -// this is off by default for now. +// Disabled by default as it can significantly increase compilation times. +// Running nikic's compile time tracker on x86 with instruction count as the +// metric shows 3-4% regression for SPASS while being neutral for all other +// benchmarks of the llvm test suite. +// +// https://llvm-compile-time-tracker.com +// https://github.com/nikic/llvm-compile-time-tracker static cl::opt EnableSpecializationForLiteralConstant( "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " @@ -110,17 +115,17 @@ namespace { // Bookkeeping struct to pass data from the analysis and profitability phase // to the actual transform helper functions. struct SpecializationInfo { - ArgInfo Arg; // Stores the {formal,actual} argument pair. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. - - SpecializationInfo(Argument *A, Constant *C, InstructionCost G) - : Arg(A, C), Gain(G){}; + SmallVector Args; // Stores the {formal,actual} argument pairs. + InstructionCost Gain; // Profitability: Gain = Bonus - Cost. }; } // Anonymous namespace using FuncList = SmallVectorImpl; -using ConstList = SmallVector; -using SpecializationList = SmallVector; +using CallArgBinding = std::pair; +using CallSpecBinding = std::pair; +// We are using MapVector because it guarantees deterministic iteration +// order across executions. +using SpecializationMap = SmallMapVector; // Helper to check if \p LV is either a constant or a constant // range with a single element. This should cover exactly the same cases as the @@ -307,17 +312,15 @@ public: LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " << F->getName() << " is " << Cost << "\n"); - SpecializationList Specializations; - calculateGains(F, Cost, Specializations); - if (Specializations.empty()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n"); + SmallVector Specializations; + if (!calculateGains(F, Cost, Specializations)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n"); continue; } - for (SpecializationInfo &S : Specializations) { - specializeFunction(F, S, WorkList); - Changed = true; - } + Changed = true; + for (auto &Entry : Specializations) + specializeFunction(F, Entry.second, WorkList); } updateSpecializedFuncs(Candidates, WorkList); @@ -392,21 +395,22 @@ private: return Clone; } - /// This function decides whether it's worthwhile to specialize function \p F - /// based on the known constant values its arguments can take on, i.e. it - /// calculates a gain and returns a list of actual arguments that are deemed - /// profitable to specialize. Specialization is performed on the first - /// interesting argument. Specializations based on additional arguments will - /// be evaluated on following iterations of the main IPSCCP solve loop. 
- void calculateGains(Function *F, InstructionCost Cost, - SpecializationList &WorkList) { + /// This function decides whether it's worthwhile to specialize function + /// \p F based on the known constant values its arguments can take on. It + /// only discovers potential specialization opportunities without actually + /// applying them. + /// + /// \returns true if any specializations have been found. + bool calculateGains(Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList) { + SpecializationMap Specializations; // Determine if we should specialize the function based on the values the // argument can take on. If specialization is not profitable, we continue // on to the next argument. for (Argument &FormalArg : F->args()) { // Determine if this argument is interesting. If we know the argument can // take on any constant values, they are collected in Constants. - ConstList ActualArgs; + SmallVector ActualArgs; if (!isArgumentInteresting(&FormalArg, ActualArgs)) { LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " << FormalArg.getNameOrAsOperand() @@ -414,50 +418,56 @@ private: continue; } - for (auto *ActualArg : ActualArgs) { - InstructionCost Gain = - ForceFunctionSpecialization - ? 1 - : getSpecializationBonus(&FormalArg, ActualArg) - Cost; + for (const auto &Entry : ActualArgs) { + CallBase *Call = Entry.first; + Constant *ActualArg = Entry.second; - if (Gain <= 0) - continue; - WorkList.push_back({&FormalArg, ActualArg, Gain}); - } + auto I = Specializations.insert({Call, SpecializationInfo()}); + SpecializationInfo &S = I.first->second; - if (WorkList.empty()) - continue; - - // Sort the candidates in descending order. - llvm::stable_sort(WorkList, [](const SpecializationInfo &L, - const SpecializationInfo &R) { - return L.Gain > R.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if - // necessary. - if (WorkList.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + if (I.second) + S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost; + if (!ForceFunctionSpecialization) + S.Gain += getSpecializationBonus(&FormalArg, ActualArg); + S.Args.push_back({&FormalArg, ActualArg}); } + } + + // Remove unprofitable specializations. + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); + + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); + + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); + + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. 
+ if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + } - LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " - << F->getName() << "\n"; - for (SpecializationInfo &S - : WorkList) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) dbgs() << "FnSpecialization: FormalArg = " - << S.Arg.Formal->getNameOrAsOperand() + << Arg.Formal->getNameOrAsOperand() << ", ActualArg = " - << S.Arg.Actual->getNameOrAsOperand() - << ", Gain = " << S.Gain << "\n"; - }); + << Arg.Actual->getNameOrAsOperand() << "\n"; + }); - // FIXME: Only one argument per function. - break; - } + return !WorkList.empty(); } bool isCandidateFunction(Function *F) { @@ -490,12 +500,12 @@ private: Function *Clone = cloneCandidateFunction(F, Mappings); // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(Clone, S.Arg, Mappings); + rewriteCallSites(Clone, S.Args, Mappings); // Initialize the lattice state of the arguments of the function clone, // marking the argument on which we specialized the function constant // with the given value. - Solver.markArgInFuncSpecialization(Clone, S.Arg); + Solver.markArgInFuncSpecialization(Clone, S.Args); // Mark all the specialized functions WorkList.push_back(Clone); @@ -641,7 +651,8 @@ private: /// /// \returns true if the function should be specialized on the given /// argument. - bool isArgumentInteresting(Argument *A, ConstList &Constants) { + bool isArgumentInteresting(Argument *A, + SmallVectorImpl &Constants) { // For now, don't attempt to specialize functions based on the values of // composite types. if (!A->getType()->isSingleValueType() || A->user_empty()) @@ -681,7 +692,8 @@ private: /// Collect in \p Constants all the constant values that argument \p A can /// take on. - void getPossibleConstants(Argument *A, ConstList &Constants) { + void getPossibleConstants(Argument *A, + SmallVectorImpl &Constants) { Function *F = A->getParent(); // Iterate over all the call sites of the argument's parent function. @@ -723,23 +735,24 @@ private: if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || EnableSpecializationForLiteralConstant)) - Constants.push_back(cast(V)); + Constants.push_back({&CS, cast(V)}); } } /// Rewrite calls to function \p F to call function \p Clone instead. /// /// This function modifies calls to function \p F as long as the actual - /// argument matches the one in \p Arg. Note that for recursive calls we - /// need to compare against the cloned formal argument. + /// arguments match those in \p Args. Note that for recursive calls we + /// need to compare against the cloned formal arguments. /// /// Callsites that have been marked with the MinSize function attribute won't /// be specialized and rewritten. 
- void rewriteCallSites(Function *Clone, const ArgInfo &Arg, + void rewriteCallSites(Function *Clone, const SmallVectorImpl &Args, ValueToValueMapTy &Mappings) { - Function *F = Arg.Formal->getParent(); - unsigned ArgNo = Arg.Formal->getArgNo(); - SmallVector CallSitesToRewrite; + assert(!Args.empty() && "Specialization without arguments"); + Function *F = Args[0].Formal->getParent(); + + SmallVector CallSitesToRewrite; for (auto *U : F->users()) { if (!isa(U) && !isa(U)) continue; @@ -758,9 +771,16 @@ private: << "\n"); if (/* recursive call */ (CS->getFunction() == Clone && - CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]) || + all_of(Args, + [CS, &Mappings](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; + })) || /* normal call */ - CS->getArgOperand(ArgNo) == Arg.Actual) { + all_of(Args, [CS](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Arg.Actual; + })) { CS->setCalledFunction(Clone); Solver.markOverdefined(CS); } @@ -891,7 +911,7 @@ bool llvm::runFunctionSpecialization( // Initially resolve the constants in all the argument tracked functions. RunSCCPSolver(FuncDecls); - SmallVector WorkList; + SmallVector WorkList; unsigned I = 0; while (FuncSpecializationMaxIters != I++ && FS.specializeFunctions(FuncDecls, WorkList)) { diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 88dd5e6..607928c 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -450,7 +450,8 @@ public: return TrackingIncomingArguments; } - void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg); + void markArgInFuncSpecialization(Function *F, + const SmallVectorImpl &Args); void markFunctionUnreachable(Function *F) { for (auto &BB : *F) @@ -524,21 +525,24 @@ Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const { return nullptr; } -void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, - const ArgInfo &Arg) { - assert(F->arg_size() == Arg.Formal->getParent()->arg_size() && +void SCCPInstVisitor::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl &Args) { + assert(!Args.empty() && "Specialization without arguments"); + assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() && "Functions should have the same number of arguments"); + auto Iter = Args.begin(); Argument *NewArg = F->arg_begin(); - Argument *OldArg = Arg.Formal->getParent()->arg_begin(); + Argument *OldArg = Args[0].Formal->getParent()->arg_begin(); for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) { LLVM_DEBUG(dbgs() << "SCCP: Marking argument " << NewArg->getNameOrAsOperand() << "\n"); - if (OldArg == Arg.Formal) { + if (OldArg == Iter->Formal) { // Mark the argument constants in the new function. - markConstant(NewArg, Arg.Actual); + markConstant(NewArg, Iter->Actual); + ++Iter; } else if (ValueState.count(OldArg)) { // For the remaining arguments in the new function, copy the lattice state // over from the old function. 
@@ -1717,8 +1721,9 @@ SmallPtrSetImpl &SCCPSolver::getArgumentTrackedFunctions() { return Visitor->getArgumentTrackedFunctions(); } -void SCCPSolver::markArgInFuncSpecialization(Function *F, const ArgInfo &Arg) { - Visitor->markArgInFuncSpecialization(F, Arg); +void SCCPSolver::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl &Args) { + Visitor->markArgInFuncSpecialization(F, Args); } void SCCPSolver::markFunctionUnreachable(Function *F) { diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll index 35ad27e..787e6e6 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -46,7 +46,7 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = load i32, i32* @A, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, i32* %c, align 4 +; CHECK-NEXT: %1 = load i32, i32* @C, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } @@ -55,7 +55,7 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = load i32, i32* @B, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, i32* %c, align 4 +; CHECK-NEXT: %1 = load i32, i32* @D, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll new file mode 100644 index 0000000..b4f28fd --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -function-specialization -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -function-specialization -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -function-specialization -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -function-specialization -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE + +; Make sure that we iterate correctly after sorting the specializations: +; FnSpecialization: Specializations for function compute +; FnSpecialization: Gain = 608 +; FnSpecialization: FormalArg = binop1, ActualArg = power +; FnSpecialization: FormalArg = binop2, ActualArg = mul +; FnSpecialization: Gain = 982 +; FnSpecialization: FormalArg = binop1, ActualArg = plus +; FnSpecialization: FormalArg = binop2, ActualArg = minus +; FnSpecialization: Gain = 795 +; FnSpecialization: FormalArg = binop1, ActualArg = minus +; FnSpecialization: FormalArg = binop2, ActualArg = power + +define i64 @main(i64 %x, i64 %y, i1 %flag) { +; NONE-LABEL: @main( +; NONE-NEXT: entry: +; NONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; NONE: plus: +; NONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; NONE-NEXT: br label [[MERGE:%.*]] +; NONE: minus: +; NONE-NEXT: [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; NONE-NEXT: br label [[MERGE]] +; NONE: merge: +; 
NONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; NONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; NONE-NEXT: ret i64 [[TMP3]] +; +; ONE-LABEL: @main( +; ONE-NEXT: entry: +; ONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; ONE: plus: +; ONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; ONE-NEXT: br label [[MERGE:%.*]] +; ONE: minus: +; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; ONE-NEXT: br label [[MERGE]] +; ONE: merge: +; ONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; ONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; ONE-NEXT: ret i64 [[TMP3]] +; +; TWO-LABEL: @main( +; TWO-NEXT: entry: +; TWO-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; TWO: plus: +; TWO-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; TWO-NEXT: br label [[MERGE:%.*]] +; TWO: minus: +; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; TWO-NEXT: br label [[MERGE]] +; TWO: merge: +; TWO-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; TWO-NEXT: ret i64 [[TMP3]] +; +; THREE-LABEL: @main( +; THREE-NEXT: entry: +; THREE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; THREE: plus: +; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.3(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; THREE-NEXT: br label [[MERGE:%.*]] +; THREE: minus: +; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; THREE-NEXT: br label [[MERGE]] +; THREE: merge: +; THREE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; THREE-NEXT: ret i64 [[TMP3]] +; +entry: + br i1 %flag, label %plus, label %minus + +plus: + %tmp0 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @power, i64 (i64, i64)* @mul) + br label %merge + +minus: + %tmp1 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) + br label %merge + +merge: + %tmp2 = phi i64 [ %tmp0, %plus ], [ %tmp1, %minus] + %tmp3 = call i64 @compute(i64 %tmp2, i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) + ret i64 %tmp3 +} + +; THREE-NOT: define internal i64 @compute +; +; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: 
[[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +define internal i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +entry: + %tmp0 = call i64 %binop1(i64 %x, i64 %y) + %tmp1 = call i64 %binop2(i64 %x, i64 %y) + %add = add i64 %tmp0, %tmp1 + %div = sdiv i64 %add, %x + %sub = sub i64 %div, %y + %mul = mul i64 %sub, 2 + ret i64 %mul +} + +define internal i64 @plus(i64 %x, i64 %y) { +entry: + %tmp0 = add i64 %x, %y + ret i64 %tmp0 +} + +define internal i64 @minus(i64 %x, i64 %y) { +entry: + %tmp0 = sub i64 %x, %y + ret i64 %tmp0 +} + +define internal i64 @mul(i64 %x, i64 %n) { +entry: + %cmp6 = icmp sgt i64 %n, 1 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %add, %for.body ] + ret i64 %x.addr.0.lcssa + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %x.addr.07 = phi i64 [ %add, %for.body ], [ %x, %entry ] + %add = shl nsw i64 %x.addr.07, 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define internal i64 @power(i64 %x, i64 %n) { +entry: + %cmp6 = icmp sgt i64 %n, 1 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %mul, %for.body ] + ret i64 %x.addr.0.lcssa + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %x.addr.07 = phi i64 [ %mul, %for.body ], [ %x, %entry ] + %mul = mul nsw i64 %x.addr.07, %x.addr.07 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} -- 2.7.4
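
Appendix (illustration only, not part of the patch): below is a minimal,
self-contained C++ sketch of the container workflow the commit message
describes, assuming only LLVM's ADT headers (plus libLLVMSupport for
SmallVector's out-of-line code). The names and values are made-up stand-ins:
SpecSketch plays the role of SpecializationInfo, plain integer keys stand in
for the CallBase pointers, int replaces InstructionCost, and the limit of 2
stands in for MaxClonesThreshold.

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

namespace {
// Stand-in for SpecializationInfo: a list of argument bindings plus a gain.
struct SpecSketch {
  llvm::SmallVector<int, 4> Args; // would be {formal,actual} ArgInfo pairs
  int Gain = 0;                   // would be an InstructionCost
};
} // anonymous namespace

int main() {
  // 1) Associative container keyed by "call site", with deterministic
  //    iteration order across executions (the reason MapVector is used).
  llvm::SmallMapVector<int, SpecSketch, 8> Specializations;
  Specializations[1] = {{10}, 5};     // profitable, one argument
  Specializations[2] = {{20}, -3};    // unprofitable
  Specializations[3] = {{30, 31}, 9}; // profitable, two arguments

  // 2) Drop unprofitable entries while still in the map; MapVector::remove_if
  //    preserves the relative order of the survivors.
  Specializations.remove_if(
      [](const auto &Entry) { return Entry.second.Gain <= 0; });

  // 3) Move out the underlying vector. Sorting the MapVector itself would
  //    desynchronize its key-to-index map, hence the switch to the vector.
  auto WorkList = Specializations.takeVector();
  llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
    return L.second.Gain > R.second.Gain;
  });

  // 4) Keep only the most profitable candidates (2 here stands in for
  //    MaxClonesThreshold).
  if (WorkList.size() > 2)
    WorkList.erase(WorkList.begin() + 2, WorkList.end());

  for (const auto &Entry : WorkList)
    std::printf("call %d: gain %d, %zu argument(s)\n", Entry.first,
                Entry.second.Gain, Entry.second.Args.size());
  return 0;
}

Sorting happens only after takeVector() because sorting the map in place is
exactly the MapVector limitation the commit message calls out; the real pass
follows the same sequence in calculateGains() before handing the worklist to
specializeFunction().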