From 87a85f3d57f55f5652ec44f77816c7c9457545fa Mon Sep 17 00:00:00 2001 From: Luofan Chen Date: Sat, 15 Aug 2020 19:17:44 +0800 Subject: [PATCH] [Attributor] Use internalized version of non-exact functions This patch internalizes non-exact functions and replaces their uses with the internalized version. Doing this enables the analysis of non-exact functions. We can do this because some non-exact functions with the same name whose linkage is `linkonce_odr` or `weak_odr` should have the same semantics, so we can safely internalize and replace uses of them (the result of the other version of this function should be the same). Note that not all functions can be internalized, e.g., functions with `linkonce` or `weak` linkage. For now, when specified on the command line, we internalize all functions that meet the requirements without calculating the cost of such internalization. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D84167 --- llvm/lib/Transforms/IPO/Attributor.cpp | 76 +++++++++++++ llvm/test/Transforms/Attributor/internalize.ll | 143 +++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 llvm/test/Transforms/Attributor/internalize.ll diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 764d71b..6599ff6 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -21,8 +21,10 @@ #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Verifier.h" @@ -34,6 +36,7 @@ #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include 
"llvm/Transforms/Utils/Local.h" #include @@ -88,6 +91,12 @@ static cl::opt "wrappers for non-exact definitions."), cl::init(false)); +static cl::opt + AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden, + cl::desc("Allow the Attributor to use IP information " + "derived from non-exact functions via cloning"), + cl::init(false)); + static cl::list SeedAllowList("attributor-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of attrbute names that are " @@ -1413,6 +1422,52 @@ static void createShallowWrapper(Function &F) { NumFnShallowWrapperCreated++; } +/// Make another copy of the function \p F such that the copied version has +/// internal linkage afterwards and can be analysed. Then we replace all uses +/// of the original function to the copied one +/// +/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr` +/// linkage can be internalized because these linkages guarantee that other +/// definitions with the same name have the same semantics as this one +/// +static Function *internalizeFunction(Function &F) { + assert(AllowDeepWrapper && "Cannot create a copy if not allowed."); + assert(!F.isDeclaration() && !F.hasExactDefinition() && + !GlobalValue::isInterposableLinkage(F.getLinkage()) && + "Trying to internalize function which cannot be internalized."); + + Module &M = *F.getParent(); + FunctionType *FnTy = F.getFunctionType(); + + // create a copy of the current function + Function *Copied = + Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(), + F.getName() + ".internalized"); + ValueToValueMapTy VMap; + auto *NewFArgIt = Copied->arg_begin(); + for (auto &Arg : F.args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector Returns; + + // Copy the body of the original function to the new one + CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns); + + // Copy metadata + SmallVector, 1> MDs; + 
F.getAllMetadata(MDs); + for (auto MDIt : MDs) + Copied->addMetadata(MDIt.first, *MDIt.second); + + M.getFunctionList().insert(F.getIterator(), Copied); + F.replaceAllUsesWith(Copied); + Copied->setDSOLocal(true); + + return Copied; +} + bool Attributor::isValidFunctionSignatureRewrite( Argument &Arg, ArrayRef ReplacementTypes) { @@ -2145,6 +2200,27 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, if (!A.isFunctionIPOAmendable(*F)) createShallowWrapper(*F); + // Internalize non-exact functions + // TODO: for now we eagerly internalize functions without calculating the + // cost, we need a cost interface to determine whether internalizing + // a function is "benefitial" + if (AllowDeepWrapper) { + for (Function *F : Functions) + if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() && + !GlobalValue::isInterposableLinkage(F->getLinkage())) { + Function *NewF = internalizeFunction(*F); + Functions.insert(NewF); + + // Update call graph + CGUpdater.registerOutlinedFunction(*NewF); + for (const Use &U : NewF->uses()) + if (CallBase *CB = dyn_cast(U.getUser())) { + auto *CallerF = CB->getCaller(); + CGUpdater.reanalyzeFunction(*CallerF); + } + } + } + for (Function *F : Functions) { if (F->hasExactDefinition()) NumFnWithExactDefinition++; diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll new file mode 100644 index 0000000..7773bfb --- /dev/null +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes +; Deep Wrapper disabled + +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED +; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,IS__CGSCC_____DISABLED,IS________OPM_DISABLED,IS__CGSCC_OPM_DISABLED +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_OPM_DISABLED,IS__CGSCC_____DISABLED,IS________NPM_DISABLED,IS__CGSCC_NPM_DISABLED + +; Deep Wrapper enabled + +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED +; RUN: 
opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED +; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED +; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER + +; TEST 1: This function is of linkage `linkonce`, we cannot internalize this +; function and use information derived from it +; +; DWRAPPER-NOT: Function Attrs +; DWRAPPER-NOT: inner1.internalized +define linkonce i32 @inner1(i32 %a, i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@inner1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[C]] +; +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 2: This function 
is of linkage `weak`, we cannot internalize this function and +; use information derived from it +; +; DWRAPPER-NOT: Function Attrs +; DWRAPPER-NOT: inner2.internalized +define weak i32 @inner2(i32 %a, i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@inner2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[C]] +; +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the +; deep wrapper, and the IP information derived from this function can be used +; +; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b) +; DWRAPPER-NEXT: entry: +; DWRAPPER-NEXT: %c = add i32 %a, %b +; DWRAPPER-NEXT: ret i32 %c +define linkonce_odr i32 @inner3(i32 %a, i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@inner3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[C]] +; +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep +; wrapper +; +; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b) +; DWRAPPER-NEXT: entry: +; DWRAPPER-NEXT: %c = add i32 %a, %b +; DWRAPPER-NEXT: ret i32 %c +define weak_odr i32 @inner4(i32 %a, i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@inner4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[C]] +; +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there +; is no need to internalize this +; +; DWRAPPER-NOT: inner5.internalized +define linkonce_odr i32 @inner5(i32 %a, 
i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@inner5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[C]] +; +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; Since the inner1 cannot be internalized, there should be no change to its callsite +; Since the inner2 cannot be internalized, there should be no change to its callsite +; Since the inner3 is internalized, the use of the original function should be replaced by the +; copied one +; +; DWRAPPER-NOT: call i32 @inner1.internalized +; DWRAPPER: call i32 @inner1 +; DWRAPPER-NOT: call i32 @inner2.internalized +; DWRAPPER: call i32 @inner2 +; DWRAPPER-NOT: call i32 @inner3 +; DWRAPPER: call i32 @inner3.internalized +; DWRAPPER-NOT: call i32 @inner4 +; DWRAPPER: call i32 @inner4.internalized +define i32 @outer1() { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_DISABLED-NEXT: entry: +; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2) +; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2) +; CHECK_DISABLED-NEXT: [[RET3:%.*]] = call i32 @inner3(i32 [[RET1]], i32 [[RET2]]) +; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]]) +; CHECK_DISABLED-NEXT: ret i32 [[RET4]] +; +; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_ENABLED-NEXT: entry: +; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2) +; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2) +; CHECK_ENABLED-NEXT: [[RET3:%.*]] = call i32 @inner3.internalized(i32 [[RET1]], i32 [[RET2]]) +; CHECK_ENABLED-NEXT: [[RET4:%.*]] = call i32 @inner4.internalized(i32 [[RET3]], i32 [[RET3]]) +; CHECK_ENABLED-NEXT: ret i32 [[RET4]] +; +entry: + %ret1 = call i32 @inner1(i32 1, i32 2) + %ret2 = call i32 @inner2(i32 1, i32 2) + %ret3 = call i32 @inner3(i32 %ret1, i32 %ret2) + %ret4 = call i32 @inner4(i32 %ret3, i32 %ret3) + ret i32 %ret4 +} -- 2.7.4