#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
"wrappers for non-exact definitions."),
cl::init(false));
+// Hidden command-line switch, off by default, that allows the Attributor to
+// clone functions with non-exact definitions into internal copies so that
+// inter-procedural (IP) information derived from the copies can be used.
+static cl::opt<bool>
+ AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
+ cl::desc("Allow the Attributor to use IP information "
+ "derived from non-exact functions via cloning"),
+ cl::init(false));
+
static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of attrbute names that are "
NumFnShallowWrapperCreated++;
}
+/// Create a private copy of the function \p F that can be analysed, and
+/// replace all uses of \p F with that copy.
+///
+/// Only functions that are defined, not exactly defined, and whose linkage is
+/// not interposable may be internalized. `linkonce_odr` and `weak_odr` qualify
+/// because these linkages guarantee that other definitions with the same name
+/// have the same semantics as this one.
+///
+/// \param F the function to internalize; it must be part of a module.
+/// \returns the copy, named `<name of F>.internalized`, which has private
+///          linkage and is inserted into the module directly before \p F.
+static Function *internalizeFunction(Function &F) {
+  assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
+  assert(!F.isDeclaration() && !F.hasExactDefinition() &&
+         !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
+         "Trying to internalize function which cannot be internalized.");
+
+  Module &M = *F.getParent();
+  FunctionType *FnTy = F.getFunctionType();
+
+  // Create the copy with the linkage of the original for now.
+  // NOTE(review): CloneFunctionInto is expected to copy global-value
+  // properties such as the visibility from F, and a non-default visibility
+  // must not be applied to a function that already has local linkage. The
+  // linkage is therefore switched to private only after cloning.
+  Function *Copied = Function::Create(FnTy, F.getLinkage(),
+                                      F.getAddressSpace(),
+                                      F.getName() + ".internalized");
+
+  // Give the new arguments the original names and map old arguments to new
+  // ones so that the cloned body refers to the arguments of the copy.
+  ValueToValueMapTy VMap;
+  auto *NewFArgIt = Copied->arg_begin();
+  for (auto &Arg : F.args()) {
+    NewFArgIt->setName(Arg.getName());
+    VMap[&Arg] = &(*NewFArgIt++);
+  }
+  SmallVector<ReturnInst *, 8> Returns;
+
+  // Copy the body of the original function to the new one
+  CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
+
+  // Now that cloning is done, make the copy private with default visibility.
+  Copied->setVisibility(GlobalValue::DefaultVisibility);
+  Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+  // Copy metadata, but only if cloning did not already attach metadata to the
+  // copy; attaching it a second time would duplicate the attachments.
+  if (!Copied->hasMetadata()) {
+    SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+    F.getAllMetadata(MDs);
+    for (const auto &MDIt : MDs)
+      Copied->addMetadata(MDIt.first, *MDIt.second);
+  }
+
+  M.getFunctionList().insert(F.getIterator(), Copied);
+  F.replaceAllUsesWith(Copied);
+  Copied->setDSOLocal(true);
+
+  return Copied;
+}
+
bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
if (!A.isFunctionIPOAmendable(*F))
createShallowWrapper(*F);
+ // Internalize non-exact functions
+ // TODO: for now we eagerly internalize functions without calculating the
+ //       cost, we need a cost interface to determine whether internalizing
+ //       a function is "beneficial"
+ if (AllowDeepWrapper) {
+   // Collect the candidates up front: every internalized copy is inserted
+   // into `Functions`, and inserting into the container while iterating over
+   // it could invalidate the iteration.
+   SmallVector<Function *, 8> InternalizationCandidates;
+   for (Function *F : Functions)
+     if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
+         !GlobalValue::isInterposableLinkage(F->getLinkage()))
+       InternalizationCandidates.push_back(F);
+
+   for (Function *F : InternalizationCandidates) {
+     Function *NewF = internalizeFunction(*F);
+     Functions.insert(NewF);
+
+     // Update call graph: register the copy and reanalyze every function
+     // that contains a call-site user of it.
+     CGUpdater.registerOutlinedFunction(*NewF);
+     for (const Use &U : NewF->uses())
+       if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+         auto *CallerF = CB->getCaller();
+         CGUpdater.reanalyzeFunction(*CallerF);
+       }
+   }
+ }
+
for (Function *F : Functions) {
if (F->hasExactDefinition())
NumFnWithExactDefinition++;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes
+; Deep Wrapper disabled
+
+; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,IS__CGSCC_____DISABLED,IS________OPM_DISABLED,IS__CGSCC_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_OPM_DISABLED,IS__CGSCC_____DISABLED,IS________NPM_DISABLED,IS__CGSCC_NPM_DISABLED
+
+; Deep Wrapper enabled
+
+; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED
+; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER
+
+; TEST 1: This function is of linkage `linkonce`, we cannot internalize this
+; function and use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner1.internalized
+define linkonce i32 @inner1(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner1
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+; TEST 2: This function is of linkage `weak`, we cannot internalize this function and
+; use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner2.internalized
+define weak i32 @inner2(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner2
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the
+; deep wrapper, and the IP information derived from this function can be used
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT: %c = add i32 %a, %b
+; DWRAPPER-NEXT: ret i32 %c
+define linkonce_odr i32 @inner3(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner3
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep
+; wrapper
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT: %c = add i32 %a, %b
+; DWRAPPER-NEXT: ret i32 %c
+define weak_odr i32 @inner4(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner4
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+; TEST 5: This function has linkage `linkonce_odr` but is never called (number of uses = 0), so
+; there is no need to internalize it
+;
+; DWRAPPER-NOT: inner5.internalized
+define linkonce_odr i32 @inner5(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner5
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+; Since inner1 cannot be internalized, there should be no change to its callsite
+; Since inner2 cannot be internalized, there should be no change to its callsite
+; Since inner3 and inner4 are internalized, the uses of the original functions should be
+; replaced by the copied ones
+;
+; DWRAPPER-NOT: call i32 @inner1.internalized
+; DWRAPPER: call i32 @inner1
+; DWRAPPER-NOT: call i32 @inner2.internalized
+; DWRAPPER: call i32 @inner2
+; DWRAPPER-NOT: call i32 @inner3
+; DWRAPPER: call i32 @inner3.internalized
+; DWRAPPER-NOT: call i32 @inner4
+; DWRAPPER: call i32 @inner4.internalized
+define i32 @outer1() {
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_DISABLED-NEXT: entry:
+; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_DISABLED-NEXT: [[RET3:%.*]] = call i32 @inner3(i32 [[RET1]], i32 [[RET2]])
+; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]])
+; CHECK_DISABLED-NEXT: ret i32 [[RET4]]
+;
+; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_ENABLED-NEXT: entry:
+; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_ENABLED-NEXT: [[RET3:%.*]] = call i32 @inner3.internalized(i32 [[RET1]], i32 [[RET2]])
+; CHECK_ENABLED-NEXT: [[RET4:%.*]] = call i32 @inner4.internalized(i32 [[RET3]], i32 [[RET3]])
+; CHECK_ENABLED-NEXT: ret i32 [[RET4]]
+;
+entry:
+ %ret1 = call i32 @inner1(i32 1, i32 2)
+ %ret2 = call i32 @inner2(i32 1, i32 2)
+ %ret3 = call i32 @inner3(i32 %ret1, i32 %ret2)
+ %ret4 = call i32 @inner4(i32 %ret3, i32 %ret3)
+ ret i32 %ret4
+}