From 7ca744488fa4c8e2b7d0fc88c63840d7f9000f54 Mon Sep 17 00:00:00 2001 From: Xin Tong Date: Mon, 5 Nov 2018 15:49:46 +0000 Subject: [PATCH] [ThinLTO] Add an option to disable (thin)lto internalization. Summary: LTO and ThinLTO optimize the IR differently. One source of differences is the amount of internalization that can happen. Add an option to enable/disable internalization so that other differences (e.g. inlining) can be studied in isolation. There are other things lto and thinlto do differently; I will add flags to enable/disable them as needed. Reviewers: tejohnson, pcc, steven_wu Subscribers: mehdi_amini, inglorion, steven_wu, dexonsmith, dang, llvm-commits Differential Revision: https://reviews.llvm.org/D53294 llvm-svn: 346140 --- llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h | 5 ++- llvm/lib/LTO/LTO.cpp | 10 ++++-- llvm/test/LTO/X86/internalize.ll | 42 +++++++++++++++++++++++++ llvm/test/ThinLTO/X86/internalize.ll | 21 +++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 llvm/test/LTO/X86/internalize.ll diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h index f48ab02..8f23b7cb 100644 --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -48,6 +48,9 @@ #include #include +/// Enable global value internalization in LTO. 
+extern llvm::cl::opt<bool> EnableLTOInternalization; + namespace llvm { template <typename T> class ArrayRef; class LLVMContext; @@ -233,7 +236,7 @@ private: unsigned OptLevel = 2; lto_diagnostic_handler_t DiagHandler = nullptr; void *DiagContext = nullptr; - bool ShouldInternalize = true; + bool ShouldInternalize = EnableLTOInternalization; bool ShouldEmbedUselists = false; bool ShouldRestoreGlobalsLinkage = false; TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile; diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 6942cb2..2726b67 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -56,6 +56,11 @@ static cl::opt<bool> DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden, cl::desc("Dump the SCCs in the ThinLTO index's callgraph")); +/// Enable global value internalization in LTO. +cl::opt<bool> EnableLTOInternalization( + "enable-lto-internalization", cl::init(true), cl::Hidden, + cl::desc("Enable global value internalization in LTO")); + // Returns a unique hash for the Module considering the current list of // export/import and other global analysis results. // The hash is produced in \p Key. @@ -344,7 +349,8 @@ static void thinLTOInternalizeAndPromoteGUID( if (isExported(S->modulePath(), GUID)) { if (GlobalValue::isLocalLinkage(S->linkage())) S->setLinkage(GlobalValue::ExternalLinkage); - } else if (!GlobalValue::isLocalLinkage(S->linkage())) + } else if (EnableLTOInternalization && + !GlobalValue::isLocalLinkage(S->linkage())) S->setLinkage(GlobalValue::InternalLinkage); } } @@ -876,7 +882,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { continue; GV->setUnnamedAddr(R.second.UnnamedAddr ? 
GlobalValue::UnnamedAddr::Global : GlobalValue::UnnamedAddr::None); - if (R.second.Partition == 0) + if (EnableLTOInternalization && R.second.Partition == 0) GV->setLinkage(GlobalValue::InternalLinkage); } diff --git a/llvm/test/LTO/X86/internalize.ll b/llvm/test/LTO/X86/internalize.ll new file mode 100644 index 0000000..6b18aa7 --- /dev/null +++ b/llvm/test/LTO/X86/internalize.ll @@ -0,0 +1,42 @@ +; RUN: opt %s -o %t1.bc + +; RUN: llvm-lto %t1.bc -o %t1.save.opt --exported-symbol=_foo -save-merged-module -O0 +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s --check-prefix=INTERNALIZE + +; Test the enable-lto-internalization option by setting it to false. +; This makes sure internalization does not happen. +; RUN: llvm-lto %t1.bc -enable-lto-internalization=false -o %t1.save.opt \ +; RUN: --exported-symbol=_foo -save-merged-module -O0 +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s --check-prefix=INTERNALIZE-OPTION-DISABLE + +; RUN: llvm-lto2 run %t1.bc -o %t.o -save-temps \ +; RUN: -r=%t1.bc,_foo,pxl \ +; RUN: -r=%t1.bc,_bar,pl +; RUN: llvm-dis < %t.o.0.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE2 + +; Test the enable-lto-internalization option by setting it to false. +; This makes sure internalization does not happen in runRegularLTO(). 
+; RUN: llvm-lto2 run %t1.bc -o %t.o -save-temps -enable-lto-internalization=false \ +; RUN: -r=%t1.bc,_foo,pxl \ +; RUN: -r=%t1.bc,_bar,pl +; RUN: llvm-dis < %t.o.0.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE2-OPTION-DISABLE + +; INTERNALIZE: define void @foo +; INTERNALIZE: define internal void @bar +; INTERNALIZE-OPTION-DISABLE: define void @foo +; INTERNALIZE-OPTION-DISABLE: define void @bar +; INTERNALIZE2: define dso_local void @foo +; INTERNALIZE2: define internal void @bar +; INTERNALIZE2-OPTION-DISABLE: define dso_local void @foo +; INTERNALIZE2-OPTION-DISABLE: define dso_local void @bar + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @foo() { + call void @bar() + ret void +} +define void @bar() { + ret void +} diff --git a/llvm/test/ThinLTO/X86/internalize.ll b/llvm/test/ThinLTO/X86/internalize.ll index 433cfe4..70b2846 100644 --- a/llvm/test/ThinLTO/X86/internalize.ll +++ b/llvm/test/ThinLTO/X86/internalize.ll @@ -3,12 +3,27 @@ ; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR ; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE +; Test the enable-lto-internalization option by setting it to false. +; This makes sure indices are not marked as internal linkage and therefore +; internalization does not happen. 
+; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc \ +; RUN: -enable-lto-internalization=false --exported-symbol=foo +; RUN: llvm-dis < %t1.bc.thinlto.internalized.bc | FileCheck %s --check-prefix=INTERNALIZE-OPTION-DISABLE + ; RUN: llvm-lto2 run %t1.bc -o %t.o -save-temps \ ; RUN: -r=%t1.bc,_foo,pxl \ ; RUN: -r=%t1.bc,_bar,pl \ ; RUN: -r=%t1.bc,_linkonce_func,pl ; RUN: llvm-dis < %t.o.1.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE2 +; Test the enable-lto-internalization option by setting it to false. +; This makes sure indices are not marked as internal linkage and therefore +; internalization does not happen. +; RUN: llvm-lto2 run %t1.bc -o %t.o -save-temps -enable-lto-internalization=false \ +; RUN: -r=%t1.bc,_foo,pxl \ +; RUN: -r=%t1.bc,_bar,pl \ +; RUN: -r=%t1.bc,_linkonce_func,pl +; RUN: llvm-dis < %t.o.1.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE2-OPTION-DISABLE ; REGULAR: define void @foo ; REGULAR: define void @bar @@ -16,9 +31,15 @@ ; INTERNALIZE: define void @foo ; INTERNALIZE: define internal void @bar ; INTERNALIZE: define internal void @linkonce_func() +; INTERNALIZE-OPTION-DISABLE: define void @foo +; INTERNALIZE-OPTION-DISABLE: define void @bar +; INTERNALIZE-OPTION-DISABLE: define linkonce void @linkonce_func() ; INTERNALIZE2: define dso_local void @foo ; INTERNALIZE2: define internal void @bar ; INTERNALIZE2: define internal void @linkonce_func() +; INTERNALIZE2-OPTION-DISABLE: define dso_local void @foo +; INTERNALIZE2-OPTION-DISABLE: define dso_local void @bar +; INTERNALIZE2-OPTION-DISABLE: define weak dso_local void @linkonce_func() target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" -- 2.7.4