Refactor the Internalize stage of libLTO in a separate file (NFC)
author Mehdi Amini <mehdi.amini@apple.com>
Tue, 12 Apr 2016 06:34:10 +0000 (06:34 +0000)
committer Mehdi Amini <mehdi.amini@apple.com>
Tue, 12 Apr 2016 06:34:10 +0000 (06:34 +0000)
This is intended to be shared by the ThinLTOCodeGenerator.

Note that there is a change in the way the verifier is run: previously
it was run as a Pass on the merged module during internalization,
whereas now the verifier is called explicitly on the merged module,
outside of the internalize "pass pipeline".

What remains strange is the fact that the `DisableVerify` parameter of
the API does not disable this initial verifier.

Differential Revision: http://reviews.llvm.org/D19000

From: Mehdi Amini <mehdi.amini@apple.com>
llvm-svn: 266047

llvm/include/llvm/LTO/LTOCodeGenerator.h
llvm/lib/LTO/CMakeLists.txt
llvm/lib/LTO/LTOCodeGenerator.cpp
llvm/lib/LTO/LTOInternalize.cpp [new file with mode: 0644]
llvm/lib/LTO/LTOInternalize.h [new file with mode: 0644]
llvm/test/LTO/X86/disable-verify.ll

index 11ea125..a3b3fd7 100644 (file)
@@ -174,10 +174,6 @@ private:
   bool compileOptimizedToFile(const char **Name);
   void restoreLinkageForExternals();
   void applyScopeRestrictions();
-  void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
-                        std::vector<const char *> &MustPreserveList,
-                        SmallPtrSetImpl<GlobalValue *> &AsmUsed,
-                        Mangler &Mangler);
   bool determineTarget();
 
   static void DiagnosticHandler(const DiagnosticInfo &DI, void *Context);
index bd8ead1..6c8dcc6 100644 (file)
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMLTO
   LTOModule.cpp
   LTOCodeGenerator.cpp
+  LTOInternalize.cpp
   ThinLTOCodeGenerator.cpp
 
   ADDITIONAL_HEADER_DIRS
index 5ae662e..fa9d1bb 100644 (file)
@@ -13,6 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LTO/LTOCodeGenerator.h"
+
+#include "LTOInternalize.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/Passes.h"
@@ -331,145 +333,13 @@ bool LTOCodeGenerator::determineTarget() {
   return true;
 }
 
-void LTOCodeGenerator::
-applyRestriction(GlobalValue &GV,
-                 ArrayRef<StringRef> Libcalls,
-                 std::vector<const char*> &MustPreserveList,
-                 SmallPtrSetImpl<GlobalValue*> &AsmUsed,
-                 Mangler &Mangler) {
-  // There are no restrictions to apply to declarations.
-  if (GV.isDeclaration())
-    return;
-
-  // There is nothing more restrictive than private linkage.
-  if (GV.hasPrivateLinkage())
-    return;
-
-  SmallString<64> Buffer;
-  TargetMach->getNameWithPrefix(Buffer, &GV, Mangler);
-
-  if (MustPreserveSymbols.count(Buffer))
-    MustPreserveList.push_back(GV.getName().data());
-  if (AsmUndefinedRefs.count(Buffer))
-    AsmUsed.insert(&GV);
-
-  // Conservatively append user-supplied runtime library functions to
-  // llvm.compiler.used.  These could be internalized and deleted by
-  // optimizations like -globalopt, causing problems when later optimizations
-  // add new library calls (e.g., llvm.memset => memset and printf => puts).
-  // Leave it to the linker to remove any dead code (e.g. with -dead_strip).
-  if (isa<Function>(GV) &&
-      std::binary_search(Libcalls.begin(), Libcalls.end(), GV.getName()))
-    AsmUsed.insert(&GV);
-
-  // Record the linkage type of non-local symbols so they can be restored prior
-  // to module splitting.
-  if (ShouldRestoreGlobalsLinkage && !GV.hasAvailableExternallyLinkage() &&
-      !GV.hasLocalLinkage() && GV.hasName())
-    ExternalSymbols.insert(std::make_pair(GV.getName(), GV.getLinkage()));
-}
-
-static void findUsedValues(GlobalVariable *LLVMUsed,
-                           SmallPtrSetImpl<GlobalValue*> &UsedValues) {
-  if (!LLVMUsed) return;
-
-  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
-  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
-    if (GlobalValue *GV =
-        dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
-      UsedValues.insert(GV);
-}
-
-// Collect names of runtime library functions. User-defined functions with the
-// same names are added to llvm.compiler.used to prevent them from being
-// deleted by optimizations.
-static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
-                                      const TargetLibraryInfo& TLI,
-                                      const Module &Mod,
-                                      const TargetMachine &TM) {
-  // TargetLibraryInfo has info on C runtime library calls on the current
-  // target.
-  for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs);
-       I != E; ++I) {
-    LibFunc::Func F = static_cast<LibFunc::Func>(I);
-    if (TLI.has(F))
-      Libcalls.push_back(TLI.getName(F));
-  }
-
-  SmallPtrSet<const TargetLowering *, 1> TLSet;
-
-  for (const Function &F : Mod) {
-    const TargetLowering *Lowering =
-        TM.getSubtargetImpl(F)->getTargetLowering();
-
-    if (Lowering && TLSet.insert(Lowering).second)
-      // TargetLowering has info on library calls that CodeGen expects to be
-      // available, both from the C runtime and compiler-rt.
-      for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL);
-           I != E; ++I)
-        if (const char *Name =
-                Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I)))
-          Libcalls.push_back(Name);
-  }
-
-  array_pod_sort(Libcalls.begin(), Libcalls.end());
-  Libcalls.erase(std::unique(Libcalls.begin(), Libcalls.end()),
-                 Libcalls.end());
-}
-
 void LTOCodeGenerator::applyScopeRestrictions() {
   if (ScopeRestrictionsDone || !ShouldInternalize)
     return;
 
-  // Start off with a verification pass.
-  legacy::PassManager passes;
-  passes.add(createVerifierPass());
-
-  // mark which symbols can not be internalized
-  Mangler Mangler;
-  std::vector<const char*> MustPreserveList;
-  SmallPtrSet<GlobalValue*, 8> AsmUsed;
-  std::vector<StringRef> Libcalls;
-  TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple()));
-  TargetLibraryInfo TLI(TLII);
-
-  accumulateAndSortLibcalls(Libcalls, TLI, *MergedModule, *TargetMach);
-
-  for (Function &f : *MergedModule)
-    applyRestriction(f, Libcalls, MustPreserveList, AsmUsed, Mangler);
-  for (GlobalVariable &v : MergedModule->globals())
-    applyRestriction(v, Libcalls, MustPreserveList, AsmUsed, Mangler);
-  for (GlobalAlias &a : MergedModule->aliases())
-    applyRestriction(a, Libcalls, MustPreserveList, AsmUsed, Mangler);
-
-  GlobalVariable *LLVMCompilerUsed =
-    MergedModule->getGlobalVariable("llvm.compiler.used");
-  findUsedValues(LLVMCompilerUsed, AsmUsed);
-  if (LLVMCompilerUsed)
-    LLVMCompilerUsed->eraseFromParent();
-
-  if (!AsmUsed.empty()) {
-    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context);
-    std::vector<Constant*> asmUsed2;
-    for (auto *GV : AsmUsed) {
-      Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
-      asmUsed2.push_back(c);
-    }
-
-    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
-    LLVMCompilerUsed =
-      new llvm::GlobalVariable(*MergedModule, ATy, false,
-                               llvm::GlobalValue::AppendingLinkage,
-                               llvm::ConstantArray::get(ATy, asmUsed2),
-                               "llvm.compiler.used");
-
-    LLVMCompilerUsed->setSection("llvm.metadata");
-  }
-
-  passes.add(createInternalizePass(MustPreserveList));
-
-  // apply scope restrictions
-  passes.run(*MergedModule);
+  LTOInternalize(*MergedModule, *TargetMach, MustPreserveSymbols,
+                 AsmUndefinedRefs,
+                 (ShouldRestoreGlobalsLinkage ? &ExternalSymbols : nullptr));
 
   ScopeRestrictionsDone = true;
 }
@@ -510,6 +380,11 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
   if (!this->determineTarget())
     return false;
 
+  // We always run the verifier once on the merged module; the `DisableVerify`
+  // parameter only applies to subsequent verifications.
+  if (verifyModule(*MergedModule, &dbgs()))
+    report_fatal_error("Broken module found, compilation aborted!");
+
   // Mark which symbols can not be internalized
   this->applyScopeRestrictions();
 
diff --git a/llvm/lib/LTO/LTOInternalize.cpp b/llvm/lib/LTO/LTOInternalize.cpp
new file mode 100644 (file)
index 0000000..b81ab85
--- /dev/null
@@ -0,0 +1,189 @@
+//==-LTOInternalize.cpp - LLVM Link Time Optimizer Internalization Utility -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a helper to run the internalization part of LTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LTOInternalize.h"
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+namespace {
+
+class ComputePreserveList {
+public:
+  ComputePreserveList(const StringSet<> &MustPreserveSymbols,
+                      const StringSet<> &AsmUndefinedRefs,
+                      const TargetMachine &TM, const Module &TheModule,
+                      StringMap<GlobalValue::LinkageTypes> *ExternalSymbols,
+                      std::vector<const char *> &MustPreserveList,
+                      SmallPtrSetImpl<const GlobalValue *> &AsmUsed)
+      : MustPreserveSymbols(MustPreserveSymbols),
+        AsmUndefinedRefs(AsmUndefinedRefs), TM(TM),
+        ExternalSymbols(ExternalSymbols), MustPreserveList(MustPreserveList),
+        AsmUsed(AsmUsed) {
+    accumulateAndSortLibcalls(TheModule);
+    for (const Function &F : TheModule)
+      applyRestriction(F);
+    for (const GlobalVariable &GV : TheModule.globals())
+      applyRestriction(GV);
+    for (const GlobalAlias &GA : TheModule.aliases())
+      applyRestriction(GA);
+  }
+
+private:
+  // Inputs
+  const StringSet<> &MustPreserveSymbols;
+  const StringSet<> AsmUndefinedRefs;
+  const TargetMachine &TM;
+
+  // Temps
+  Mangler Mangler;
+  std::vector<StringRef> Libcalls;
+
+  // Output
+  StringMap<GlobalValue::LinkageTypes> *ExternalSymbols;
+  std::vector<const char *> &MustPreserveList;
+  SmallPtrSetImpl<const GlobalValue *> &AsmUsed;
+
+  // Collect names of runtime library functions. User-defined functions with the
+  // same names are added to llvm.compiler.used to prevent them from being
+  // deleted by optimizations.
+  void accumulateAndSortLibcalls(const Module &TheModule) {
+    TargetLibraryInfoImpl TLII(Triple(TM.getTargetTriple()));
+    TargetLibraryInfo TLI(TLII);
+
+    // TargetLibraryInfo has info on C runtime library calls on the current
+    // target.
+    for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs);
+         I != E; ++I) {
+      LibFunc::Func F = static_cast<LibFunc::Func>(I);
+      if (TLI.has(F))
+        Libcalls.push_back(TLI.getName(F));
+    }
+
+    SmallPtrSet<const TargetLowering *, 1> TLSet;
+
+    for (const Function &F : TheModule) {
+      const TargetLowering *Lowering =
+          TM.getSubtargetImpl(F)->getTargetLowering();
+
+      if (Lowering && TLSet.insert(Lowering).second)
+        // TargetLowering has info on library calls that CodeGen expects to be
+        // available, both from the C runtime and compiler-rt.
+        for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL);
+             I != E; ++I)
+          if (const char *Name =
+                  Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I)))
+            Libcalls.push_back(Name);
+    }
+
+    array_pod_sort(Libcalls.begin(), Libcalls.end());
+    Libcalls.erase(std::unique(Libcalls.begin(), Libcalls.end()),
+                   Libcalls.end());
+  }
+
+  void applyRestriction(const GlobalValue &GV) {
+    // There are no restrictions to apply to declarations.
+    if (GV.isDeclaration())
+      return;
+
+    // There is nothing more restrictive than private linkage.
+    if (GV.hasPrivateLinkage())
+      return;
+
+    SmallString<64> Buffer;
+    TM.getNameWithPrefix(Buffer, &GV, Mangler);
+
+    if (MustPreserveSymbols.count(Buffer))
+      MustPreserveList.push_back(GV.getName().data());
+    if (AsmUndefinedRefs.count(Buffer))
+      AsmUsed.insert(&GV);
+
+    // Conservatively append user-supplied runtime library functions to
+    // llvm.compiler.used.  These could be internalized and deleted by
+    // optimizations like -globalopt, causing problems when later optimizations
+    // add new library calls (e.g., llvm.memset => memset and printf => puts).
+    // Leave it to the linker to remove any dead code (e.g. with -dead_strip).
+    if (isa<Function>(GV) &&
+        std::binary_search(Libcalls.begin(), Libcalls.end(), GV.getName()))
+      AsmUsed.insert(&GV);
+
+    // Record the linkage type of non-local symbols so they can be restored
+    // prior
+    // to module splitting.
+    if (ExternalSymbols && !GV.hasAvailableExternallyLinkage() &&
+        !GV.hasLocalLinkage() && GV.hasName())
+      ExternalSymbols->insert(std::make_pair(GV.getName(), GV.getLinkage()));
+  }
+};
+
+} // namespace anonymous
+
+static void findUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSetImpl<const GlobalValue *> &UsedValues) {
+  if (!LLVMUsed)
+    return;
+
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+    if (GlobalValue *GV =
+            dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+      UsedValues.insert(GV);
+}
+
+void llvm::LTOInternalize(
+    Module &TheModule, const TargetMachine &TM,
+    const StringSet<> &MustPreserveSymbols, const StringSet<> &AsmUndefinedRefs,
+    StringMap<GlobalValue::LinkageTypes> *ExternalSymbols) {
+  legacy::PassManager passes;
+  // mark which symbols can not be internalized
+  Mangler Mangler;
+  std::vector<const char *> MustPreserveList;
+  SmallPtrSet<const GlobalValue *, 8> AsmUsed;
+
+  ComputePreserveList(MustPreserveSymbols, AsmUndefinedRefs, TM, TheModule,
+                      ExternalSymbols, MustPreserveList, AsmUsed);
+
+  GlobalVariable *LLVMCompilerUsed =
+      TheModule.getGlobalVariable("llvm.compiler.used");
+  findUsedValues(LLVMCompilerUsed, AsmUsed);
+  if (LLVMCompilerUsed)
+    LLVMCompilerUsed->eraseFromParent();
+
+  if (!AsmUsed.empty()) {
+    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext());
+    std::vector<Constant *> asmUsed2;
+    for (const auto *GV : AsmUsed) {
+      Constant *c =
+          ConstantExpr::getBitCast(const_cast<GlobalValue *>(GV), i8PTy);
+      asmUsed2.push_back(c);
+    }
+
+    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
+    LLVMCompilerUsed = new llvm::GlobalVariable(
+        TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage,
+        llvm::ConstantArray::get(ATy, asmUsed2), "llvm.compiler.used");
+
+    LLVMCompilerUsed->setSection("llvm.metadata");
+  }
+
+  passes.add(createInternalizePass(MustPreserveList));
+
+  // apply scope restrictions
+  passes.run(TheModule);
+}
diff --git a/llvm/lib/LTO/LTOInternalize.h b/llvm/lib/LTO/LTOInternalize.h
new file mode 100644 (file)
index 0000000..ebe5a6f
--- /dev/null
@@ -0,0 +1,30 @@
+//===-LTOInternalize.h - LLVM Link Time Optimizer Internalization Utility -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a helper function to run the internalization part of LTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_LTOINTERNALIZE_H
+#define LLVM_LTO_LTOINTERNALIZE_H
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/GlobalValue.h"
+
+namespace llvm {
+class Module;
+class TargetMachine;
+/**
+ * Run the internalization step of LTO on TheModule.
+ *
+ * Computes the list of symbols to preserve, rebuilds the llvm.compiler.used
+ * global and then runs the internalize pass on the module.
+ *
+ * TheModule           module to internalize; modified in place.
+ * TM                  target machine used to compute the prefixed symbol
+ *                     names that are looked up in the two sets below.
+ * MustPreserveSymbols names of definitions that must not be internalized.
+ * AsmUndefinedRefs    names of definitions that are added to
+ *                     llvm.compiler.used.
+ * ExternalSymbols     optional output, may be null. When non-null it receives
+ *                     the linkage of each named definition that is neither
+ *                     local nor available_externally.
+ */
+void LTOInternalize(Module &TheModule, const TargetMachine &TM,
+                    const StringSet<> &MustPreserveSymbols,
+                    const StringSet<> &AsmUndefinedRefs,
+                    StringMap<GlobalValue::LinkageTypes> *ExternalSymbols);
+}
+
+#endif // LLVM_LTO_LTOINTERNALIZE_H
index 5d2508a..d8b20d4 100644 (file)
@@ -6,10 +6,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
 ; -disable-verify should disable verification from the optimization pipeline.
-; CHECK: Pass Arguments: -verify -internalize
+; CHECK: Pass Arguments: -internalize
 ; CHECK-NOT: -verify
 
-; VERIFY: Pass Arguments: -verify -internalize
+; VERIFY: Pass Arguments: -internalize
 ; VERIFY: Pass Arguments: {{.*}} -verify {{.*}} -verify
 
 define void @f() {