[Inliner] Inlining should honor nobuiltin attributes
authorTeresa Johnson <tejohnson@google.com>
Thu, 6 Feb 2020 21:28:41 +0000 (13:28 -0800)
committerTeresa Johnson <tejohnson@google.com>
Fri, 28 Feb 2020 15:34:14 +0000 (07:34 -0800)
Summary:
Final patch in series to fix inlining between functions with different
nobuiltin attributes/options, which was specifically an issue in LTO.
See discussion on D61634 for background.

The prior patch in this series (D67923) enabled per-Function TLI
construction that identified the nobuiltin attributes.

Here I have allowed inlining to proceed if the callee's nobuiltins are a
subset of the caller's nobuiltins, but not in the reverse case, which
should be conservatively correct. This is controlled by a new option,
-inline-caller-superset-nobuiltin, which is enabled by default.

Reviewers: hfinkel, gchatelet, chandlerc, davidxl

Subscribers: arsenm, jvesely, nhaehnle, mehdi_amini, eraman, hiraditya, haicheng, dexonsmith, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D74162

llvm/include/llvm/Analysis/InlineCost.h
llvm/include/llvm/Analysis/TargetLibraryInfo.h
llvm/include/llvm/Transforms/IPO/Inliner.h
llvm/lib/Analysis/InlineCost.cpp
llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
llvm/lib/Transforms/IPO/InlineSimple.cpp
llvm/lib/Transforms/IPO/Inliner.cpp
llvm/lib/Transforms/IPO/PartialInlining.cpp
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/Transforms/Inline/inline-no-builtin-compatible.ll [new file with mode: 0644]

index fcb6305..0900162 100644 (file)
@@ -27,6 +27,7 @@ class DataLayout;
 class Function;
 class ProfileSummaryInfo;
 class TargetTransformInfo;
+class TargetLibraryInfo;
 
 namespace InlineConstants {
 // Various thresholds used by inline cost analysis.
@@ -219,6 +220,7 @@ InlineCost getInlineCost(
     CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
 
 /// Get an InlineCost with the callee explicitly specified.
@@ -231,6 +233,7 @@ getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
               TargetTransformInfo &CalleeTTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+              function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
 
 /// Minimal filter to detect invalid constructs for inlining.
index fdcb448..0860b72 100644 (file)
@@ -260,6 +260,21 @@ public:
     return *this;
   }
 
+  /// Determine whether a callee with the given TLI can be inlined into
+  /// caller with this TLI, based on 'nobuiltin' attributes. When requested,
+  /// allow inlining into a caller with a superset of the callee's nobuiltin
+  /// attributes, which is conservatively correct.
+  bool areInlineCompatible(const TargetLibraryInfo &CalleeTLI,
+                           bool AllowCallerSuperset) const {
+    if (!AllowCallerSuperset)
+      return OverrideAsUnavailable == CalleeTLI.OverrideAsUnavailable;
+    BitVector B = OverrideAsUnavailable;
+    B |= CalleeTLI.OverrideAsUnavailable;
+    // We can inline if the union of the caller and callee's nobuiltin
+    // attributes is no stricter than the caller's nobuiltin attributes.
+    return B == OverrideAsUnavailable;
+  }
+
   /// Searches for a particular function name.
   ///
   /// If it is one of the known library functions, return true and set F to the
index 8202b94..38137e9 100644 (file)
@@ -74,6 +74,7 @@ private:
 protected:
   AssumptionCacheTracker *ACT;
   ProfileSummaryInfo *PSI;
+  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
   ImportedFunctionsInliningStatistics ImportedFunctionsStats;
 };
 
index 4182452..b4aa2fb 100644 (file)
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
@@ -103,6 +104,12 @@ static cl::opt<bool> OptComputeFullInlineCost(
     cl::desc("Compute the full inline cost of a call site even when the cost "
              "exceeds the threshold."));
 
+static cl::opt<bool> InlineCallerSupersetNoBuiltin(
+    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
+    cl::ZeroOrMore,
+    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
+             "attributes."));
+
 namespace {
 class InlineCostCallAnalyzer;
 
@@ -2146,10 +2153,17 @@ LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
 
 /// Test that there are no attribute conflicts between Caller and Callee
 ///        that prevent inlining.
-static bool functionsHaveCompatibleAttributes(Function *Caller,
-                                              Function *Callee,
-                                              TargetTransformInfo &TTI) {
+static bool functionsHaveCompatibleAttributes(
+    Function *Caller, Function *Callee, TargetTransformInfo &TTI,
+    function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
+  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
+  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
+  // object, and always returns the same object (which is overwritten on each
+  // GetTLI call). Therefore we copy the first result.
+  auto CalleeTLI = GetTLI(*Callee);
   return TTI.areInlineCompatible(Caller, Callee) &&
+         GetTLI(*Caller).areInlineCompatible(CalleeTLI,
+                                             InlineCallerSupersetNoBuiltin) &&
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
 }
 
@@ -2190,9 +2204,10 @@ InlineCost llvm::getInlineCost(
     CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
   return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
-                       GetAssumptionCache, GetBFI, PSI, ORE);
+                       GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
 }
 
 InlineCost llvm::getInlineCost(
@@ -2200,6 +2215,7 @@ InlineCost llvm::getInlineCost(
     TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
 
   // Cannot inline indirect calls.
@@ -2232,7 +2248,7 @@ InlineCost llvm::getInlineCost(
   // Never inline functions with conflicting attributes (unless callee has
   // always-inline attribute).
   Function *Caller = Call.getCaller();
-  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
+  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
     return llvm::InlineCost::getNever("conflicting attributes");
 
   // Don't inline this call if the caller has the optnone attribute.
index 91a78ed..8a63740 100644 (file)
@@ -215,8 +215,8 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
   };
 
   auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
-                             LocalParams, TTI, GetAssumptionCache, None, PSI,
-                             RemarksEnabled ? &ORE : nullptr);
+                                LocalParams, TTI, GetAssumptionCache, None,
+                                GetTLI, PSI, RemarksEnabled ? &ORE : nullptr);
 
   if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
     // Single BB does not increase total BB amount, thus subtract 1
index e818743..57af453 100644 (file)
@@ -71,7 +71,7 @@ public:
     };
     return llvm::getInlineCost(
         cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache,
-        /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr);
+        /*GetBFI=*/None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr);
   }
 
   bool runOnSCC(CallGraphSCC &SCC) override;
index 4f9f4bd..d35d8f5 100644 (file)
@@ -528,7 +528,7 @@ static bool
 inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                 ProfileSummaryInfo *PSI,
-                std::function<TargetLibraryInfo &(Function &)> GetTLI,
+                std::function<const TargetLibraryInfo &(Function &)> GetTLI,
                 bool InsertLifetime,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost,
                 function_ref<AAResults &(Function &)> AARGetter,
@@ -761,7 +761,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   ACT = &getAnalysis<AssumptionCacheTracker>();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
+  GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
     return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   };
   auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
@@ -1008,6 +1008,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
       return FAM.getResult<BlockFrequencyAnalysis>(F);
     };
+    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+      return FAM.getResult<TargetLibraryAnalysis>(F);
+    };
 
     auto GetInlineCost = [&](CallSite CS) {
       Function &Callee = *CS.getCalledFunction();
@@ -1016,7 +1019,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
           Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
               DEBUG_TYPE);
       return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
-                           CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
+                           CalleeTTI, GetAssumptionCache, {GetBFI}, GetTLI, PSI,
                            RemarksEnabled ? &ORE : nullptr);
     };
 
index a122e47..4f8c131 100644 (file)
@@ -203,9 +203,10 @@ struct PartialInlinerImpl {
       function_ref<AssumptionCache *(Function &)> LookupAC,
       std::function<TargetTransformInfo &(Function &)> *GTTI,
       Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
+      std::function<const TargetLibraryInfo &(Function &)> *GTLI,
       ProfileSummaryInfo *ProfSI)
       : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
-        GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+        GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
 
   bool run(Module &M);
   // Main part of the transformation that calls helper functions to find
@@ -274,6 +275,7 @@ private:
   function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
   std::function<TargetTransformInfo &(Function &)> *GetTTI;
   Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
+  std::function<const TargetLibraryInfo &(Function &)> *GetTLI;
   ProfileSummaryInfo *PSI;
 
   // Return the frequency of the OutlininingBB relative to F's entry point.
@@ -355,6 +357,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
   }
 
   bool runOnModule(Module &M) override {
@@ -381,8 +384,13 @@ struct PartialInlinerLegacyPass : public ModulePass {
       return TTIWP->getTTI(F);
     };
 
+    std::function<const TargetLibraryInfo &(Function &)> GetTLI =
+        [this](Function &F) -> TargetLibraryInfo & {
+      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    };
+
     return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
-                              &GetTTI, NoneType::None, PSI)
+                              &GetTTI, NoneType::None, &GetTLI, PSI)
         .run(M);
   }
 };
@@ -778,8 +786,8 @@ bool PartialInlinerImpl::shouldPartialInline(
           DEBUG_TYPE);
   assert(Call && "invalid callsite for partial inline");
   InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
-                                CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
-                                RemarksEnabled ? &ORE : nullptr);
+                                CalleeTTI, *GetAssumptionCache, GetBFI, *GetTLI,
+                                PSI, RemarksEnabled ? &ORE : nullptr);
 
   if (IC.isAlways()) {
     ORE.emit([&]() {
@@ -1493,6 +1501,7 @@ INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                     "Partial Inliner", false, false)
 
@@ -1523,10 +1532,15 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
     return FAM.getResult<TargetIRAnalysis>(F);
   };
 
+  std::function<const TargetLibraryInfo &(Function &)> GetTLI =
+      [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
 
   if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
-                         {GetBFI}, PSI)
+                         {GetBFI}, &GetTLI, PSI)
           .run(M))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
index 719b18c..b5b8340 100644 (file)
@@ -42,6 +42,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -307,10 +308,12 @@ public:
   SampleProfileLoader(
       StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
       std::function<AssumptionCache &(Function &)> GetAssumptionCache,
-      std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
+      std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
+      std::function<const TargetLibraryInfo &(Function &)> GetTLI)
       : GetAC(std::move(GetAssumptionCache)),
-        GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
-        Filename(std::string(Name)), RemappingFilename(std::string(RemapName)),
+        GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
+        CoverageTracker(*this), Filename(std::string(Name)),
+        RemappingFilename(std::string(RemapName)),
         IsThinLTOPreLink(IsThinLTOPreLink) {}
 
   bool doInitialization(Module &M);
@@ -397,6 +400,7 @@ protected:
 
   std::function<AssumptionCache &(Function &)> GetAC;
   std::function<TargetTransformInfo &(Function &)> GetTTI;
+  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
 
   /// Predecessors for each basic block in the CFG.
   BlockEdgeMap Predecessors;
@@ -474,14 +478,17 @@ public:
 
   SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
                                 bool IsThinLTOPreLink = false)
-      : ModulePass(ID),
-        SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
-                     [&](Function &F) -> AssumptionCache & {
-                       return ACT->getAssumptionCache(F);
-                     },
-                     [&](Function &F) -> TargetTransformInfo & {
-                       return TTIWP->getTTI(F);
-                     }) {
+      : ModulePass(ID), SampleLoader(
+                            Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+                            [&](Function &F) -> AssumptionCache & {
+                              return ACT->getAssumptionCache(F);
+                            },
+                            [&](Function &F) -> TargetTransformInfo & {
+                              return TTIWP->getTTI(F);
+                            },
+                            [&](Function &F) -> TargetLibraryInfo & {
+                              return TLIWP->getTLI(F);
+                            }) {
     initializeSampleProfileLoaderLegacyPassPass(
         *PassRegistry::getPassRegistry());
   }
@@ -498,6 +505,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
   }
 
@@ -505,6 +513,7 @@ private:
   SampleProfileLoader SampleLoader;
   AssumptionCacheTracker *ACT = nullptr;
   TargetTransformInfoWrapperPass *TTIWP = nullptr;
+  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
 };
 
 } // end anonymous namespace
@@ -902,7 +911,7 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
   // see if it is legal to inline the callsite.
   InlineCost Cost =
       getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
-                    None, nullptr, nullptr);
+                    None, GetTLI, nullptr, nullptr);
   if (Cost.isNever()) {
     ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
               << "incompatible inlining");
@@ -929,7 +938,7 @@ bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
 
   InlineCost Cost =
       getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
-                    GetTTI(*Callee), GetAC, None, nullptr, nullptr);
+                    GetTTI(*Callee), GetAC, None, GetTLI, nullptr, nullptr);
 
   return Cost.getCost() <= SampleColdCallSiteThreshold;
 }
@@ -1770,6 +1779,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
                       "Sample Profile loader", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
                     "Sample Profile loader", false, false)
@@ -1890,6 +1900,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
   ACT = &getAnalysis<AssumptionCacheTracker>();
   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
   ProfileSummaryInfo *PSI =
       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
@@ -1966,12 +1977,15 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
   auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
     return FAM.getResult<TargetIRAnalysis>(F);
   };
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
 
   SampleProfileLoader SampleLoader(
       ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
       ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
                                        : ProfileRemappingFileName,
-      IsThinLTOPreLink, GetAssumptionCache, GetTTI);
+      IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
 
   if (!SampleLoader.doInitialization(M))
     return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/Inline/inline-no-builtin-compatible.ll b/llvm/test/Transforms/Inline/inline-no-builtin-compatible.ll
new file mode 100644 (file)
index 0000000..2568cef
--- /dev/null
@@ -0,0 +1,94 @@
+; Test to ensure no inlining is allowed into a caller with fewer nobuiltin attributes.
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -inline | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+
+; Make sure we don't inline callees into a caller with a superset of the
+; no builtin attributes when -inline-caller-superset-nobuiltin=false.
+; RUN: opt < %s -inline-caller-superset-nobuiltin=false -mtriple=x86_64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s --check-prefix=NOSUPERSET
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @allbuiltins() {
+entry:
+  %call = call i32 (...) @externalfunc()
+  ret i32 %call
+; CHECK-LABEL: allbuiltins
+; CHECK: call i32 (...) @externalfunc()
+}
+declare i32 @externalfunc(...)
+
+; We can inline a function that allows all builtins into one with a single
+; nobuiltin.
+define i32 @nobuiltinmemcpy() #0 {
+entry:
+  %call = call i32 @allbuiltins()
+  ret i32 %call
+; CHECK-LABEL: nobuiltinmemcpy
+; CHECK-NOT: call i32 @allbuiltins()
+; NOSUPERSET-LABEL: nobuiltinmemcpy
+; NOSUPERSET: call i32 @allbuiltins()
+}
+
+; We can inline a function that allows all builtins into one with all
+; nobuiltins.
+define i32 @nobuiltins() #1 {
+entry:
+  %call = call i32 @allbuiltins()
+  ret i32 %call
+; CHECK-LABEL: nobuiltins
+; CHECK-NOT: call i32 @allbuiltins()
+; NOSUPERSET-LABEL: nobuiltins
+; NOSUPERSET: call i32 @allbuiltins()
+}
+
+; We can inline a function with a single nobuiltin into one with all nobuiltins.
+define i32 @nobuiltins2() #1 {
+entry:
+  %call = call i32 @nobuiltinmemcpy()
+  ret i32 %call
+; CHECK-LABEL: nobuiltins2
+; CHECK-NOT: call i32 @nobuiltinmemcpy()
+; NOSUPERSET-LABEL: nobuiltins2
+; NOSUPERSET: call i32 @nobuiltinmemcpy()
+}
+
+; We can't inline a function with any given nobuiltin into one that allows all
+; builtins.
+define i32 @allbuiltins2() {
+entry:
+  %call = call i32 @nobuiltinmemcpy()
+  ret i32 %call
+; CHECK-LABEL: allbuiltins2
+; CHECK: call i32 @nobuiltinmemcpy()
+; NOSUPERSET-LABEL: allbuiltins2
+; NOSUPERSET: call i32 @nobuiltinmemcpy()
+}
+
+; We can't inline a function with all nobuiltins into one that allows all
+; builtins.
+define i32 @allbuiltins3() {
+entry:
+  %call = call i32 @nobuiltins()
+  ret i32 %call
+; CHECK-LABEL: allbuiltins3
+; CHECK: call i32 @nobuiltins()
+; NOSUPERSET-LABEL: allbuiltins3
+; NOSUPERSET: call i32 @nobuiltins()
+}
+
+; We can't inline a function with a specific nobuiltin into one with a
+; different specific nobuiltin.
+define i32 @nobuiltinmemset() #2 {
+entry:
+  %call = call i32 @nobuiltinmemcpy()
+  ret i32 %call
+; CHECK-LABEL: nobuiltinmemset
+; CHECK: call i32 @nobuiltinmemcpy()
+; NOSUPERSET-LABEL: nobuiltinmemset
+; NOSUPERSET: call i32 @nobuiltinmemcpy()
+}
+
+attributes #0 = { "no-builtin-memcpy" }
+attributes #1 = { "no-builtins" }
+attributes #2 = { "no-builtin-memset" }