From 883dbb9c86be87593a58ef10b070b3a0564c7fee Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Mar 2023 15:43:46 +0100 Subject: [PATCH] Revert "[MemProf] Context disambiguation cloning pass [patch 1a/3]" MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This reverts commit d6ad4f01c3dafcab335bca66dac6e36d9eac8421. Fails to build on at least gcc 12.2: /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:482:1: error: no declaration matches ‘ContextNode* CallsiteContextGraph::getNodeForInst(const CallInfo&)’ 482 | CallsiteContextGraph::getNodeForInst( | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:393:16: note: candidate is: ‘CallsiteContextGraph::ContextNode* CallsiteContextGraph::getNodeForInst(const CallInfo&)’ 393 | ContextNode *getNodeForInst(const CallInfo &C); | ^~~~~~~~~~~~~~ /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:99:7: note: ‘class CallsiteContextGraph’ defined here 99 | class CallsiteContextGraph { | ^~~~~~~~~~~~~~~~~~~~ --- .../Transforms/IPO/MemProfContextDisambiguation.h | 38 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassBuilderPipelines.cpp | 11 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/IPO/CMakeLists.txt | 1 - .../IPO/MemProfContextDisambiguation.cpp | 1583 -------------------- llvm/test/ThinLTO/X86/memprof-summary.ll | 184 +++ .../MemProfContextDisambiguation/basic.ll | 158 -- .../duplicate-context-ids.ll | 232 --- .../duplicate-context-ids2.ll | 386 ----- .../MemProfContextDisambiguation/indirectcall.ll | 261 ---- .../MemProfContextDisambiguation/inlined.ll | 189 --- .../MemProfContextDisambiguation/inlined2.ll | 135 -- .../MemProfContextDisambiguation/pass-pipeline.ll | 41 - 14 files changed, 184 insertions(+), 3037 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h delete mode 100644 llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp create mode 100644 llvm/test/ThinLTO/X86/memprof-summary.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/basic.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h deleted file mode 100644 index 56e56ed..0000000 --- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h +++ /dev/null @@ -1,38 +0,0 @@ -//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Implements support for context disambiguation of allocation calls for profile -// guided heap optimization using memprof metadata. See implementation file for -// details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H -#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/PassManager.h" - -namespace llvm { -class Module; - -class MemProfContextDisambiguation - : public PassInfoMixin { - /// Run the context disambiguator on \p M, returns true if any changes made. - bool processModule(Module &M); - -public: - MemProfContextDisambiguation() {} - - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); -}; -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a04f8bb..89d2e6a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -117,7 +117,6 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" -#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index aaabe23..1d38613 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -57,7 +57,6 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" -#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" @@ -272,10 +271,6 @@ static cl::opt AttributorRun( clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); -cl::opt EnableMemProfContextDisambiguation( - "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, - cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); - PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -1714,12 +1709,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, InlinePass::CGSCCInliner})); - // Perform context disambiguation after inlining, since that would reduce the - // amount of additional cloning required to distinguish the allocation - // contexts. - if (EnableMemProfContextDisambiguation) - MPM.addPass(MemProfContextDisambiguation()); - // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 82592a1..04d6485 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -87,7 +87,6 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) -MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index e03aff0..063a9a6 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_component_library(LLVMipo Internalize.cpp LoopExtractor.cpp LowerTypeTests.cpp - MemProfContextDisambiguation.cpp MergeFunctions.cpp ModuleInliner.cpp OpenMPOpt.cpp diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp deleted file mode 100644 index fc8b12d..0000000 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ /dev/null @@ -1,1583 +0,0 @@ -//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements support for context disambiguation of allocation -// calls for profile guided heap optimization. Specifically, it uses Memprof -// profiles which indicate context specific allocation behavior (currently -// distinguishing cold vs hot memory allocations). Cloning is performed to -// expose the cold allocation call contexts, and the allocation calls are -// subsequently annotated with an attribute for later transformation. -// -// The transformations can be performed either directly on IR (regular LTO), or -// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO -// backend). Both types of LTO operate on a the same base graph representation, -// which uses CRTP to support either IR or Index formats. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/MemoryProfileInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO.h" -#include -#include -using namespace llvm; -using namespace llvm::memprof; - -#define DEBUG_TYPE "memprof-context-disambiguation" - -static cl::opt DotFilePathPrefix( - "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, - cl::value_desc("filename"), - cl::desc("Specify the path prefix of the MemProf dot files.")); - -static cl::opt ExportToDot("memprof-export-to-dot", cl::init(false), - cl::Hidden, - cl::desc("Export graph to dot files.")); - -static cl::opt - DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, - cl::desc("Dump CallingContextGraph to stdout after each stage.")); - -static cl::opt - VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, - cl::desc("Perform verification checks on CallingContextGraph.")); - -static cl::opt - VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, - cl::desc("Perform frequent verification checks on nodes.")); - -inline bool hasSingleAllocType(uint8_t AllocTypes) { - switch (AllocTypes) { - case (uint8_t)AllocationType::Cold: - case (uint8_t)AllocationType::NotCold: - return true; - break; - case (uint8_t)AllocationType::None: - assert(false); - break; - default: - return false; - break; - } - llvm_unreachable("invalid alloc type"); -} - -/// CRTP base for graphs built from either IR or ThinLTO summary index. -/// -/// The graph represents the call contexts in all memprof metadata on allocation -/// calls, with nodes for the allocations themselves, as well as for the calls -/// in each context. The graph is initially built from the allocation memprof -/// metadata (or summary) MIBs. It is then updated to match calls with callsite -/// metadata onto the nodes, updating it to reflect any inlining performed on -/// those calls. -/// -/// Each MIB (representing an allocation's call context with allocation -/// behavior) is assigned a unique context id during the graph build. The edges -/// and nodes in the graph are decorated with the context ids they carry. This -/// is used to correctly update the graph when cloning is performed so that we -/// can uniquify the context for a single (possibly cloned) allocation. -template -class CallsiteContextGraph { -public: - CallsiteContextGraph() = default; - CallsiteContextGraph(const CallsiteContextGraph &) = default; - CallsiteContextGraph(CallsiteContextGraph &&) = default; - - /// Main entry point to perform analysis and transformations on graph. - bool process(); - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, - const CallsiteContextGraph &CCG) { - CCG.print(OS); - return OS; - } - - friend struct GraphTraits< - const CallsiteContextGraph *>; - friend struct DOTGraphTraits< - const CallsiteContextGraph *>; - - void exportToDot(std::string Label) const; - - /// Represents a function clone via FuncTy pointer and clone number pair. - struct FuncInfo final - : public std::pair { - using Base = std::pair; - FuncInfo(const Base &B) : Base(B) {} - FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} - explicit operator bool() const { return this->first != nullptr; } - FuncTy *func() const { return this->first; } - unsigned cloneNo() const { return this->second; } - }; - - /// Represents a callsite clone via CallTy and clone number pair. - struct CallInfo final : public std::pair { - using Base = std::pair; - CallInfo(const Base &B) : Base(B) {} - CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) - : Base(Call, CloneNo) {} - explicit operator bool() const { return (bool)this->first; } - CallTy call() const { return this->first; } - unsigned cloneNo() const { return this->second; } - void setCloneNo(unsigned N) { this->second = N; } - void print(raw_ostream &OS) const { - if (!operator bool()) { - assert(!cloneNo()); - OS << "null Call"; - return; - } - call()->print(OS); - OS << "\t(clone " << cloneNo() << ")"; - } - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { - Call.print(OS); - return OS; - } - }; - - struct ContextEdge; - - /// Node in the Callsite Context Graph - struct ContextNode { - // Keep this for now since in the IR case where we have an Instruction* it - // is not as immediately discoverable. Used for printing richer information - // when dumping graph. - bool IsAllocation; - - // Keeps track of when the Call was reset to null because there was - // recursion. - bool Recursive = false; - - // The corresponding allocation or interior call. - CallInfo Call; - - // For alloc nodes this is a unique id assigned when constructed, and for - // callsite stack nodes it is the original stack id when the node is - // constructed from the memprof MIB metadata on the alloc nodes. Note that - // this is only used when matching callsite metadata onto the stack nodes - // created when processing the allocation memprof MIBs, and for labeling - // nodes in the dot graph. Therefore we don't bother to assign a value for - // clones. - uint64_t OrigStackOrAllocId = 0; - - // This will be formed by ORing together the AllocationType enum values - // for contexts including this node. - uint8_t AllocTypes = 0; - - // Edges to all callees in the profiled call stacks. - // TODO: Should this be a map (from Callee node) for more efficient lookup? - std::vector> CalleeEdges; - - // Edges to all callers in the profiled call stacks. - // TODO: Should this be a map (from Caller node) for more efficient lookup? - std::vector> CallerEdges; - - // The set of IDs for contexts including this node. - DenseSet ContextIds; - - // List of clones of this ContextNode, initially empty. - std::vector Clones; - - // If a clone, points to the original uncloned node. - ContextNode *CloneOf = nullptr; - - ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} - - ContextNode(bool IsAllocation, CallInfo C) - : IsAllocation(IsAllocation), Call(C) {} - - std::unique_ptr clone() { - auto Clone = std::make_unique(IsAllocation, Call); - if (CloneOf) { - CloneOf->Clones.push_back(Clone.get()); - Clone->CloneOf = CloneOf; - } else { - Clones.push_back(Clone.get()); - Clone->CloneOf = this; - } - return Clone; - } - - ContextNode *getOrigNode() { - if (!CloneOf) - return this; - return CloneOf; - } - - void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, - unsigned int ContextId); - - ContextEdge *findEdgeFromCallee(const ContextNode *Callee); - ContextEdge *findEdgeFromCaller(const ContextNode *Caller); - void eraseCalleeEdge(const ContextEdge *Edge); - void eraseCallerEdge(const ContextEdge *Edge); - - void setCall(CallInfo C) { Call = C; } - - bool hasCall() const { return (bool)Call.call(); } - - void printCall(raw_ostream &OS) const { Call.print(OS); } - - // True if this node was effectively removed from the graph, in which case - // its context id set, caller edges, and callee edges should all be empty. - bool isRemoved() const { - assert(ContextIds.empty() == - (CalleeEdges.empty() && CallerEdges.empty())); - return ContextIds.empty(); - } - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { - Node.print(OS); - return OS; - } - }; - - /// Edge in the Callsite Context Graph from a ContextNode N to a caller or - /// callee. - struct ContextEdge { - ContextNode *Callee; - ContextNode *Caller; - - // This will be formed by ORing together the AllocationType enum values - // for contexts including this edge. - uint8_t AllocTypes = 0; - - // The set of IDs for contexts including this edge. - DenseSet ContextIds; - - ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, - DenseSet ContextIds) - : Callee(Callee), Caller(Caller), AllocTypes(AllocType), - ContextIds(ContextIds) {} - - DenseSet &getContextIds() { return ContextIds; } - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { - Edge.print(OS); - return OS; - } - }; - -protected: - /// Get a list of nodes corresponding to the stack ids in the given callsite - /// context. - template - std::vector - getStackIdsWithContextNodes(CallStack &CallsiteContext); - - /// Adds nodes for the given allocation and any stack ids on its memprof MIB - /// metadata (or summary). - ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); - - /// Adds nodes for the given MIB stack ids. - template - void addStackNodesForMIB(ContextNode *AllocNode, - CallStack &StackContext, - CallStack &CallsiteContext, - AllocationType AllocType); - - /// Matches all callsite metadata (or summary) to the nodes created for - /// allocation memprof MIB metadata, synthesizing new nodes to reflect any - /// inlining performed on those callsite instructions. - void updateStackNodes(); - - /// Update graph to conservatively handle any callsite stack nodes that target - /// multiple different callee target functions. - void handleCallsitesWithMultipleTargets(); - - /// Save lists of calls with MemProf metadata in each function, for faster - /// iteration. - std::vector>> - FuncToCallsWithMetadata; - - /// Map from callsite node to the enclosing caller function. - std::map NodeToCallingFunc; - -private: - using EdgeIter = typename std::vector>::iterator; - - using CallContextInfo = std::tuple, - const FuncTy *, DenseSet>; - - /// Assigns the given Node to calls at or inlined into the location with - /// the Node's stack id, after post order traversing and processing its - /// caller nodes. Uses the call information recorded in the given - /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences - /// as needed. Called by updateStackNodes which sets up the given - /// StackIdToMatchingCalls map. - void assignStackNodesPostOrder( - ContextNode *Node, DenseSet &Visited, - DenseMap> &StackIdToMatchingCalls); - - /// Duplicates the given set of context ids, updating the provided - /// map from each original id with the newly generated context ids, - /// and returning the new duplicated id set. - DenseSet duplicateContextIds( - const DenseSet &StackSequenceContextIds, - DenseMap> &OldToNewContextIds); - - /// Propagates all duplicated context ids across the graph. - void propagateDuplicateContextIds( - const DenseMap> &OldToNewContextIds); - - /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, - /// else to its callers. Also updates OrigNode's edges to remove any context - /// ids moved to the newly created edge. - void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, - bool TowardsCallee); - - /// Get the stack id corresponding to the given Id or Index (for IR this will - /// return itself, for a summary index this will return the id recorded in the - /// index for that stack id index value). - uint64_t getStackId(uint64_t IdOrIndex) const { - return static_cast(this)->getStackId(IdOrIndex); - } - - /// Returns true if the given call targets the given function. - bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { - return static_cast(this)->calleeMatchesFunc(Call, Func); - } - - /// Get a list of nodes corresponding to the stack ids in the given - /// callsite's context. - std::vector getStackIdsWithContextNodesForCall(CallTy Call) { - return static_cast(this)->getStackIdsWithContextNodesForCall( - Call); - } - - /// Get the last stack id in the context for callsite. - uint64_t getLastStackId(CallTy Call) { - return static_cast(this)->getLastStackId(Call); - } - - /// Gets a label to use in the dot graph for the given call clone in the given - /// function. - std::string getLabel(const FuncTy *Func, const CallTy Call, - unsigned CloneNo) const { - return static_cast(this)->getLabel(Func, Call, CloneNo); - } - - /// Helpers to find the node corresponding to the given call or stackid. - ContextNode *getNodeForInst(const CallInfo &C); - ContextNode *getNodeForAlloc(const CallInfo &C); - ContextNode *getNodeForStackId(uint64_t StackId); - - /// Removes the node information recorded for the given call. - void unsetNodeForInst(const CallInfo &C); - - /// Computes the alloc type corresponding to the given context ids, by - /// unioning their recorded alloc types. - uint8_t computeAllocType(DenseSet &ContextIds); - - /// Map from each context ID to the AllocationType assigned to that context. - std::map ContextIdToAllocationType; - - /// Identifies the context node created for a stack id when adding the MIB - /// contexts to the graph. This is used to locate the context nodes when - /// trying to assign the corresponding callsites with those stack ids to these - /// nodes. - std::map StackEntryIdToContextNodeMap; - - /// Maps to track the calls to their corresponding nodes in the graph. - std::map AllocationCallToContextNodeMap; - std::map NonAllocationCallToContextNodeMap; - - /// Owner of all ContextNode unique_ptrs. - std::vector> NodeOwner; - - /// Perform sanity checks on graph when requested. - void check() const; - - /// Keeps track of the last unique context id assigned. - unsigned int LastContextId = 0; -}; - -template -using ContextNode = - typename CallsiteContextGraph::ContextNode; -template -using ContextEdge = - typename CallsiteContextGraph::ContextEdge; -template -using FuncInfo = - typename CallsiteContextGraph::FuncInfo; -template -using CallInfo = - typename CallsiteContextGraph::CallInfo; - -/// CRTP derived class for graphs built from IR (regular LTO). -class ModuleCallsiteContextGraph - : public CallsiteContextGraph { -public: - ModuleCallsiteContextGraph(Module &M); - -private: - friend CallsiteContextGraph; - - uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(Instruction *Call, const Function *Func); - uint64_t getLastStackId(Instruction *Call); - std::vector getStackIdsWithContextNodesForCall(Instruction *Call); - std::string getLabel(const Function *Func, const Instruction *Call, - unsigned CloneNo) const; - - const Module &Mod; -}; - -namespace { - -struct FieldSeparator { - bool Skip = true; - const char *Sep; - - FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} -}; - -raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { - if (FS.Skip) { - FS.Skip = false; - return OS; - } - return OS << FS.Sep; -} - -} // end anonymous namespace - -template -ContextNode * -CallsiteContextGraph::getNodeForInst( - const CallInfo &C) { - ContextNode *Node = getNodeForAlloc(C); - if (Node) - return Node; - - auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C); - if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) { - return NonAllocCallNode->second; - } - return nullptr; -} - -template -ContextNode * -CallsiteContextGraph::getNodeForAlloc( - const CallInfo &C) { - auto AllocCallNode = AllocationCallToContextNodeMap.find(C); - if (AllocCallNode != AllocationCallToContextNodeMap.end()) { - return AllocCallNode->second; - } - return nullptr; -} - -template -ContextNode * -CallsiteContextGraph::getNodeForStackId( - uint64_t StackId) { - auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); - if (StackEntryNode != StackEntryIdToContextNodeMap.end()) - return StackEntryNode->second; - return nullptr; -} - -template -void CallsiteContextGraph::unsetNodeForInst( - const CallInfo &C) { - AllocationCallToContextNodeMap.erase(C) || - NonAllocationCallToContextNodeMap.erase(C); - assert(!AllocationCallToContextNodeMap.count(C) && - !NonAllocationCallToContextNodeMap.count(C)); -} - -template -void CallsiteContextGraph::ContextNode:: - addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, - unsigned int ContextId) { - for (auto &Edge : CallerEdges) { - if (Edge->Caller == Caller) { - Edge->AllocTypes |= (uint8_t)AllocType; - Edge->getContextIds().insert(ContextId); - return; - } - } - std::shared_ptr Edge = std::make_shared( - this, Caller, (uint8_t)AllocType, DenseSet({ContextId})); - CallerEdges.push_back(Edge); - Caller->CalleeEdges.push_back(Edge); -} - -template -ContextEdge * -CallsiteContextGraph::ContextNode:: - findEdgeFromCallee(const ContextNode *Callee) { - for (const auto &Edge : CalleeEdges) - if (Edge->Callee == Callee) - return Edge.get(); - return nullptr; -} - -template -ContextEdge * -CallsiteContextGraph::ContextNode:: - findEdgeFromCaller(const ContextNode *Caller) { - for (const auto &Edge : CallerEdges) - if (Edge->Caller == Caller) - return Edge.get(); - return nullptr; -} - -template -void CallsiteContextGraph::ContextNode:: - eraseCalleeEdge(const ContextEdge *Edge) { - auto EI = - std::find_if(CalleeEdges.begin(), CalleeEdges.end(), - [Edge](const std::shared_ptr &CalleeEdge) { - return CalleeEdge.get() == Edge; - }); - assert(EI != CalleeEdges.end()); - CalleeEdges.erase(EI); -} - -template -void CallsiteContextGraph::ContextNode:: - eraseCallerEdge(const ContextEdge *Edge) { - auto EI = - std::find_if(CallerEdges.begin(), CallerEdges.end(), - [Edge](const std::shared_ptr &CallerEdge) { - return CallerEdge.get() == Edge; - }); - assert(EI != CallerEdges.end()); - CallerEdges.erase(EI); -} - -template -uint8_t CallsiteContextGraph::computeAllocType( - DenseSet &ContextIds) { - uint8_t BothTypes = - (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; - uint8_t AllocType = (uint8_t)AllocationType::None; - for (auto Id : ContextIds) { - AllocType |= (uint8_t)ContextIdToAllocationType[Id]; - // Bail early if alloc type reached both, no further refinement. - if (AllocType == BothTypes) - return AllocType; - } - return AllocType; -} - -template -ContextNode * -CallsiteContextGraph::addAllocNode( - CallInfo Call, const FuncTy *F) { - assert(!getNodeForAlloc(Call)); - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/true, Call)); - ContextNode *AllocNode = NodeOwner.back().get(); - AllocationCallToContextNodeMap[Call] = AllocNode; - NodeToCallingFunc[AllocNode] = F; - // Use LastContextId as a uniq id for MIB allocation nodes. - AllocNode->OrigStackOrAllocId = LastContextId; - // Alloc type should be updated as we add in the MIBs. We should assert - // afterwards that it is not still None. - AllocNode->AllocTypes = (uint8_t)AllocationType::None; - - return AllocNode; -} - -template -template -void CallsiteContextGraph::addStackNodesForMIB( - ContextNode *AllocNode, CallStack &StackContext, - CallStack &CallsiteContext, AllocationType AllocType) { - ContextIdToAllocationType[++LastContextId] = AllocType; - - // Update alloc type and context ids for this MIB. - AllocNode->AllocTypes |= (uint8_t)AllocType; - AllocNode->ContextIds.insert(LastContextId); - - // Now add or update nodes for each stack id in alloc's context. - // Later when processing the stack ids on non-alloc callsites we will adjust - // for any inlining in the context. - ContextNode *PrevNode = AllocNode; - // Look for recursion (direct recursion should have been collapsed by - // module summary analysis, here we should just be detecting mutual - // recursion). Mark these nodes so we don't try to clone. - SmallSet StackIdSet; - // Skip any on the allocation call (inlining). - for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); - ContextIter != StackContext.end(); ++ContextIter) { - auto StackId = getStackId(*ContextIter); - ContextNode *StackNode = getNodeForStackId(StackId); - if (!StackNode) { - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/false)); - StackNode = NodeOwner.back().get(); - StackEntryIdToContextNodeMap[StackId] = StackNode; - StackNode->OrigStackOrAllocId = StackId; - } - auto Ins = StackIdSet.insert(StackId); - if (!Ins.second) - StackNode->Recursive = true; - StackNode->ContextIds.insert(LastContextId); - StackNode->AllocTypes |= (uint8_t)AllocType; - PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); - PrevNode = StackNode; - } -} - -template -DenseSet -CallsiteContextGraph::duplicateContextIds( - const DenseSet &StackSequenceContextIds, - DenseMap> &OldToNewContextIds) { - DenseSet NewContextIds; - for (auto OldId : StackSequenceContextIds) { - NewContextIds.insert(++LastContextId); - OldToNewContextIds[OldId].insert(LastContextId); - assert(ContextIdToAllocationType.count(OldId)); - // The new context has the same allocation type as original. - ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; - } - return NewContextIds; -} - -template -void CallsiteContextGraph:: - propagateDuplicateContextIds( - const DenseMap> &OldToNewContextIds) { - // Build a set of duplicated context ids corresponding to the input id set. - auto GetNewIds = [&OldToNewContextIds](const DenseSet &ContextIds) { - DenseSet NewIds; - for (auto Id : ContextIds) - if (auto NewId = OldToNewContextIds.find(Id); - NewId != OldToNewContextIds.end()) - NewIds.insert(NewId->second.begin(), NewId->second.end()); - return NewIds; - }; - - // Recursively update context ids sets along caller edges. - auto UpdateCallers = [&](ContextNode *Node, - DenseSet &Visited, - auto &&UpdateCallers) -> void { - for (auto Edge : Node->CallerEdges) { - auto Inserted = Visited.insert(Edge.get()); - if (!Inserted.second) - continue; - ContextNode *NextNode = Edge->Caller; - DenseSet NewIdsToAdd = GetNewIds(Edge->getContextIds()); - // Only need to recursively iterate to NextNode via this caller edge if - // it resulted in any added ids to NextNode. - if (!NewIdsToAdd.empty()) { - Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - UpdateCallers(NextNode, Visited, UpdateCallers); - } - } - }; - - DenseSet Visited; - for (auto &Entry : AllocationCallToContextNodeMap) { - auto *Node = Entry.second; - // Update ids on the allocation nodes before calling the recursive - // update along caller edges, since this simplifies the logic during - // that traversal. - DenseSet NewIdsToAdd = GetNewIds(Node->ContextIds); - Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - UpdateCallers(Node, Visited, UpdateCallers); - } -} - -template -void CallsiteContextGraph::connectNewNode( - ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { - // Make a copy of the context ids, since this will be adjusted below as they - // are moved. - DenseSet RemainingContextIds = NewNode->ContextIds; - auto &OrigEdges = - TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; - // Increment iterator in loop so that we can remove edges as needed. - for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { - auto Edge = *EI; - // Remove any matching context ids from Edge, return set that were found and - // removed, these are the new edge's context ids. Also update the remaining - // (not found ids). - DenseSet NewEdgeContextIds, NotFoundContextIds; - set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, - NotFoundContextIds); - RemainingContextIds.swap(NotFoundContextIds); - // If no matching context ids for this edge, skip it. - if (NewEdgeContextIds.empty()) { - ++EI; - continue; - } - if (TowardsCallee) { - auto NewEdge = std::make_shared( - Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), - NewEdgeContextIds); - NewNode->CalleeEdges.push_back(NewEdge); - NewEdge->Callee->CallerEdges.push_back(NewEdge); - } else { - auto NewEdge = std::make_shared( - NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), - NewEdgeContextIds); - NewNode->CallerEdges.push_back(NewEdge); - NewEdge->Caller->CalleeEdges.push_back(NewEdge); - } - // Remove old edge if context ids empty. - if (Edge->getContextIds().empty()) { - if (TowardsCallee) { - Edge->Callee->eraseCallerEdge(Edge.get()); - EI = OrigNode->CalleeEdges.erase(EI); - } else { - Edge->Caller->eraseCalleeEdge(Edge.get()); - EI = OrigNode->CallerEdges.erase(EI); - } - continue; - } - ++EI; - } -} - -template -void CallsiteContextGraph:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet &Visited, - DenseMap> - &StackIdToMatchingCalls) { - auto Inserted = Visited.insert(Node); - if (!Inserted.second) - return; - // Post order traversal. Iterate over a copy since we may add nodes and - // therefore new callers during the recursive call, invalidating any - // iterator over the original edge vector. We don't need to process these - // new nodes as they were already processed on creation. - auto CallerEdges = Node->CallerEdges; - for (auto &Edge : CallerEdges) { - // Skip any that have been removed during the recursion. - if (!Edge) - continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); - } - - // If this node's stack id is in the map, update the graph to contain new - // nodes representing any inlining at interior callsites. Note we move the - // associated context ids over to the new nodes. - - // Ignore this node if it is for an allocation or we didn't record any - // stack id lists ending at it. - if (Node->IsAllocation || - !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) - return; - - auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; - // Handle the simple case first. A single call with a single stack id. - // In this case there is no need to create any new context nodes, simply - // assign the context node for stack id to this Call. - if (Calls.size() == 1) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; - if (Ids.size() == 1) { - assert(SavedContextIds.empty()); - // It should be this Node - assert(Node == getNodeForStackId(Ids[0])); - if (Node->Recursive) - return; - Node->setCall(Call); - NonAllocationCallToContextNodeMap[Call] = Node; - NodeToCallingFunc[Node] = Func; - return; - } - } - - // Find the node for the last stack id, which should be the same - // across all calls recorded for this id, and is this node's id. - uint64_t LastId = Node->OrigStackOrAllocId; - ContextNode *LastNode = getNodeForStackId(LastId); - // We should only have kept stack ids that had nodes. - assert(LastNode); - - for (unsigned I = 0; I < Calls.size(); I++) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; - // Skip any for which we didn't assign any ids, these don't get a node in - // the graph. - if (SavedContextIds.empty()) - continue; - - assert(LastId == Ids.back()); - - ContextNode *FirstNode = getNodeForStackId(Ids[0]); - assert(FirstNode); - - // Recompute the context ids for this stack id sequence (the - // intersection of the context ids of the corresponding nodes). - // Start with the ids we saved in the map for this call, which could be - // duplicated context ids. We have to recompute as we might have overlap - // overlap between the saved context ids for different last nodes, and - // removed them already during the post order traversal. - set_intersect(SavedContextIds, FirstNode->ContextIds); - ContextNode *PrevNode = nullptr; - for (auto Id : Ids) { - ContextNode *CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes and weren't - // recursive. - assert(CurNode); - assert(!CurNode->Recursive); - if (!PrevNode) { - PrevNode = CurNode; - continue; - } - auto *Edge = CurNode->findEdgeFromCallee(PrevNode); - if (!Edge) { - SavedContextIds.clear(); - break; - } - PrevNode = CurNode; - set_intersect(SavedContextIds, Edge->getContextIds()); - - // If we now have no context ids for clone, skip this call. - if (SavedContextIds.empty()) - break; - } - if (SavedContextIds.empty()) - continue; - - // Create new context node. - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/false, Call)); - ContextNode *NewNode = NodeOwner.back().get(); - NodeToCallingFunc[NewNode] = Func; - NonAllocationCallToContextNodeMap[Call] = NewNode; - NewNode->ContextIds = SavedContextIds; - NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); - - // Connect to callees of innermost stack frame in inlined call chain. - // This updates context ids for FirstNode's callee's to reflect those - // moved to NewNode. - connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); - - // Connect to callers of outermost stack frame in inlined call chain. - // This updates context ids for FirstNode's caller's to reflect those - // moved to NewNode. - connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); - - // Now we need to remove context ids from edges/nodes between First and - // Last Node. - PrevNode = nullptr; - for (auto Id : Ids) { - ContextNode *CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes. - assert(CurNode); - - // Remove the context ids moved to NewNode from CurNode, and the - // edge from the prior node. - set_subtract(CurNode->ContextIds, NewNode->ContextIds); - if (PrevNode) { - auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); - assert(PrevEdge); - set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); - if (PrevEdge->getContextIds().empty()) { - PrevNode->eraseCallerEdge(PrevEdge); - CurNode->eraseCalleeEdge(PrevEdge); - } - } - PrevNode = CurNode; - } - } -} - -template -void CallsiteContextGraph::updateStackNodes() { - // Map of stack id to all calls with that as the last (outermost caller) - // callsite id that has a context node (some might not due to pruning - // performed during matching of the allocation profile contexts). - // The CallContextInfo contains the Call and a list of its stack ids with - // ContextNodes, the function containing Call, and the set of context ids - // the analysis will eventually identify for use in any new node created - // for that callsite. - DenseMap> StackIdToMatchingCalls; - for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { - for (auto &Call : CallsWithMetadata) { - // Ignore allocations, already handled. - if (AllocationCallToContextNodeMap.count(Call)) - continue; - auto StackIdsWithContextNodes = - getStackIdsWithContextNodesForCall(Call.call()); - // If there were no nodes created for MIBs on allocs (maybe this was in - // the unambiguous part of the MIB stack that was pruned), ignore. - if (StackIdsWithContextNodes.empty()) - continue; - // Otherwise, record this Call along with the list of ids for the last - // (outermost caller) stack id with a node. - StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( - {Call.call(), StackIdsWithContextNodes, Func, {}}); - } - } - - // First make a pass through all stack ids that correspond to a call, - // as identified in the above loop. Compute the context ids corresponding to - // each of these calls when they correspond to multiple stack ids due to - // due to inlining. Perform any duplication of context ids required when - // there is more than one call with the same stack ids. Their (possibly newly - // duplicated) context ids are saved in the StackIdToMatchingCalls map. - DenseMap> OldToNewContextIds; - for (auto &It : StackIdToMatchingCalls) { - auto &Calls = It.getSecond(); - // Skip single calls with a single stack id. These don't need a new node. - if (Calls.size() == 1) { - auto &Ids = std::get<1>(Calls[0]); - if (Ids.size() == 1) - continue; - } - // In order to do the best and maximal matching of inlined calls to context - // node sequences we will sort the vectors of stack ids in descending order - // of length, and within each length, lexicographically by stack id. The - // latter is so that we can specially handle calls that have identical stack - // id sequences (either due to cloning or artificially because of the MIB - // context pruning). - std::sort(Calls.begin(), Calls.end(), - [](const CallContextInfo &A, const CallContextInfo &B) { - auto &IdsA = std::get<1>(A); - auto &IdsB = std::get<1>(B); - return IdsA.size() > IdsB.size() || - (IdsA.size() == IdsB.size() && IdsA < IdsB); - }); - - // Find the node for the last stack id, which should be the same - // across all calls recorded for this id, and is the id for this - // entry in the StackIdToMatchingCalls map. - uint64_t LastId = It.getFirst(); - ContextNode *LastNode = getNodeForStackId(LastId); - // We should only have kept stack ids that had nodes. - assert(LastNode); - - if (LastNode->Recursive) - continue; - - // Initialize the context ids with the last node's. We will subsequently - // refine the context ids by computing the intersection along all edges. - DenseSet LastNodeContextIds = LastNode->ContextIds; - assert(!LastNodeContextIds.empty()); - - for (unsigned I = 0; I < Calls.size(); I++) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; - assert(SavedContextIds.empty()); - assert(LastId == Ids.back()); - - // First compute the context ids for this stack id sequence (the - // intersection of the context ids of the corresponding nodes). - // Start with the remaining saved ids for the last node. - assert(!LastNodeContextIds.empty()); - DenseSet StackSequenceContextIds = LastNodeContextIds; - - ContextNode *PrevNode = LastNode; - ContextNode *CurNode = LastNode; - bool Skip = false; - - // Iterate backwards through the stack Ids, starting after the last Id - // in the list, which was handled once outside for all Calls. - for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { - auto Id = *IdIter; - CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes. - assert(CurNode); - - if (CurNode->Recursive) { - Skip = true; - break; - } - - auto *Edge = CurNode->findEdgeFromCaller(PrevNode); - // If there is no edge then the nodes belong to different MIB contexts, - // and we should skip this inlined context sequence. For example, this - // particular inlined context may include stack ids A->B, and we may - // indeed have nodes for both A and B, but it is possible that they were - // never profiled in sequence in a single MIB for any allocation (i.e. - // we might have profiled an allocation that involves the callsite A, - // but through a different one of its callee callsites, and we might - // have profiled an allocation that involves callsite B, but reached - // from a different caller callsite). - if (!Edge) { - Skip = true; - break; - } - PrevNode = CurNode; - - // Update the context ids, which is the intersection of the ids along - // all edges in the sequence. - set_intersect(StackSequenceContextIds, Edge->getContextIds()); - - // If we now have no context ids for clone, skip this call. - if (StackSequenceContextIds.empty()) { - Skip = true; - break; - } - } - if (Skip) - continue; - - // If some of this call's stack ids did not have corresponding nodes (due - // to pruning), don't include any context ids for contexts that extend - // beyond these nodes. Otherwise we would be matching part of unrelated / - // not fully matching stack contexts. To do this, subtract any context ids - // found in caller nodes of the last node found above. - if (Ids.back() != getLastStackId(Call)) { - for (auto PE : LastNode->CallerEdges) { - set_subtract(StackSequenceContextIds, PE->getContextIds()); - if (StackSequenceContextIds.empty()) - break; - } - // If we now have no context ids for clone, skip this call. - if (StackSequenceContextIds.empty()) - continue; - } - - // Check if the next set of stack ids is the same (since the Calls vector - // of tuples is sorted by the stack ids we can just look at the next one). - bool DuplicateContextIds = false; - if (I + 1 < Calls.size()) { - auto NextIds = std::get<1>(Calls[I + 1]); - DuplicateContextIds = Ids == NextIds; - } - - // If we don't have duplicate context ids, then we can assign all the - // context ids computed for the original node sequence to this call. - // If there are duplicate calls with the same stack ids then we synthesize - // new context ids that are duplicates of the originals. These are - // assigned to SavedContextIds, which is a reference into the map entry - // for this call, allowing us to access these ids later on. - OldToNewContextIds.reserve(OldToNewContextIds.size() + - StackSequenceContextIds.size()); - SavedContextIds = - DuplicateContextIds - ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) - : StackSequenceContextIds; - assert(!SavedContextIds.empty()); - - if (!DuplicateContextIds) { - // Update saved last node's context ids to remove those that are - // assigned to other calls, so that it is ready for the next call at - // this stack id. - set_subtract(LastNodeContextIds, StackSequenceContextIds); - if (LastNodeContextIds.empty()) - break; - } - } - } - - // Propagate the duplicate context ids over the graph. - propagateDuplicateContextIds(OldToNewContextIds); - - if (VerifyCCG) - check(); - - // Now perform a post-order traversal over the graph, starting with the - // allocation nodes, essentially processing nodes from callers to callees. - // For any that contains an id in the map, update the graph to contain new - // nodes representing any inlining at interior callsites. Note we move the - // associated context ids over to the new nodes. - DenseSet Visited; - for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); -} - -uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { - CallStack CallsiteContext( - Call->getMetadata(LLVMContext::MD_callsite)); - return CallsiteContext.back(); -} - -std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, - const Instruction *Call, - unsigned CloneNo) const { - return (Twine(Call->getFunction()->getName()) + " -> " + - cast(Call)->getCalledFunction()->getName()) - .str(); -} - -std::vector -ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( - Instruction *Call) { - CallStack CallsiteContext( - Call->getMetadata(LLVMContext::MD_callsite)); - return getStackIdsWithContextNodes( - CallsiteContext); -} - -template -template -std::vector -CallsiteContextGraph::getStackIdsWithContextNodes( - CallStack &CallsiteContext) { - std::vector StackIds; - for (auto IdOrIndex : CallsiteContext) { - auto StackId = getStackId(IdOrIndex); - ContextNode *Node = getNodeForStackId(StackId); - if (!Node) - break; - StackIds.push_back(StackId); - } - return StackIds; -} - -ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { - for (auto &F : M) { - std::vector CallsWithMetadata; - for (auto &BB : F) { - for (auto &I : BB) { - if (!isa(I)) - continue; - if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { - CallsWithMetadata.push_back(&I); - auto *AllocNode = addAllocNode(&I, &F); - auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); - assert(CallsiteMD); - CallStack CallsiteContext(CallsiteMD); - // Add all of the MIBs and their stack nodes. - for (auto &MDOp : MemProfMD->operands()) { - auto *MIBMD = cast(MDOp); - MDNode *StackNode = getMIBStackNode(MIBMD); - assert(StackNode); - CallStack StackContext(StackNode); - addStackNodesForMIB( - AllocNode, StackContext, CallsiteContext, - getMIBAllocType(MIBMD)); - } - assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); - // Memprof and callsite metadata on memory allocations no longer - // needed. - I.setMetadata(LLVMContext::MD_memprof, nullptr); - I.setMetadata(LLVMContext::MD_callsite, nullptr); - } - // For callsite metadata, add to list for this function for later use. - else if (I.getMetadata(LLVMContext::MD_callsite)) - CallsWithMetadata.push_back(&I); - } - } - if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); - } - - if (DumpCCG) { - dbgs() << "CCG before updating call stack chains:\n"; - dbgs() << *this; - } - - if (ExportToDot) - exportToDot("prestackupdate"); - - updateStackNodes(); - - handleCallsitesWithMultipleTargets(); - - // Strip off remaining callsite metadata, no longer needed. - for (auto &FuncEntry : FuncToCallsWithMetadata) - for (auto &Call : FuncEntry.second) - Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); -} - -template -void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { - // Look for and workaround callsites that call multiple functions. - // This can happen for indirect calls, which needs better handling, and in - // more rare cases (e.g. macro expansion). - // TODO: To fix this for indirect calls we will want to perform speculative - // devirtualization using either the normal PGO info with ICP, or using the - // information in the profiled MemProf contexts. We can do this prior to - // this transformation for regular LTO, and for ThinLTO we can simulate that - // effect in the summary and perform the actual speculative devirtualization - // while cloning in the ThinLTO backend. - for (auto Entry = NonAllocationCallToContextNodeMap.begin(); - Entry != NonAllocationCallToContextNodeMap.end();) { - auto *Node = Entry->second; - assert(Node->Clones.empty()); - // Check all node callees and see if in the same function. - bool Removed = false; - auto Call = Node->Call.call(); - for (auto &Edge : Node->CalleeEdges) { - if (!Edge->Callee->hasCall()) - continue; - assert(NodeToCallingFunc.count(Edge->Callee)); - // Check if the called function matches that of the callee node. - if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) - continue; - // Work around by setting Node to have a null call, so it gets - // skipped during cloning. Otherwise assignFunctions will assert - // because its data structures are not designed to handle this case. - Entry = NonAllocationCallToContextNodeMap.erase(Entry); - Node->setCall(CallInfo()); - Removed = true; - break; - } - if (!Removed) - Entry++; - } -} - -uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { - // In the Module (IR) case this is already the Id. - return IdOrIndex; -} - -bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, - const Function *Func) { - auto *CB = dyn_cast(Call); - if (!CB->getCalledOperand()) - return false; - auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); - auto *CalleeFunc = dyn_cast(CalleeVal); - if (CalleeFunc == Func) - return true; - auto *Alias = dyn_cast(CalleeVal); - return Alias && Alias->getAliasee() == Func; -} - -static std::string getAllocTypeString(uint8_t AllocTypes) { - if (!AllocTypes) - return "None"; - std::string Str; - if (AllocTypes & (uint8_t)AllocationType::NotCold) - Str += "NotCold"; - if (AllocTypes & (uint8_t)AllocationType::Cold) - Str += "Cold"; - return Str; -} - -template -void CallsiteContextGraph::ContextNode::dump() - const { - print(dbgs()); - dbgs() << "\n"; -} - -template -void CallsiteContextGraph::ContextNode::print( - raw_ostream &OS) const { - OS << "Node " << this << "\n"; - OS << "\t"; - printCall(OS); - if (Recursive) - OS << " (recursive)"; - OS << "\n"; - OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; - OS << "\tContextIds:"; - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - OS << " " << Id; - OS << "\n"; - OS << "\tCalleeEdges:\n"; - for (auto &Edge : CalleeEdges) - OS << "\t\t" << *Edge << "\n"; - OS << "\tCallerEdges:\n"; - for (auto &Edge : CallerEdges) - OS << "\t\t" << *Edge << "\n"; - if (!Clones.empty()) { - OS << "\tClones: "; - FieldSeparator FS; - for (auto *Clone : Clones) - OS << FS << Clone; - OS << "\n"; - } else if (CloneOf) { - OS << "\tClone of " << CloneOf << "\n"; - } -} - -template -void CallsiteContextGraph::ContextEdge::dump() - const { - print(dbgs()); - dbgs() << "\n"; -} - -template -void CallsiteContextGraph::ContextEdge::print( - raw_ostream &OS) const { - OS << "Edge from Callee " << Callee << " to Caller: " << Caller - << " AllocTypes: " << getAllocTypeString(AllocTypes); - OS << " ContextIds:"; - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - OS << " " << Id; -} - -template -void CallsiteContextGraph::dump() const { - print(dbgs()); -} - -template -void CallsiteContextGraph::print( - raw_ostream &OS) const { - OS << "Callsite Context Graph:\n"; - using GraphType = const CallsiteContextGraph *; - for (const auto Node : nodes(this)) { - if (Node->isRemoved()) - continue; - Node->print(OS); - OS << "\n"; - } -} - -template -static void checkEdge( - const std::shared_ptr> &Edge) { - // Confirm that alloc type is not None and that we have at least one context - // id. - assert(Edge->AllocTypes != (uint8_t)AllocationType::None); - assert(!Edge->ContextIds.empty()); -} - -template -static void checkNode(const ContextNode *Node) { - if (Node->isRemoved()) - return; - // Node's context ids should be the union of both its callee and caller edge - // context ids. - if (Node->CallerEdges.size()) { - auto EI = Node->CallerEdges.begin(); - auto &FirstEdge = *EI; - EI++; - DenseSet CallerEdgeContextIds(FirstEdge->ContextIds); - for (; EI != Node->CallerEdges.end(); EI++) { - const auto &Edge = *EI; - set_union(CallerEdgeContextIds, Edge->ContextIds); - } - // Node can have more context ids than callers if some contexts terminate at - // node and some are longer. - assert(Node->ContextIds == CallerEdgeContextIds || - set_is_subset(CallerEdgeContextIds, Node->ContextIds)); - } - if (Node->CalleeEdges.size()) { - auto EI = Node->CalleeEdges.begin(); - auto &FirstEdge = *EI; - EI++; - DenseSet CalleeEdgeContextIds(FirstEdge->ContextIds); - for (; EI != Node->CalleeEdges.end(); EI++) { - const auto &Edge = *EI; - set_union(CalleeEdgeContextIds, Edge->ContextIds); - } - assert(Node->ContextIds == CalleeEdgeContextIds); - } -} - -template -void CallsiteContextGraph::check() const { - using GraphType = const CallsiteContextGraph *; - for (const auto Node : nodes(this)) { - checkNode(Node); - for (auto &Edge : Node->CallerEdges) - checkEdge(Edge); - } -} - -template -struct GraphTraits *> { - using GraphType = const CallsiteContextGraph *; - using NodeRef = const ContextNode *; - - using NodePtrTy = std::unique_ptr>; - static NodeRef getNode(const NodePtrTy &P) { return P.get(); } - - using nodes_iterator = - mapped_iterator::const_iterator, - decltype(&getNode)>; - - static nodes_iterator nodes_begin(GraphType G) { - return nodes_iterator(G->NodeOwner.begin(), &getNode); - } - - static nodes_iterator nodes_end(GraphType G) { - return nodes_iterator(G->NodeOwner.end(), &getNode); - } - - static NodeRef getEntryNode(GraphType G) { - return G->NodeOwner.begin()->get(); - } - - using EdgePtrTy = std::shared_ptr>; - static const ContextNode * - GetCallee(const EdgePtrTy &P) { - return P->Callee; - } - - using ChildIteratorType = - mapped_iterator>>::const_iterator, - decltype(&GetCallee)>; - - static ChildIteratorType child_begin(NodeRef N) { - return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); - } - - static ChildIteratorType child_end(NodeRef N) { - return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); - } -}; - -template -struct DOTGraphTraits *> - : public DefaultDOTGraphTraits { - DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} - - using GraphType = const CallsiteContextGraph *; - using GTraits = GraphTraits; - using NodeRef = typename GTraits::NodeRef; - using ChildIteratorType = typename GTraits::ChildIteratorType; - - static std::string getNodeLabel(NodeRef Node, GraphType G) { - std::string LabelString = - (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + - Twine(Node->OrigStackOrAllocId)) - .str(); - LabelString += "\n"; - if (Node->hasCall()) { - auto Func = G->NodeToCallingFunc.find(Node); - assert(Func != G->NodeToCallingFunc.end()); - LabelString += - G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); - } else { - LabelString += "null call"; - if (Node->Recursive) - LabelString += " (recursive)"; - else - LabelString += " (external)"; - } - return LabelString; - } - - static std::string getNodeAttributes(NodeRef Node, GraphType) { - std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + - getContextIds(Node->ContextIds) + "\"") - .str(); - AttributeString += - (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); - AttributeString += ",style=\"filled\""; - if (Node->CloneOf) { - AttributeString += ",color=\"blue\""; - AttributeString += ",style=\"filled,bold,dashed\""; - } else - AttributeString += ",style=\"filled\""; - return AttributeString; - } - - static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, - GraphType) { - auto &Edge = *(ChildIter.getCurrent()); - return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + - Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") - .str(); - } - - // Since the NodeOwners list includes nodes that are no longer connected to - // the graph, skip them here. - static bool isNodeHidden(NodeRef Node, GraphType) { - return Node->isRemoved(); - } - -private: - static std::string getContextIds(const DenseSet &ContextIds) { - std::string IdString = "ContextIds:"; - if (ContextIds.size() < 100) { - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - IdString += (" " + Twine(Id)).str(); - } else { - IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); - } - return IdString; - } - - static std::string getColor(uint8_t AllocTypes) { - if (AllocTypes == (uint8_t)AllocationType::NotCold) - // Color "brown1" actually looks like a lighter red. - return "brown1"; - if (AllocTypes == (uint8_t)AllocationType::Cold) - return "cyan"; - if (AllocTypes == - ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) - // Lighter purple. - return "mediumorchid1"; - return "gray"; - } - - static std::string getNodeId(NodeRef Node) { - std::stringstream SStream; - SStream << std::hex << "N0x" << (unsigned long long)Node; - std::string Result = SStream.str(); - return Result; - } -}; - -template -void CallsiteContextGraph::exportToDot( - std::string Label) const { - WriteGraph(this, "", false, Label, - DotFilePathPrefix + "ccg." + Label + ".dot"); -} - -template -bool CallsiteContextGraph::process() { - if (DumpCCG) { - dbgs() << "CCG before cloning:\n"; - dbgs() << *this; - } - if (ExportToDot) - exportToDot("postbuild"); - - if (VerifyCCG) { - check(); - } - - return false; -} - -bool MemProfContextDisambiguation::processModule(Module &M) { - bool Changed = false; - - ModuleCallsiteContextGraph CCG(M); - Changed = CCG.process(); - - return Changed; -} - -PreservedAnalyses MemProfContextDisambiguation::run(Module &M, - ModuleAnalysisManager &AM) { - if (!processModule(M)) - return PreservedAnalyses::all(); - return PreservedAnalyses::none(); -} diff --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll new file mode 100644 index 0000000..597cd44 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-summary.ll @@ -0,0 +1,184 @@ +;; Check memprof summaries (per module, combined index, and distributed indexes) + +; RUN: split-file %s %t +; RUN: opt -module-summary %t/a.ll -o %ta.bc +; RUN: opt -module-summary %t/b.ll -o %tb.bc + +; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA +; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562 + +; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB +; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798 +; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275 +; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000 + +; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA +; PRELINKBCANA: + +; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB +; PRELINKBCANB: + +; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%ta.bc,main,plx \ +; RUN: -r=%ta.bc,_Z3foov, \ +; RUN: -r=%ta.bc,free, \ +; RUN: -r=%ta.bc,sleep, \ +; RUN: -r=%tb.bc,_Z3foov,pl \ +; RUN: -r=%tb.bc,_Znam, \ +; RUN: -r=%tb.bc,_Z3bazv,pl + +; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS +; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) +; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) +; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184)))))) +; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438)))))) + +; RUN: llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN +; COMBINEDBCAN: + +; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA +; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848)))))) +; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) + +; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB +; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) +; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) +; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184)))))) + +; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA +; DISTRIBUTEDBCANA: + +; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB +; DISTRIBUTEDBCANB: + +;--- a.ll +; ModuleID = 'a.cc' +source_filename = "a.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress norecurse uwtable +define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 { +entry: + %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43 + %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47 + call void @free(ptr noundef %call) #4, !dbg !48 + %call2 = call i32 @sleep(i32 noundef 10), !dbg !49 + call void @free(ptr noundef %call1) #4, !dbg !50 + ret i32 0, !dbg !51 +} + +declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1 + +; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") +declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3 + +declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1 + +attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly } +attributes #3 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!40 = !DISubroutineType(types: !41) +!41 = !{} +!42 = !DILocation(line: 6, column: 13, scope: !39) +!43 = !{i64 8632435727821051414} +!44 = !DILocation(line: 7, column: 13, scope: !39) +!45 = !{i64 -3421689549917153178} +!46 = !DILocation(line: 8, column: 3, scope: !39) +!47 = !DILocation(line: 9, column: 3, scope: !39) +!48 = !DILocation(line: 10, column: 3, scope: !39) +!49 = !DILocation(line: 11, column: 3, scope: !39) +!50 = !DILocation(line: 12, column: 3, scope: !39) +!51 = !DILocation(line: 13, column: 3, scope: !39) +!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) +!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) +!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a") + +;--- b.ll +; ModuleID = 'b.cc' +source_filename = "b.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress noinline uwtable +define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 { +entry: + %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48 + ret ptr %call, !dbg !49 +} + +; Function Attrs: nobuiltin allocsize(0) +declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1 + +; Function Attrs: mustprogress noinline uwtable +define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 { +entry: + %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52 + ret ptr %call, !dbg !53 +} + +; Function Attrs: mustprogress uwtable +define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 { +entry: + %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56 + ret ptr %call, !dbg !57 +} + +attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { builtin allocsize(0) } +attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!40 = !DISubroutineType(types: !41) +!41 = !{} +!42 = !DILocation(line: 2, column: 10, scope: !39) +!43 = !{!44, !46} +!44 = !{!45, !"notcold"} +!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!46 = !{!47, !"cold"} +!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!48 = !{i64 9086428284934609951} +!49 = !DILocation(line: 2, column: 3, scope: !39) +!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!51 = !DILocation(line: 6, column: 10, scope: !50) +!52 = !{i64 -5964873800580613432} +!53 = !DILocation(line: 6, column: 3, scope: !50) +!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!55 = !DILocation(line: 10, column: 10, scope: !54) +!56 = !{i64 2732490490862098848} +!57 = !DILocation(line: 10, column: 3, scope: !54) diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll deleted file mode 100644 index 539d88a..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ /dev/null @@ -1,158 +0,0 @@ -;; Test callsite context graph generation for simple call graph with -;; two memprof contexts and no inlining. -;; -;; Original code looks like: -;; -;; char *bar() { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @main() #0 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !0 - %call1 = call noundef ptr @_Z3foov(), !callsite !1 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 - -; Function Attrs: nobuiltin -declare void @_ZdaPv() #2 - -define internal ptr @_Z3barv() #3 { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 - ret ptr null -} - -declare ptr @_Znam(i64) - -define internal ptr @_Z3bazv() #4 { -entry: - %call = call noundef ptr @_Z3barv(), !callsite !8 - ret ptr null -} - -; Function Attrs: noinline -define internal ptr @_Z3foov() #5 { -entry: - %call = call noundef ptr @_Z3bazv(), !callsite !9 - ret ptr null -} - -; uselistorder directives -uselistorder ptr @_Z3foov, { 1, 0 } - -attributes #0 = { "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #2 = { nobuiltin } -attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #4 = { "stack-protector-buffer-size"="8" } -attributes #5 = { noinline } -attributes #6 = { builtin } - -!0 = !{i64 8632435727821051414} -!1 = !{i64 -3421689549917153178} -!2 = !{!3, !5} -!3 = !{!4, !"notcold"} -!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold"} -!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!7 = !{i64 9086428284934609951} -!8 = !{i64 -5964873800580613432} -!9 = !{i64 2732490490862098848} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -; DUMP: Node [[BAZ]] -; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -; DUMP: Node [[FOO]] -; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 - -; DUMP: Node [[MAIN1]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN2]] -; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: - - -; DOT: digraph "postbuild" { -; DOT: label="postbuild"; -; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; -; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"]; -; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; -; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; -; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; -; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll deleted file mode 100644 index c5ed97f..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll +++ /dev/null @@ -1,232 +0,0 @@ -;; Test callsite context graph generation for call graph with with MIBs -;; that have pruned contexts that partially match multiple inlined -;; callsite contexts, requiring duplication of context ids and nodes -;; while matching callsite nodes onto the graph. -;; -;; Original code looks like: -;; -;; char *D() { -;; return new char[10]; -;; } -;; -;; char *F() { -;; return D(); -;; } -;; -;; char *C() { -;; return D(); -;; } -;; -;; char *B() { -;; return C(); -;; } -;; -;; char *E() { -;; return C(); -;; } -;; int main(int argc, char **argv) { -;; char *x = B(); // cold -;; char *y = E(); // cold -;; char *z = F(); // default -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; memset(z, 0, 10); -;; delete[] z; -;; sleep(10); -;; delete[] x; -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The code below was created by forcing inlining of C into both B and E. -;; Since both allocation contexts via C are cold, the matched memprof -;; metadata has the context pruned above C's callsite. This requires -;; matching the stack node for C to callsites where it was inlined (i.e. -;; the callsites in B and E that have callsite metadata that includes C's). -;; It also requires duplication of that node in the graph as well as the -;; duplication of the context ids along that path through the graph, -;; so that we can represent the duplicated (via inlining) C callsite. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define internal ptr @_Z1Dv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5 - ret ptr null -} - -declare ptr @_Znam(i64) - -define internal ptr @_Z1Fv() #0 { -entry: - %call = call noundef ptr @_Z1Dv(), !callsite !6 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Cv() #1 { -entry: - %call = call noundef ptr @_Z1Dv(), !callsite !7 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Bv() #1 { -entry: - %call.i = call noundef ptr @_Z1Dv(), !callsite !8 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Ev() #1 { -entry: - %call.i = call noundef ptr @_Z1Dv(), !callsite !9 - ret ptr null -} - -; Function Attrs: noinline -declare i32 @main() #2 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: nounwind -declare void @_ZdaPv() #4 - -declare i32 @sleep() #5 - -attributes #0 = { "disable-tail-calls"="true" } -attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { noinline } -attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #4 = { nounwind } -attributes #5 = { "no-trapping-math"="true" } -attributes #6 = { builtin } - -!0 = !{!1, !3} -!1 = !{!2, !"cold"} -!2 = !{i64 6541423618768552252, i64 -6270142974039008131} -!3 = !{!4, !"notcold"} -!4 = !{i64 6541423618768552252, i64 -4903163940066524832} -!5 = !{i64 6541423618768552252} -!6 = !{i64 -4903163940066524832} -!7 = !{i64 -6270142974039008131} -!8 = !{i64 -6270142974039008131, i64 -184525619819294889} -!9 = !{i64 -6270142974039008131, i64 1905834578520680781} - - -;; After adding only the alloc node memprof metadata, we only have 2 contexts. - -; DUMP: CCG before updating call stack chains: -; DUMP: Callsite Context Graph: -; DUMP: Node [[D:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 -; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 - -; DUMP: Node [[C]] -; DUMP: null Call -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[F]] -; DUMP: null Call -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: CallerEdges: - -;; After updating for callsite metadata, we should have generated context ids 3 and 4, -;; along with 2 new nodes for those callsites. All have the same allocation type -;; behavior as the original C node. - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[D]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 -; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 - -; DUMP: Node [[F]] -; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[C2]] -; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 -; DUMP: CallerEdges: - -; DUMP: Node [[B]] -; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[E]] -; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 -; DUMP: CallerEdges: - - -; DOTPRE: digraph "prestackupdate" { -; DOTPRE: label="prestackupdate"; -; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; -; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"]; -; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; -; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"]; -; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; -; DOTPRE: } - - -; DOTPOST:digraph "postbuild" { -; DOTPOST: label="postbuild"; -; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; -; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"]; -; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; -; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"]; -; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"]; -; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"]; -; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"]; -; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; -; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; -; DOTPOST:} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll deleted file mode 100644 index da0fd3f..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll +++ /dev/null @@ -1,386 +0,0 @@ -;; Test callsite context graph generation for call graph with with MIBs -;; that have pruned contexts that partially match multiple inlined -;; callsite contexts, requiring duplication of context ids and nodes -;; while matching callsite nodes onto the graph. This test requires more -;; complex duplication due to multiple contexts for different allocations -;; that share some of the same callsite nodes. -;; -;; Original code looks like: -;; -;; char *D(bool Call1) { -;; if (Call1) -;; return new char[10]; -;; else -;; return new char[10]; -;; } -;; -;; char *C(bool Call1) { -;; return D(Call1); -;; } -;; -;; char *B(bool Call1) { -;; if (Call1) -;; return C(true); -;; else -;; return C(false); -;; } -;; -;; char *A(bool Call1) { -;; return B(Call1); -;; } -;; -;; char *A1() { -;; return A(true); -;; } -;; -;; char *A2() { -;; return A(true); -;; } -;; -;; char *A3() { -;; return A(false); -;; } -;; -;; char *A4() { -;; return A(false); -;; } -;; -;; char *E() { -;; return B(true); -;; } -;; -;; char *F() { -;; return B(false); -;; } -;; -;; int main(int argc, char **argv) { -;; char *a1 = A1(); // cold -;; char *a2 = A2(); // cold -;; char *e = E(); // default -;; char *a3 = A3(); // default -;; char *a4 = A4(); // default -;; char *f = F(); // cold -;; memset(a1, 0, 10); -;; memset(a2, 0, 10); -;; memset(e, 0, 10); -;; memset(a3, 0, 10); -;; memset(a4, 0, 10); -;; memset(f, 0, 10); -;; delete[] a3; -;; delete[] a4; -;; delete[] e; -;; sleep(10); -;; delete[] a1; -;; delete[] a2; -;; delete[] f; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The code below was created by forcing inlining of A into its callers, -;; without any other inlining or optimizations. Since both allocation contexts -;; via A for each allocation in D have the same allocation type (cold via -;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second -;; new in D, the contexts for those respective allocations are pruned above A. -;; The allocations via E and F are to ensure we don't prune above B. -;; -;; The matching onto the inlined A[1234]->A sequences will require duplication -;; of the context id assigned to the context from A for each allocation in D. -;; This test ensures that we do this correctly in the presence of callsites -;; shared by the different duplicated context ids (i.e. callsite in C). -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Db(i1 %Call1) #0 { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5 - br label %return - -if.else: ; No predecessors! - %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11 - br label %return - -return: ; preds = %if.else, %entry - ret ptr null -} - -; Function Attrs: nobuiltin -declare ptr @_Znam(i64) #1 - -define ptr @_Z1Cb(i1 %Call1) { -entry: - %tobool = trunc i8 0 to i1 - %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Bb(i1 %Call1) #0 { -entry: - %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13 - br label %return - -if.else: ; No predecessors! - %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14 - br label %return - -return: ; preds = %if.else, %entry - ret ptr null -} - -define ptr @_Z1Ab(i1 %tobool) #2 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A1v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A2v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A3v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A4v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Ev() #0 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Fv() #0 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21 - ret ptr null -} - -; Function Attrs: noinline -declare i32 @main() #3 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -; uselistorder directives -uselistorder ptr @_Znam, { 1, 0 } - -attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nobuiltin } -attributes #2 = { "tune-cpu"="generic" } -attributes #3 = { noinline } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin allocsize(0) } - -!0 = !{!1, !3} -!1 = !{!2, !"notcold"} -!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} -!3 = !{!4, !"cold"} -!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} -!5 = !{i64 4854880825882961848} -!6 = !{!7, !9} -!7 = !{!8, !"notcold"} -!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} -!9 = !{!10, !"cold"} -!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} -!11 = !{i64 -8775068539491628272} -!12 = !{i64 -904694911315397047} -!13 = !{i64 6532298921261778285} -!14 = !{i64 7859682663773658275} -!15 = !{i64 -6528110295079665978} -!16 = !{i64 -6528110295079665978, i64 5747919905719679568} -!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} -!18 = !{i64 -6528110295079665978, i64 1794685869326395337} -!19 = !{i64 -6528110295079665978, i64 5462047985461644151} -!20 = !{i64 1905834578520680781} -!21 = !{i64 -4903163940066524832} - - -;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only -;; match the interesting parts of the pre-update graph here). - -; DUMP: CCG before updating call stack chains: -; DUMP: Callsite Context Graph: - -; DUMP: Node [[D1:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 - -; DUMP: Node [[C:0x[a-z0-9]+]] -; DUMP: null Call -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 - -; DUMP: Node [[D2]] -; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 - - -;; After updating for callsite metadata, we should have duplicated the context -;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A, -;; and used those on new nodes for those callers. Note that while in reality -;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4, -;; due to the pruning we have lost this information and thus end up duplicating -;; both of A's contexts to all of the new nodes (which could result in some -;; unnecessary cloning. - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[D1]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 5 7 9 11 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 - -; DUMP: Node [[C]] -; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 -; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 -; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 - -; DUMP: Node [[B1]] -; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 5 7 9 11 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5 -; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7 -; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9 -; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11 -; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 - -; DUMP: Node [[E]] -; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[D2]] -; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 6 8 10 12 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 - -; DUMP: Node [[B2]] -; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 6 8 10 12 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 -; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 -; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 -; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 -; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 - -; DUMP: Node [[F]] -; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[A2]] -; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 5 6 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 -; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 -; DUMP: CallerEdges: - -; DUMP: Node [[A3]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 7 8 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 -; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 -; DUMP: CallerEdges: - -; DUMP: Node [[A1]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 9 10 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 -; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 -; DUMP: CallerEdges: - -; DUMP: Node [[A4]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 11 12 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 -; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 -; DUMP: CallerEdges: - -; DUMP: Node [[A]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 2 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 -; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 -; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll deleted file mode 100644 index 9ebf219..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll +++ /dev/null @@ -1,261 +0,0 @@ -;; Tests callsite context graph generation for call graph containing indirect -;; calls. Currently this should result in conservative behavior, such that the -;; indirect call receives a null call in its graph node, to prevent subsequent -;; cloning. -;; -;; Original code looks like: -;; -;; char *foo() { -;; return new char[10]; -;; } -;; class A { -;; public: -;; virtual char *x() { return foo(); } -;; }; -;; class B : public A { -;; public: -;; char *x() final { return foo(); } -;; }; -;; char *bar(A *a) { -;; return a->x(); -;; } -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; B b; -;; char *z = bar(&b); -;; char *w = bar(&b); -;; A a; -;; char *r = bar(&a); -;; char *s = bar(&a); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; memset(z, 0, 10); -;; memset(w, 0, 10); -;; memset(r, 0, 10); -;; memset(s, 0, 10); -;; delete[] x; -;; delete[] w; -;; delete[] r; -;; sleep(10); -;; delete[] y; -;; delete[] z; -;; delete[] s; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; Compiled without optimization to prevent inlining and devirtualization. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare ptr @_Z3barP1A(ptr) - -define i32 @main(ptr %b, ptr %a) #0 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !0 - %call1 = call noundef ptr @_Z3foov(), !callsite !1 - %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2 - %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3 - %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4 - %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5 - ret i32 0 -} - -; Function Attrs: noinline -declare void @_ZN1BC2Ev() #1 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 - -; Function Attrs: nobuiltin -declare void @_ZdaPv() #3 - -define internal ptr @_ZN1A1xEv() #4 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !6 - ret ptr null -} - -; Function Attrs: mustprogress uwtable -define internal ptr @_ZN1B1xEv() #5 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !7 - ret ptr null -} - -; Function Attrs: mustprogress uwtable -define internal ptr @_Z3foov() #5 { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21 - ret ptr null -} - -declare ptr @_Znam(i64) #6 - -; uselistorder directives -uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } - -attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #1 = { noinline } -attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #3 = { nobuiltin } -attributes #4 = { "tune-cpu"="generic" } -attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!0 = !{i64 8632435727821051414} -!1 = !{i64 -3421689549917153178} -!2 = !{i64 6792096022461663180} -!3 = !{i64 -2709642582978494015} -!4 = !{i64 748269490701775343} -!5 = !{i64 -5747251260480066785} -!6 = !{i64 8256774051149711748} -!7 = !{i64 -4831879094954754638} -!8 = !{!9, !11, !13, !15, !17, !19} -!9 = !{!10, !"notcold"} -!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} -!11 = !{!12, !"cold"} -!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} -!13 = !{!14, !"notcold"} -!14 = !{i64 2732490490862098848, i64 8632435727821051414} -!15 = !{!16, !"cold"} -!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} -!17 = !{!18, !"notcold"} -!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} -!19 = !{!20, !"cold"} -!20 = !{i64 2732490490862098848, i64 -3421689549917153178} -!21 = !{i64 2732490490862098848} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[FOO:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 5 6 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 - -; DUMP: Node [[AX]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -;; Bar contains an indirect call, with multiple targets. It's call should be null. -; DUMP: Node [[BAR]] -; DUMP: null Call -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 4 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 - -; DUMP: Node [[MAIN3]] -; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN4]] -; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN1]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 -; DUMP: CallerEdges: - -; DUMP: Node [[BX]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 4 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 - -; DUMP: Node [[MAIN5]] -; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN6]] -; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN2]] -; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 6 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 -; DUMP: CallerEdges: - - -; DOT: digraph "postbuild" { -; DOT: label="postbuild"; -; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; -; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; -; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; -; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; -; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; -; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; -; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; -; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; -; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; -; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; -; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll deleted file mode 100644 index 59f135c..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll +++ /dev/null @@ -1,189 +0,0 @@ -;; Test callsite context graph generation for call graph with two memprof -;; contexts and partial inlining, requiring generation of a new fused node to -;; represent the inlined sequence while matching callsite nodes onto the graph. -;; -;; Original code looks like: -;; -;; char *bar() { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The code below was created by forcing inlining of baz into foo, and -;; bar into baz. Due to the inlining of bar we will initially have two -;; allocation nodes in the graph. This tests that we correctly match -;; foo (with baz inlined) onto the graph nodes first, and generate a new -;; fused node for it. We should then not match baz (with bar inlined) as that -;; is not reached by the MIB contexts (since all calls from main will look -;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define internal ptr @_Z3barv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5 - ret ptr null -} - -; Function Attrs: nobuiltin -declare ptr @_Znam(i64) #0 - -; Function Attrs: mustprogress -define internal ptr @_Z3bazv() #1 { -entry: - %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6 - ret ptr null -} - -; Function Attrs: noinline -define internal ptr @_Z3foov() #2 { -entry: - %call.i = call noundef ptr @_Z3barv(), !callsite !7 - ret ptr null -} - -define i32 @main() #3 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !8 - %call1 = call noundef ptr @_Z3foov(), !callsite !9 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -; Function Attrs: nounwind -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -attributes #0 = { nobuiltin } -attributes #1 = { mustprogress } -attributes #2 = { noinline } -attributes #3 = { "tune-cpu"="generic" } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { nounwind } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!0 = !{!1, !3} -!1 = !{!2, !"notcold"} -!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!3 = !{!4, !"cold"} -!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!5 = !{i64 9086428284934609951} -!6 = !{i64 9086428284934609951, i64 -5964873800580613432} -!7 = !{i64 -5964873800580613432, i64 2732490490862098848} -!8 = !{i64 8632435727821051414} -!9 = !{i64 -3421689549917153178} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -;; This is leftover from the MIB on the alloc inlined into baz. It is not -;; matched with any call, since there is no such node in the IR. Due to the -;; null call it will not participate in any context transformations. -; DUMP: Node [[FOO2:0x[a-z0-9]+]] -; DUMP: null Call -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 - -; DUMP: Node [[MAIN1]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN2]] -; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[BAZ]] -; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 - -;; This is the node synthesized for the call to bar in foo that was created -;; by inlining baz into foo. -; DUMP: Node [[FOO]] -; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 - - -; DOT: digraph "postbuild" { -; DOT: label="postbuild"; -; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; -; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; -; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; -; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"]; -; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"]; -; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"]; -; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"]; -; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; -; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll deleted file mode 100644 index a3a056a..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll +++ /dev/null @@ -1,135 +0,0 @@ -;; Test callsite context graph generation for call graph with two memprof -;; contexts and multiple levels of inlining, requiring generation of new -;; fused nodes to represent the inlined sequence while matching callsite -;; nodes onto the graph. In particular this tests the case where a function -;; has inlined a callee containing an inlined callee. -;; -;; Original code looks like: -;; -;; char *bar() __attribute__((noinline)) { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; Both foo and baz are inlined into main, at both foo callsites. -;; We should update the graph for new fused nodes for both of those inlined -;; callsites to bar. -;; -;; Note that baz and bar are both dead due to the inlining, but have been left -;; in the input IR to ensure that the MIB call chain is matched to the longer -;; inline sequences from main. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define ptr @_Z3barv() #0 { -entry: - %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13 - ret ptr null -} - -; Function Attrs: nobuiltin -declare ptr @_Znam(i64) #1 - -; Function Attrs: mustprogress -declare ptr @_Z3bazv() #2 - -define i32 @main() #3 { -delete.end5: - %call.i.i = call noundef ptr @_Z3barv(), !callsite !14 - %call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -attributes #0 = { "stack-protector-buffer-size"="8" } -attributes #1 = { nobuiltin } -attributes #2 = { mustprogress } -attributes #3 = { "tune-cpu"="generic" } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} - -!0 = !{i32 7, !"Dwarf Version", i32 5} -!1 = !{i32 2, !"Debug Info Version", i32 3} -!2 = !{i32 1, !"wchar_size", i32 4} -!3 = !{i32 8, !"PIC Level", i32 2} -!4 = !{i32 7, !"PIE Level", i32 2} -!5 = !{i32 7, !"uwtable", i32 2} -!6 = !{i32 7, !"frame-pointer", i32 2} -!7 = !{!8, !10} -!8 = !{!9, !"notcold"} -!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!10 = !{!11, !"cold"} -!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!12 = !{i64 9086428284934609951} -!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 - -;; This is the node synthesized for the first inlined call chain of main->foo->baz -; DUMP: Node [[MAIN1]] -; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -;; This is the node synthesized for the second inlined call chain of main->foo->baz -; DUMP: Node [[MAIN2]] -; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll deleted file mode 100644 index fede5fe..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll +++ /dev/null @@ -1,41 +0,0 @@ -;; Test that MemProfContextDisambiguation is enabled under the expected conditions -;; and in the expected position. - -;; Pass is not currently enabled by default at any opt level. -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" - -;; Pass should not run even under option at O0/O1. -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" - -;; Pass should be enabled under option at O2/O3. -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED -; RUN: opt -debug-pass-manager -passes='lto' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED - -;; When enabled, MemProfContextDisambiguation runs just after inlining. -; ENABLED: Running pass: InlinerPass -; ENABLED: Invalidating analysis: InlineAdvisorAnalysis -; ENABLED: Running pass: MemProfContextDisambiguation - -define noundef ptr @_Z3barv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) - ret ptr %call -} - -declare noundef nonnull ptr @_Znam(i64 noundef) -- 2.7.4