From 1392b654ff6573ca2dba4101f72e990822539c7d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 23 Nov 2021 20:10:36 +0000 Subject: [PATCH] Revert "profi - a flow-based profile inference algorithm: Part I (out of 3)" This reverts commit 884b6dd311422bbfac62b8a90fbfff8e77ba8121. The windows build is broken with a linker error. --- .../llvm/Transforms/Utils/SampleProfileInference.h | 284 ------------- .../Transforms/Utils/SampleProfileLoaderBaseImpl.h | 162 +++----- llvm/lib/Transforms/IPO/SampleProfile.cpp | 30 +- llvm/lib/Transforms/Utils/CMakeLists.txt | 1 - .../Transforms/Utils/SampleProfileInference.cpp | 461 --------------------- .../Utils/SampleProfileLoaderBaseUtil.cpp | 4 - .../SampleProfile/Inputs/profile-inference.prof | 23 - .../Transforms/SampleProfile/profile-inference.ll | 245 ----------- 8 files changed, 52 insertions(+), 1158 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Utils/SampleProfileInference.h delete mode 100644 llvm/lib/Transforms/Utils/SampleProfileInference.cpp delete mode 100644 llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof delete mode 100644 llvm/test/Transforms/SampleProfile/profile-inference.ll diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h deleted file mode 100644 index e1f681b..0000000 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h +++ /dev/null @@ -1,284 +0,0 @@ -//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file provides the interface for the profile inference algorithm, profi. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H -#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallVector.h" - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" - -namespace llvm { - -class BasicBlock; -class Function; -class MachineBasicBlock; -class MachineFunction; - -namespace afdo_detail { - -template struct TypeMap {}; -template <> struct TypeMap { - using BasicBlockT = BasicBlock; - using FunctionT = Function; -}; -template <> struct TypeMap { - using BasicBlockT = MachineBasicBlock; - using FunctionT = MachineFunction; -}; - -} // end namespace afdo_detail - -struct FlowJump; - -/// A wrapper of a binary basic block. -struct FlowBlock { - uint64_t Index; - uint64_t Weight{0}; - bool UnknownWeight{false}; - uint64_t Flow{0}; - bool HasSelfEdge{false}; - std::vector SuccJumps; - std::vector PredJumps; - - /// Check if it is the entry block in the function. - bool isEntry() const { return PredJumps.empty(); } - - /// Check if it is an exit block in the function. - bool isExit() const { return SuccJumps.empty(); } -}; - -/// A wrapper of a jump between two basic blocks. -struct FlowJump { - uint64_t Source; - uint64_t Target; - uint64_t Flow{0}; - bool IsUnlikely{false}; -}; - -/// A wrapper of binary function with basic blocks and jumps. -struct FlowFunction { - std::vector Blocks; - std::vector Jumps; - /// The index of the entry block. 
- uint64_t Entry; -}; - -void applyFlowInference(FlowFunction &Func); - -/// Sample profile inference pass. -template class SampleProfileInference { -public: - using BasicBlockT = typename afdo_detail::TypeMap::BasicBlockT; - using FunctionT = typename afdo_detail::TypeMap::FunctionT; - using Edge = std::pair; - using BlockWeightMap = DenseMap; - using EdgeWeightMap = DenseMap; - using BlockEdgeMap = - DenseMap>; - - SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, - BlockWeightMap &SampleBlockWeights) - : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {} - - /// Apply the profile inference algorithm for a given function - void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); - -private: - /// Try to infer branch probabilities mimicking implementation of - /// BranchProbabilityInfo. Unlikely taken branches are marked so that the - /// inference algorithm can avoid sending flow along corresponding edges. - void findUnlikelyJumps(const std::vector &BasicBlocks, - BlockEdgeMap &Successors, FlowFunction &Func); - - /// Determine whether the block is an exit in the CFG. - bool isExit(const BasicBlockT *BB); - - /// Function. - const FunctionT &F; - - /// Successors for each basic block in the CFG. - BlockEdgeMap &Successors; - - /// Map basic blocks to their sampled weights. - BlockWeightMap &SampleBlockWeights; -}; - -template -void SampleProfileInference::apply(BlockWeightMap &BlockWeights, - EdgeWeightMap &EdgeWeights) { - // Find all forwards reachable blocks which the inference algorithm will be - // applied on. - df_iterator_default_set Reachable; - for (auto *BB : depth_first_ext(&F, Reachable)) - (void)BB /* Mark all reachable blocks */; - - // Find all backwards reachable blocks which the inference algorithm will be - // applied on. - df_iterator_default_set InverseReachable; - for (const auto &BB : F) { - // An exit block is a block without any successors. 
- if (isExit(&BB)) { - for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable)) - (void)RBB; - } - } - - // Keep a stable order for reachable blocks - DenseMap BlockIndex; - std::vector BasicBlocks; - BlockIndex.reserve(Reachable.size()); - BasicBlocks.reserve(Reachable.size()); - for (const auto &BB : F) { - if (Reachable.count(&BB) && InverseReachable.count(&BB)) { - BlockIndex[&BB] = BasicBlocks.size(); - BasicBlocks.push_back(&BB); - } - } - - BlockWeights.clear(); - EdgeWeights.clear(); - bool HasSamples = false; - for (const auto *BB : BasicBlocks) { - auto It = SampleBlockWeights.find(BB); - if (It != SampleBlockWeights.end() && It->second > 0) { - HasSamples = true; - BlockWeights[BB] = It->second; - } - } - // Quit early for functions with a single block or ones w/o samples - if (BasicBlocks.size() <= 1 || !HasSamples) { - return; - } - - // Create necessary objects - FlowFunction Func; - Func.Blocks.reserve(BasicBlocks.size()); - // Create FlowBlocks - for (const auto *BB : BasicBlocks) { - FlowBlock Block; - if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) { - Block.UnknownWeight = false; - Block.Weight = SampleBlockWeights[BB]; - } else { - Block.UnknownWeight = true; - Block.Weight = 0; - } - Block.Index = Func.Blocks.size(); - Func.Blocks.push_back(Block); - } - // Create FlowEdges - for (const auto *BB : BasicBlocks) { - for (auto *Succ : Successors[BB]) { - if (!BlockIndex.count(Succ)) - continue; - FlowJump Jump; - Jump.Source = BlockIndex[BB]; - Jump.Target = BlockIndex[Succ]; - Func.Jumps.push_back(Jump); - if (BB == Succ) { - Func.Blocks[BlockIndex[BB]].HasSelfEdge = true; - } - } - } - for (auto &Jump : Func.Jumps) { - Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump); - Func.Blocks[Jump.Target].PredJumps.push_back(&Jump); - } - - // Try to infer probabilities of jumps based on the content of basic block - findUnlikelyJumps(BasicBlocks, Successors, Func); - - // Find the entry block - for (size_t I = 0; I < Func.Blocks.size(); I++) { - if (Func.Blocks[I].isEntry()) { - Func.Entry = I; - break; - } - } - - // Create and apply the inference network model. - applyFlowInference(Func); - - // Extract the resulting weights from the control flow - // All weights are increased by one to avoid propagation errors introduced by - // zero weights. - for (const auto *BB : BasicBlocks) { - BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow; - } - for (auto &Jump : Func.Jumps) { - Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]); - EdgeWeights[E] = Jump.Flow; - } - -#ifndef NDEBUG - // Unreachable blocks and edges should not have a weight. - for (auto &I : BlockWeights) { - assert(Reachable.contains(I.first)); - assert(InverseReachable.contains(I.first)); - } - for (auto &I : EdgeWeights) { - assert(Reachable.contains(I.first.first) && - Reachable.contains(I.first.second)); - assert(InverseReachable.contains(I.first.first) && - InverseReachable.contains(I.first.second)); - } -#endif -} - -template -inline void SampleProfileInference::findUnlikelyJumps( - const std::vector &BasicBlocks, - BlockEdgeMap &Successors, FlowFunction &Func) {} - -template <> -inline void SampleProfileInference::findUnlikelyJumps( - const std::vector &BasicBlocks, - BlockEdgeMap &Successors, FlowFunction &Func) { - for (auto &Jump : Func.Jumps) { - const auto *BB = BasicBlocks[Jump.Source]; - const auto *Succ = BasicBlocks[Jump.Target]; - const Instruction *TI = BB->getTerminator(); - // Check if a block ends with InvokeInst and mark non-taken branch unlikely. 
- // In that case block Succ should be a landing pad - if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) { - if (isa(TI)) { - Jump.IsUnlikely = true; - } - } - const Instruction *SuccTI = Succ->getTerminator(); - // Check if the target block contains UnreachableInst and mark it unlikely - if (SuccTI->getNumSuccessors() == 0) { - if (isa(SuccTI)) { - Jump.IsUnlikely = true; - } - } - } -} - -template -inline bool SampleProfileInference::isExit(const BasicBlockT *BB) { - return BB->succ_empty(); -} - -template <> -inline bool SampleProfileInference::isExit(const BasicBlock *BB) { - return succ_empty(BB); -} - -} // end namespace llvm -#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index e9b3d5a..6a2f0ac 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -38,7 +38,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" namespace llvm { @@ -75,8 +74,6 @@ template <> struct IRTraits { } // end namespace afdo_detail -extern cl::opt SampleProfileUseProfi; - template class SampleProfileLoaderBaseImpl { public: SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) @@ -145,9 +142,6 @@ protected: ArrayRef Descendants, PostDominatorTreeT *DomTree); void propagateWeights(FunctionT &F); - void applyProfi(FunctionT &F, BlockEdgeMap &Successors, - BlockWeightMap &SampleBlockWeights, - BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); @@ -156,11 +150,6 @@ protected: bool computeAndPropagateWeights(FunctionT &F, const DenseSet &InlinedGUIDs); - void initWeightPropagation(FunctionT &F, - const DenseSet &InlinedGUIDs); - void - finalizeWeightPropagation(FunctionT &F, - const DenseSet &InlinedGUIDs); void emitCoverageRemarks(FunctionT &F); /// Map basic blocks to their computed weights. @@ -752,65 +741,50 @@ void SampleProfileLoaderBaseImpl::buildEdges(FunctionT &F) { /// known). template void SampleProfileLoaderBaseImpl::propagateWeights(FunctionT &F) { - // Flow-based profile inference is only usable with BasicBlock instantiation - // of SampleProfileLoaderBaseImpl. - if (SampleProfileUseProfi) { - // Prepare block sample counts for inference. - BlockWeightMap SampleBlockWeights; - for (const auto &BI : F) { - ErrorOr Weight = getBlockWeight(&BI); - if (Weight) - SampleBlockWeights[&BI] = Weight.get(); + bool Changed = true; + unsigned I = 0; + + // If BB weight is larger than its corresponding loop's header BB weight, + // use the BB weight to replace the loop header BB weight. + for (auto &BI : F) { + BasicBlockT *BB = &BI; + LoopT *L = LI->getLoopFor(BB); + if (!L) { + continue; } - // Fill in BlockWeights and EdgeWeights using an inference algorithm. - applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights); - } else { - bool Changed = true; - unsigned I = 0; - - // If BB weight is larger than its corresponding loop's header BB weight, - // use the BB weight to replace the loop header BB weight. 
- for (auto &BI : F) { - BasicBlockT *BB = &BI; - LoopT *L = LI->getLoopFor(BB); - if (!L) { - continue; - } - BasicBlockT *Header = L->getHeader(); - if (Header && BlockWeights[BB] > BlockWeights[Header]) { - BlockWeights[Header] = BlockWeights[BB]; - } + BasicBlockT *Header = L->getHeader(); + if (Header && BlockWeights[BB] > BlockWeights[Header]) { + BlockWeights[Header] = BlockWeights[BB]; } + } - // Propagate until we converge or we go past the iteration limit. - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); - } + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); - // The first propagation propagates BB counts from annotated BBs to unknown - // BBs. The 2nd propagation pass resets edges weights, and use all BB - // weights to propagate edge weights. - VisitedEdges.clear(); - Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); - } + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } - // The 3rd propagation pass allows adjust annotated BB weights that are - // obviously wrong. - Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, true); - } + // The first propagation propagates BB counts from annotated BBs to unknown + // BBs. The 2nd propagation pass resets edges weights, and use all BB weights + // to propagate edge weights. + VisitedEdges.clear(); + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); } -} -template -void SampleProfileLoaderBaseImpl::applyProfi( - FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, - BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) { - auto Infer = SampleProfileInference(F, Successors, SampleBlockWeights); - Infer.apply(BlockWeights, EdgeWeights); + // The 3rd propagation pass allows adjust annotated BB weights that are + // obviously wrong. + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, true); + } } /// Generate branch weight metadata for all branches in \p F. @@ -868,64 +842,26 @@ bool SampleProfileLoaderBaseImpl::computeAndPropagateWeights( Changed |= computeBlockWeights(F); if (Changed) { - // Initialize propagation. - initWeightPropagation(F, InlinedGUIDs); + // Add an entry count to the function using the samples gathered at the + // function entry. + // Sets the GUIDs that are inlined in the profiled binary. This is used + // for ThinLink to make correct liveness analysis, and also make the IR + // match the profiled binary before annotation. + getFunction(F).setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); - // Propagate weights to all edges. - propagateWeights(F); - - // Post-process propagated weights. 
- finalizeWeightPropagation(F, InlinedGUIDs); - } - - return Changed; -} - -template -void SampleProfileLoaderBaseImpl::initWeightPropagation( - FunctionT &F, const DenseSet &InlinedGUIDs) { - // Add an entry count to the function using the samples gathered at the - // function entry. - // Sets the GUIDs that are inlined in the profiled binary. This is used - // for ThinLink to make correct liveness analysis, and also make the IR - // match the profiled binary before annotation. - getFunction(F).setEntryCount( - ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), - &InlinedGUIDs); - - if (!SampleProfileUseProfi) { // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); // Find equivalence classes. findEquivalenceClasses(F); - } - - // Before propagation starts, build, for each block, a list of - // unique predecessors and successors. This is necessary to handle - // identical edges in multiway branches. Since we visit all blocks and all - // edges of the CFG, it is cleaner to build these lists once at the start - // of the pass. - buildEdges(F); -} -template -void SampleProfileLoaderBaseImpl::finalizeWeightPropagation( - FunctionT &F, const DenseSet &InlinedGUIDs) { - // If we utilize a flow-based count inference, then we trust the computed - // counts and set the entry count as computed by the algorithm. This is - // primarily done to sync the counts produced by profi and BFI inference, - // which uses the entry count for mass propagation. - // If profi produces a zero-value for the entry count, we fallback to - // Samples->getHeadSamples() + 1 to avoid functions with zero count. - if (SampleProfileUseProfi) { - const BasicBlockT *EntryBB = getEntryBB(&F); - if (BlockWeights[EntryBB] > 0) { - getFunction(F).setEntryCount( - ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), - &InlinedGUIDs); - } + // Propagate weights to all edges. + propagateWeights(F); } + + return Changed; } template diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 3e01fd1..a961c47 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -84,7 +84,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include @@ -1649,19 +1648,6 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { SmallVector Weights; uint32_t MaxWeight = 0; Instruction *MaxDestInst; - // Since profi treats multiple edges (multiway branches) as a single edge, - // we need to distribute the computed weight among the branches. We do - // this by evenly splitting the edge weight among destinations. 
-    DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
-    std::vector<uint64_t> EdgeIndex;
-    if (SampleProfileUseProfi) {
-      EdgeIndex.resize(TI->getNumSuccessors());
-      for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
-        const BasicBlock *Succ = TI->getSuccessor(I);
-        EdgeIndex[I] = EdgeMultiplicity[Succ];
-        EdgeMultiplicity[Succ]++;
-      }
-    }
     for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
       BasicBlock *Succ = TI->getSuccessor(I);
       Edge E = std::make_pair(BB, Succ);
@@ -1674,19 +1660,9 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
           LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
           Weight = std::numeric_limits<uint32_t>::max();
         }
-        if (!SampleProfileUseProfi) {
-          // Weight is added by one to avoid propagation errors introduced by
-          // 0 weights.
-          Weights.push_back(static_cast<uint32_t>(Weight + 1));
-        } else {
-          // Profi creates proper weights that do not require "+1" adjustments,
-          // but we evenly split the weight among branches with the same
-          // destination.
-          uint64_t W = Weight / EdgeMultiplicity[Succ];
-          // Rounding up, if needed, so that the first branches are hotter.
-          if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
-            W++;
-          Weights.push_back(static_cast<uint32_t>(W));
-        }
+        // Weight is added by one to avoid propagation errors introduced by
+        // 0 weights.
+        Weights.push_back(static_cast<uint32_t>(Weight + 1));
         if (Weight != 0) {
           if (Weight > MaxWeight) {
             MaxWeight = Weight;
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 22b9c0b..be4f712 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -60,7 +60,6 @@ add_llvm_component_library(LLVMTransformUtils
   StripGCRelocates.cpp
   SSAUpdater.cpp
   SSAUpdaterBulk.cpp
-  SampleProfileInference.cpp
   SampleProfileLoaderBaseUtil.cpp
   SanitizerStats.cpp
   SimplifyCFG.cpp
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
deleted file mode 100644
index 412a724..0000000
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a profile inference algorithm. Given incomplete and
-// possibly imprecise block counts, the algorithm reconstructs realistic block
-// and edge counts that satisfy flow conservation rules, while minimally
-// modifying the input block counts.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
-#include "llvm/Support/Debug.h"
-#include <queue>
-#include <set>
-
-using namespace llvm;
-#define DEBUG_TYPE "sample-profile-inference"
-
-namespace {
-
-/// A value indicating an infinite flow/capacity/weight of a block/edge.
-/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
-/// during the execution.
-static constexpr int64_t INF = ((int64_t)1) << 50;
-
-/// The minimum-cost maximum flow algorithm.
-///
-/// The algorithm finds the maximum flow of minimum cost on a given (directed)
-/// network using a modified version of the classical Moore-Bellman-Ford
-/// approach. The algorithm applies a number of augmentation iterations in
-/// which flow is sent along paths of positive capacity from the source to the
-/// sink. The worst-case time complexity of the implementation is
-/// O(v(f)*m*n), where m is the number of edges, n is the number of vertices,
-/// and v(f) is the value of the maximum flow. However, the observed running
-/// time on typical instances is sub-quadratic, that is, o(n^2).
-///
-/// The input is a set of edges with specified costs and capacities, and a pair
-/// of nodes (source and sink). The output is the flow along each edge of the
-/// minimum total cost respecting the given edge capacities.
-class MinCostMaxFlow {
-public:
-  // Initialize algorithm's data structures for a network of a given size.
-  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
-    Source = SourceNode;
-    Target = SinkNode;
-
-    Nodes = std::vector<Node>(NodeCount);
-    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
-  }
-
-  // Run the algorithm.
-  int64_t run() {
-    // Find an augmenting path and update the flow along the path
-    size_t AugmentationIters = 0;
-    while (findAugmentingPath()) {
-      augmentFlowAlongPath();
-      AugmentationIters++;
-    }
-
-    // Compute the total flow and its cost
-    int64_t TotalCost = 0;
-    int64_t TotalFlow = 0;
-    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
-      for (auto &Edge : Edges[Src]) {
-        if (Edge.Flow > 0) {
-          TotalCost += Edge.Cost * Edge.Flow;
-          if (Src == Source)
-            TotalFlow += Edge.Flow;
-        }
-      }
-    }
-    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
-                      << " iterations with " << TotalFlow << " total flow"
-                      << " of " << TotalCost << " cost\n");
-    return TotalCost;
-  }
-
-  /// Add an edge to the network with a specified capacity and a cost.
-  /// Multiple edges between a pair of nodes are allowed, but self-edges
-  /// are not supported.
-  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
-    assert(Capacity > 0 && "adding an edge of zero capacity");
-    assert(Src != Dst && "loop edges are not supported");
-
-    Edge SrcEdge;
-    SrcEdge.Dst = Dst;
-    SrcEdge.Cost = Cost;
-    SrcEdge.Capacity = Capacity;
-    SrcEdge.Flow = 0;
-    SrcEdge.RevEdgeIndex = Edges[Dst].size();
-
-    Edge DstEdge;
-    DstEdge.Dst = Src;
-    DstEdge.Cost = -Cost;
-    DstEdge.Capacity = 0;
-    DstEdge.Flow = 0;
-    DstEdge.RevEdgeIndex = Edges[Src].size();
-
-    Edges[Src].push_back(SrcEdge);
-    Edges[Dst].push_back(DstEdge);
-  }
-
-  /// Add an edge to the network of infinite capacity and a given cost.
-  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
-    addEdge(Src, Dst, INF, Cost);
-  }
-
-  /// Get the total flow from a given source node.
-  /// Returns a list of pairs (target node, amount of flow to the target).
-  const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
-    std::vector<std::pair<uint64_t, int64_t>> Flow;
-    for (auto &Edge : Edges[Src]) {
-      if (Edge.Flow > 0)
-        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
-    }
-    return Flow;
-  }
-
-  /// Get the total flow between a pair of nodes.
-  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
-    int64_t Flow = 0;
-    for (auto &Edge : Edges[Src]) {
-      if (Edge.Dst == Dst) {
-        Flow += Edge.Flow;
-      }
-    }
-    return Flow;
-  }
-
-  /// A cost of increasing a block's count by one.
-  static constexpr int64_t AuxCostInc = 10;
-  /// A cost of decreasing a block's count by one.
-  static constexpr int64_t AuxCostDec = 20;
-  /// A cost of increasing a zero-weight block's count by one.
-  static constexpr int64_t AuxCostIncZero = 11;
-  /// A cost of increasing the entry block's count by one.
- static constexpr int64_t AuxCostIncEntry = 40; - /// A cost of decreasing the entry block's count by one. - static constexpr int64_t AuxCostDecEntry = 10; - /// A cost of taking an unlikely jump. - static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; - -private: - /// Check for existence of an augmenting path with a positive capacity. - bool findAugmentingPath() { - // Initialize data structures - for (auto &Node : Nodes) { - Node.Distance = INF; - Node.ParentNode = uint64_t(-1); - Node.ParentEdgeIndex = uint64_t(-1); - Node.Taken = false; - } - - std::queue Queue; - Queue.push(Source); - Nodes[Source].Distance = 0; - Nodes[Source].Taken = true; - while (!Queue.empty()) { - uint64_t Src = Queue.front(); - Queue.pop(); - Nodes[Src].Taken = false; - // Although the residual network contains edges with negative costs - // (in particular, backward edges), it can be shown that there are no - // negative-weight cycles and the following two invariants are maintained: - // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, - // where Dist is the length of the shortest path between two nodes. This - // allows to prune the search-space of the path-finding algorithm using - // the following early-stop criteria: - // -- If we find a path with zero-distance from Source to Target, stop the - // search, as the path is the shortest since Dist[Source, Target] >= 0; - // -- If we have Dist[Source, V] > Dist[Source, Target], then do not - // process node V, as it is guaranteed _not_ to be on a shortest path - // from Source to Target; it follows from inequalities - // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] - // >= Dist[Source, V] - if (Nodes[Target].Distance == 0) - break; - if (Nodes[Src].Distance > Nodes[Target].Distance) - continue; - - // Process adjacent edges - for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { - auto &Edge = Edges[Src][EdgeIdx]; - if (Edge.Flow < Edge.Capacity) { - uint64_t Dst = Edge.Dst; - int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; - if (Nodes[Dst].Distance > NewDistance) { - // Update the distance and the parent node/edge - Nodes[Dst].Distance = NewDistance; - Nodes[Dst].ParentNode = Src; - Nodes[Dst].ParentEdgeIndex = EdgeIdx; - // Add the node to the queue, if it is not there yet - if (!Nodes[Dst].Taken) { - Queue.push(Dst); - Nodes[Dst].Taken = true; - } - } - } - } - } - - return Nodes[Target].Distance != INF; - } - - /// Update the current flow along the augmenting path. - void augmentFlowAlongPath() { - // Find path capacity - int64_t PathCapacity = INF; - uint64_t Now = Target; - while (Now != Source) { - uint64_t Pred = Nodes[Now].ParentNode; - auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; - PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); - Now = Pred; - } - - assert(PathCapacity > 0 && "found incorrect augmenting path"); - - // Update the flow along the path - Now = Target; - while (Now != Source) { - uint64_t Pred = Nodes[Now].ParentNode; - auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; - auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; - - Edge.Flow += PathCapacity; - RevEdge.Flow -= PathCapacity; - - Now = Pred; - } - } - - /// An node in a flow network. - struct Node { - /// The cost of the cheapest path from the source to the current node. - int64_t Distance; - /// The node preceding the current one in the path. - uint64_t ParentNode; - /// The index of the edge between ParentNode and the current node. 
- uint64_t ParentEdgeIndex; - /// An indicator of whether the current node is in a queue. - bool Taken; - }; - /// An edge in a flow network. - struct Edge { - /// The cost of the edge. - int64_t Cost; - /// The capacity of the edge. - int64_t Capacity; - /// The current flow on the edge. - int64_t Flow; - /// The destination node of the edge. - uint64_t Dst; - /// The index of the reverse edge between Dst and the current node. - uint64_t RevEdgeIndex; - }; - - /// The set of network nodes. - std::vector Nodes; - /// The set of network edges. - std::vector> Edges; - /// Source node of the flow. - uint64_t Source; - /// Target (sink) node of the flow. - uint64_t Target; -}; - -/// Initializing flow network for a given function. -/// -/// Every block is split into three nodes that are responsible for (i) an -/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or -/// reduction of the block weight. -void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { - uint64_t NumBlocks = Func.Blocks.size(); - assert(NumBlocks > 1 && "Too few blocks in a function"); - LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); - - // Pre-process data: make sure the entry weight is at least 1 - if (Func.Blocks[Func.Entry].Weight == 0) { - Func.Blocks[Func.Entry].Weight = 1; - } - // Introducing dummy source/sink pairs to allow flow circulation. - // The nodes corresponding to blocks of Func have indicies in the range - // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. - uint64_t S = 3 * NumBlocks; - uint64_t T = S + 1; - uint64_t S1 = S + 2; - uint64_t T1 = S + 3; - - Network.initialize(3 * NumBlocks + 4, S1, T1); - - // Create three nodes for every block of the function - for (uint64_t B = 0; B < NumBlocks; B++) { - auto &Block = Func.Blocks[B]; - assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && - "non-zero weight of a block w/o weight except for an entry"); - - // Split every block into two nodes - uint64_t Bin = 3 * B; - uint64_t Bout = 3 * B + 1; - uint64_t Baux = 3 * B + 2; - if (Block.Weight > 0) { - Network.addEdge(S1, Bout, Block.Weight, 0); - Network.addEdge(Bin, T1, Block.Weight, 0); - } - - // Edges from S and to T - assert((!Block.isEntry() || !Block.isExit()) && - "a block cannot be an entry and an exit"); - if (Block.isEntry()) { - Network.addEdge(S, Bin, 0); - } else if (Block.isExit()) { - Network.addEdge(Bout, T, 0); - } - - // An auxiliary node to allow increase/reduction of block counts: - // We assume that decreasing block counts is more expensive than increasing, - // and thus, setting separate costs here. In the future we may want to tune - // the relative costs so as to maximize the quality of generated profiles. 
-    int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
-    int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
-    if (Block.UnknownWeight) {
-      // Do not penalize changing weights of blocks w/o known profile count
-      AuxCostInc = 0;
-      AuxCostDec = 0;
-    } else {
-      // Increasing the count for "cold" blocks with zero initial count is
-      // more expensive than for "hot" ones
-      if (Block.Weight == 0) {
-        AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
-      }
-      // Modifying the count of the entry block is expensive
-      if (Block.isEntry()) {
-        AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
-        AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
-      }
-    }
-    // For blocks with self-edges, do not penalize a reduction of the count,
-    // as all of the increase can be attributed to the self-edge
-    if (Block.HasSelfEdge) {
-      AuxCostDec = 0;
-    }
-
-    Network.addEdge(Bin, Baux, AuxCostInc);
-    Network.addEdge(Baux, Bout, AuxCostInc);
-    if (Block.Weight > 0) {
-      Network.addEdge(Bout, Baux, AuxCostDec);
-      Network.addEdge(Baux, Bin, AuxCostDec);
-    }
-  }
-
-  // Create an edge for every jump
-  for (auto &Jump : Func.Jumps) {
-    uint64_t Src = Jump.Source;
-    uint64_t Dst = Jump.Target;
-    if (Src != Dst) {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t DstIn = 3 * Dst;
-      uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
-      Network.addEdge(SrcOut, DstIn, Cost);
-    }
-  }
-
-  // Make sure we have a valid flow circulation
-  Network.addEdge(T, S, 0);
-}
-
-/// Extract resulting block and edge counts from the flow network.
-void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
-  uint64_t NumBlocks = Func.Blocks.size();
-
-  // Extract resulting block counts
-  for (uint64_t Src = 0; Src < NumBlocks; Src++) {
-    auto &Block = Func.Blocks[Src];
-    uint64_t SrcOut = 3 * Src + 1;
-    int64_t Flow = 0;
-    for (auto &Adj : Network.getFlow(SrcOut)) {
-      uint64_t DstIn = Adj.first;
-      int64_t DstFlow = Adj.second;
-      bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
-      if (!IsAuxNode || Block.HasSelfEdge) {
-        Flow += DstFlow;
-      }
-    }
-    Block.Flow = Flow;
-    assert(Flow >= 0 && "negative block flow");
-  }
-
-  // Extract resulting jump counts
-  for (auto &Jump : Func.Jumps) {
-    uint64_t Src = Jump.Source;
-    uint64_t Dst = Jump.Target;
-    int64_t Flow = 0;
-    if (Src != Dst) {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t DstIn = 3 * Dst;
-      Flow = Network.getFlow(SrcOut, DstIn);
-    } else {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t SrcAux = 3 * Src + 2;
-      int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
-      if (AuxFlow > 0)
-        Flow = AuxFlow;
-    }
-    Jump.Flow = Flow;
-    assert(Flow >= 0 && "negative jump flow");
-  }
-}
-
-#ifndef NDEBUG
-/// Verify that the computed flow values satisfy flow conservation rules.
-void verifyWeights(const FlowFunction &Func) {
-  const uint64_t NumBlocks = Func.Blocks.size();
-  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
-  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
-  for (auto &Jump : Func.Jumps) {
-    InFlow[Jump.Target] += Jump.Flow;
-    OutFlow[Jump.Source] += Jump.Flow;
-  }
-
-  uint64_t TotalInFlow = 0;
-  uint64_t TotalOutFlow = 0;
-  for (uint64_t I = 0; I < NumBlocks; I++) {
-    auto &Block = Func.Blocks[I];
-    if (Block.isEntry()) {
-      TotalInFlow += Block.Flow;
-      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
-    } else if (Block.isExit()) {
-      TotalOutFlow += Block.Flow;
-      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
-    } else {
-      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
-      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
-    }
-  }
-  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
-}
-#endif
-
-} // end of anonymous namespace
-
-/// Apply the profile inference algorithm for a given flow function.
-void llvm::applyFlowInference(FlowFunction &Func) {
-  // Create and apply an inference network model
-  auto InferenceNetwork = MinCostMaxFlow();
-  initializeNetwork(InferenceNetwork, Func);
-  InferenceNetwork.run();
-
-  // Extract flow values for every block and every edge
-  extractWeights(InferenceNetwork, Func);
-
-#ifndef NDEBUG
-  // Verify the result
-  verifyWeights(Func);
-#endif
-}
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index ea0e834..6d995cf4 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -34,10 +34,6 @@ cl::opt<bool> NoWarnSampleUnused(
     cl::desc("Use this option to turn off/on warnings about function with "
              "samples but without debug information to use those samples. "));
 
-cl::opt<bool> SampleProfileUseProfi(
-    "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
-    cl::desc("Use profi to infer block and edge counts."));
-
 namespace sampleprofutil {
 
 /// Return true if the given callsite is hot w.r.t. the hot cutoff threshold.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof
deleted file mode 100644
index e995a04..0000000
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof
+++ /dev/null
@@ -1,23 +0,0 @@
-test_1:23968:0
- 1: 100
- 2: 60
- 3: 40
- !CFGChecksum: 4294967295
-
-test_2:23968:0
- 1: 100
- 3: 10
- !CFGChecksum: 37753817093
-
-test_3:10000:0
- 3: 13
- 5: 89
- !CFGChecksum: 69502983527
-
-sum_of_squares:23968:0
- 2: 5993
- 3: 1
- 4: 5992
- 5: 5992
- 8: 5992
- !CFGChecksum: 175862120757
diff --git a/llvm/test/Transforms/SampleProfile/profile-inference.ll b/llvm/test/Transforms/SampleProfile/profile-inference.ll
deleted file mode 100644
index 7f40358..0000000
--- a/llvm/test/Transforms/SampleProfile/profile-inference.ll
+++ /dev/null
@@ -1,245 +0,0 @@
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2
-
-; The test verifies that profile inference correctly builds branch probabilities
-; from sampling-based block counts.
-;
-; +---------+     +----------+
-; | b3 [40] | <-- | b1 [100] |
-; +---------+     +----------+
-;                   |
-;                   |
-;                   v
-;                 +----------+
-;                 | b2 [60]  |
-;                 +----------+
-
-@yydebug = dso_local global i32 0, align 4
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_1() #0 {
-b1:
-  call void @llvm.pseudoprobe(i64 7964825052912775246, i64 1, i32 0, i64 -1)
-  %0 = load i32, i32* @yydebug, align 4
-  %cmp = icmp ne i32 %0, 0
-  br i1 %cmp, label %b2, label %b3
-; CHECK: edge b1 -> b2 probability is 0x4ccccccd / 0x80000000 = 60.00%
-; CHECK: edge b1 -> b3 probability is 0x33333333 / 0x80000000 = 40.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 100
-
-b2:
-  call void @llvm.pseudoprobe(i64 7964825052912775246, i64 2, i32 0, i64 -1)
-  ret i32 %0
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 60
-
-b3:
-  call void @llvm.pseudoprobe(i64 7964825052912775246, i64 3, i32 0, i64 -1)
-  ret i32 %0
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 40
-}
-
-
-; The test verifies that profile inference correctly builds branch probabilities
-; from sampling-based block counts in the presence of "dangling" probes (whose
-; block counts are missing).
-;
-; +---------+     +----------+
-; | b3 [10] | <-- | b1 [100] |
-; +---------+     +----------+
-;                   |
-;                   |
-;                   v
-;                 +----------+
-;                 | b2 [?]   |
-;                 +----------+
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_2() #0 {
-b1:
-  call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 1, i32 0, i64 -1)
-  %0 = load i32, i32* @yydebug, align 4
-  %cmp = icmp ne i32 %0, 0
-  br i1 %cmp, label %b2, label %b3
-; CHECK: edge b1 -> b2 probability is 0x73333333 / 0x80000000 = 90.00%
-; CHECK: edge b1 -> b3 probability is 0x0ccccccd / 0x80000000 = 10.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 100
-
-b2:
-  call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 2, i32 0, i64 -1)
-  ret i32 %0
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 90
-
-b3:
-  call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 3, i32 0, i64 -1)
-  ret i32 %0
-}
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 10
-
-
-; The test verifies that profi is able to infer block counts from hot subgraphs.
-;
-; +---------+     +---------+
-; | b4 [?]  | <-- | b1 [?]  |
-; +---------+     +---------+
-;   |               |
-;   |               |
-;   v               v
-; +---------+     +---------+
-; | b5 [89] |     | b2 [?]  |
-; +---------+     +---------+
-;                   |
-;                   |
-;                   v
-;                 +---------+
-;                 | b3 [13] |
-;                 +---------+
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_3() #0 {
-b1:
-  call void @llvm.pseudoprobe(i64 1649282507922421973, i64 1, i32 0, i64 -1)
-  %0 = load i32, i32* @yydebug, align 4
-  %cmp = icmp ne i32 %0, 0
-  br i1 %cmp, label %b2, label %b4
-; CHECK: edge b1 -> b2 probability is 0x10505050 / 0x80000000 = 12.75%
-; CHECK: edge b1 -> b4 probability is 0x6fafafb0 / 0x80000000 = 87.25%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 102
-
-b2:
-  call void @llvm.pseudoprobe(i64 1649282507922421973, i64 2, i32 0, i64 -1)
-  br label %b3
-; CHECK: edge b2 -> b3 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 13
-
-b3:
-  call void @llvm.pseudoprobe(i64 1649282507922421973, i64 3, i32 0, i64 -1)
-  ret i32 %0
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 13
-
-b4:
-  call void @llvm.pseudoprobe(i64 1649282507922421973, i64 4, i32 0, i64 -1)
-  br label %b5
-; CHECK: edge b4 -> b5 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b4: float = {{.*}}, int = {{.*}}, count = 89
-
-b5:
-  call void @llvm.pseudoprobe(i64 1649282507922421973, i64 5, i32 0, i64 -1)
-  ret i32 %0
-; CHECK2: - b5: float = {{.*}}, int = {{.*}}, count = 89
-}
-
-
-; A larger test to verify that profile inference correctly identifies hot parts
-; of the control-flow graph.
-;
-;                +-----------+
-;                |  b1 [?]   |
-;                +-----------+
-;                  |
-;                  |
-;                  v
-; +--------+     +-----------+
-; | b3 [1] | <-- | b2 [5993] |
-; +--------+     +-----------+
-;   |              |
-;   |              |
-;   |              v
-;   |            +-----------+     +--------+
-;   |            | b4 [5992] | --> | b6 [?] |
-;   |            +-----------+     +--------+
-;   |              |                 |
-;   |              |                 |
-;   |              v                 |
-;   |            +-----------+       |
-;   |            | b5 [5992] |       |
-;   |            +-----------+       |
-;   |              |                 |
-;   |              |                 |
-;   |              v                 |
-;   |            +-----------+       |
-;   |            |  b7 [?]   |       |
-;   |            +-----------+       |
-;   |              |                 |
-;   |              |                 |
-;   |              v                 |
-;   |            +-----------+       |
-;   |            | b8 [5992] | <-----+
-;   |            +-----------+
-;   |              |
-;   |              |
-;   |              v
-;   |            +-----------+
-;   +----------> |  b9 [?]   |
-;                +-----------+

-; Function Attrs: nounwind uwtable
-define dso_local i32 @sum_of_squares() #0 {
-b1:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 1, i32 0, i64 -1)
-  %0 = load i32, i32* @yydebug, align 4
-  %cmp = icmp ne i32 %0, 0
-  br label %b2
-; CHECK: edge b1 -> b2 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 5993
-
-b2:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 2, i32 0, i64 -1)
-  br i1 %cmp, label %b4, label %b3
-; CHECK: edge b2 -> b4 probability is 0x7ffa8844 / 0x80000000 = 99.98%
-; CHECK: edge b2 -> b3 probability is 0x000577bc / 0x80000000 = 0.02%
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 5993
-
-b3:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 3, i32 0, i64 -1)
-  br label %b9
-; CHECK: edge b3 -> b9 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 1
-
-b4:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 4, i32 0, i64 -1)
-  br i1 %cmp, label %b5, label %b6
-; CHECK: edge b4 -> b5 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK: edge b4 -> b6 probability is 0x00000000 / 0x80000000 = 0.00%
-; CHECK2: - b4: float = {{.*}}, int = {{.*}}, count = 5992
-
-b5:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 5, i32 0, i64 -1)
-  br label %b7
-; CHECK: edge b5 -> b7 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b5: float = {{.*}}, int = {{.*}}, count = 5992
-
-b6:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 6, i32 0, i64 -1)
-  br label %b8
-; CHECK: edge b6 -> b8 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b6: float = {{.*}}, int = {{.*}}, count = 0
-
-b7:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 7, i32 0, i64 -1)
-  br label %b8
-; CHECK: edge b7 -> b8 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b7: float = {{.*}}, int = {{.*}}, count = 5992
-
-b8:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 8, i32 0, i64 -1)
-  br label %b9
-; CHECK: edge b8 -> b9 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b8: float = {{.*}}, int = {{.*}}, count = 5992
-
-b9:
-  call void @llvm.pseudoprobe(i64 -907520326213521421, i64 9, i32 0, i64 -1)
-  ret i32 %0
-}
-; CHECK2: - b9: float = {{.*}}, int = {{.*}}, count = 5993
-
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
-
-attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
-attributes #1 = { nounwind }
-
-!llvm.pseudo_probe_desc = !{!6, !7, !8, !9}
-
-!6 = !{i64 7964825052912775246, i64 4294967295, !"test_1", null}
-!7 = !{i64 -6216829535442445639, i64 37753817093, !"test_2", null}
-!8 = !{i64 1649282507922421973, i64 69502983527, !"test_3", null}
-!9 = !{i64 -907520326213521421, i64 175862120757, !"sum_of_squares", null}
-- 
2.7.4
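
The deleted SampleProfileInference.h above exposes FlowBlock, FlowJump, and FlowFunction as the input to applyFlowInference(). The following is a minimal sketch, assuming those definitions, of how a client could build a four-block diamond CFG (b0 -> {b1, b2} -> b3) by hand, mirroring what SampleProfileInference<BT>::apply() derives from a real CFG; the diamond itself is an invented example, not code from the patch.

  #include "llvm/Transforms/Utils/SampleProfileInference.h"

  using namespace llvm;

  void runDiamondExample() {
    FlowFunction Func;
    Func.Blocks.resize(4);
    for (uint64_t I = 0; I < 4; I++)
      Func.Blocks[I].Index = I;
    // Only b1 carries a sampled count; the remaining blocks are unknown
    // (UnknownWeight defaults to false, so set it explicitly).
    Func.Blocks[0].UnknownWeight = true;
    Func.Blocks[1].UnknownWeight = false;
    Func.Blocks[1].Weight = 100;
    Func.Blocks[2].UnknownWeight = true;
    Func.Blocks[3].UnknownWeight = true;

    auto AddJump = [&Func](uint64_t Src, uint64_t Dst) {
      FlowJump Jump;
      Jump.Source = Src;
      Jump.Target = Dst;
      Func.Jumps.push_back(Jump);
    };
    AddJump(0, 1);
    AddJump(0, 2);
    AddJump(1, 3);
    AddJump(2, 3);
    // Wire the adjacency lists only after Func.Jumps stops growing, so the
    // stored pointers stay valid (the same order apply() uses).
    for (auto &Jump : Func.Jumps) {
      Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
      Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
    }
    Func.Entry = 0;

    applyFlowInference(Func);
    // Func.Blocks[I].Flow and Func.Jumps[J].Flow now hold counts that
    // satisfy flow conservation.
  }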
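
Inside applyFlowInference(), initializeNetwork() splits every block B into three network nodes and appends four dummy nodes after all blocks. The helper names below are illustrative only; the reverted code inlines this arithmetic.

  // Per-block nodes: 3*B receives incoming flow, 3*B+1 emits outgoing flow,
  // and 3*B+2 penalizes increasing or decreasing the block's count.
  inline uint64_t blockIn(uint64_t B) { return 3 * B; }
  inline uint64_t blockOut(uint64_t B) { return 3 * B + 1; }
  inline uint64_t blockAux(uint64_t B) { return 3 * B + 2; }
  // Dummy nodes: S/T tie entries and exits into a circulation, while S1/T1
  // inject the sampled block weights into the network.
  inline uint64_t srcNode(uint64_t NumBlocks) { return 3 * NumBlocks; }         // S
  inline uint64_t sinkNode(uint64_t NumBlocks) { return 3 * NumBlocks + 1; }    // T
  inline uint64_t auxSrcNode(uint64_t NumBlocks) { return 3 * NumBlocks + 2; }  // S1
  inline uint64_t auxSinkNode(uint64_t NumBlocks) { return 3 * NumBlocks + 3; } // T1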
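
The MinCostMaxFlow class is file-local to the deleted SampleProfileInference.cpp, so the driver below is hypothetical: it assumes the class were visible to a caller and exercises its public interface on a tiny two-route network.

  // Find the max flow of min cost from node 0 to node 3 over two parallel
  // routes; the augmentation loop saturates the cheaper route first.
  MinCostMaxFlow Network;
  Network.initialize(/*NodeCount=*/4, /*SourceNode=*/0, /*SinkNode=*/3);
  Network.addEdge(/*Src=*/0, /*Dst=*/1, /*Capacity=*/10, /*Cost=*/1);
  Network.addEdge(/*Src=*/1, /*Dst=*/3, /*Capacity=*/10, /*Cost=*/1);
  Network.addEdge(/*Src=*/0, /*Dst=*/2, /*Capacity=*/10, /*Cost=*/5);
  Network.addEdge(/*Src=*/2, /*Dst=*/3, /*Capacity=*/10, /*Cost=*/5);
  int64_t TotalCost = Network.run(); // 10*(1+1) + 10*(5+5) = 120
  // Inspect how the flow leaves the source.
  for (const auto &Adj : Network.getFlow(/*Src=*/0))
    dbgs() << "0 -> " << Adj.first << " carries " << Adj.second << "\n";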
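
The findUnlikelyJumps<Function>() specialization in the deleted header applies two IR heuristics before the network is built. Restated as a standalone predicate (isUnlikelyJump is an invented helper, not an API from the patch):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  bool isUnlikelyJump(const BasicBlock *BB, const BasicBlock *Succ,
                      ArrayRef<const BasicBlock *> Succs) {
    // An invoke's second successor is its landing pad; treat the jump to it
    // as cold.
    if (Succs.size() == 2 && Succs.back() == Succ &&
        isa<InvokeInst>(BB->getTerminator()))
      return true;
    // A successor that terminates in unreachable is also treated as cold.
    const Instruction *SuccTI = Succ->getTerminator();
    return SuccTI->getNumSuccessors() == 0 && isa<UnreachableInst>(SuccTI);
  }

Jumps flagged this way are charged AuxCostUnlikely in initializeNetwork(), so the solver routes flow along them only when no cheaper alternative exists.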
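
The branch-weight splitting removed from generateMDProfMetadata() divides one inferred edge weight evenly across a multiway branch's duplicate successors, giving the remainder to the earliest branches. A self-contained restatement with a worked example:

  #include <cstdint>

  // Weight = total inferred count for the shared destination,
  // Multiplicity = number of branches targeting that destination,
  // Index = this branch's position among those duplicates.
  uint64_t splitEdgeWeight(uint64_t Weight, uint64_t Multiplicity,
                           uint64_t Index) {
    uint64_t W = Weight / Multiplicity;
    if (Index < Weight % Multiplicity) // round the earlier branches up
      W++;
    return W;
  }
  // Example: Weight = 10 over 3 duplicate edges yields 4, 3, 3, which sums
  // back to the original 10.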
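
Finally, the invariant asserted by the deleted verifyWeights() can be restated as a boolean check, assuming the FlowFunction types above (isConserved is an invented name):

  #include <vector>

  bool isConserved(const FlowFunction &Func) {
    std::vector<uint64_t> InFlow(Func.Blocks.size(), 0);
    std::vector<uint64_t> OutFlow(Func.Blocks.size(), 0);
    for (const auto &Jump : Func.Jumps) {
      InFlow[Jump.Target] += Jump.Flow;
      OutFlow[Jump.Source] += Jump.Flow;
    }
    for (const auto &Block : Func.Blocks) {
      // Entries have no inflow and exits have no outflow; every other block
      // must match its jump flow on both sides.
      if (!Block.isEntry() && Block.Flow != InFlow[Block.Index])
        return false;
      if (!Block.isExit() && Block.Flow != OutFlow[Block.Index])
        return false;
    }
    return true;
  }

verifyWeights() additionally checks that the total flow entering all entry blocks equals the total flow leaving all exit blocks.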