[SampleFDO] Flow Sensitive Sample FDO (FSAFDO) profile loader
authorRong Xu <xur@google.com>
Wed, 18 Aug 2021 23:59:02 +0000 (16:59 -0700)
committerRong Xu <xur@google.com>
Thu, 19 Aug 2021 01:37:35 +0000 (18:37 -0700)
This patch implements Flow Sensitive Sample FDO (FSAFDO) profile
loader. We have two profile loaders for FS profile,
one before RegAlloc and one before BlockPlacement.

To enable it, when -fprofile-sample-use=<profile> is specified,
add "-enable-fs-discriminator=true \
     -disable-ra-fsprofile-loader=false \
     -disable-layout-fsprofile-loader=false"
to turn on the FS profile loaders.

Differential Revision: https://reviews.llvm.org/D107878

19 files changed:
clang/lib/CodeGen/BackendUtil.cpp
llvm/include/llvm/CodeGen/MIRSampleProfile.h [new file with mode: 0644]
llvm/include/llvm/CodeGen/MachineDominators.h
llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
llvm/include/llvm/CodeGen/Passes.h
llvm/include/llvm/IR/DebugInfoMetadata.h
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/Passes/PassBuilder.h
llvm/include/llvm/Support/PGOOptions.h [new file with mode: 0644]
llvm/include/llvm/Target/TargetMachine.h
llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
llvm/lib/CodeGen/CMakeLists.txt
llvm/lib/CodeGen/MIRSampleProfile.cpp [new file with mode: 0644]
llvm/lib/CodeGen/TargetPassConfig.cpp
llvm/lib/LTO/LTOBackend.cpp
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/CodeGen/X86/Inputs/fsloader.afdo [new file with mode: 0644]
llvm/test/CodeGen/X86/fsafdo_test2.ll
llvm/tools/opt/NewPMDriver.cpp

index 9e4e40a..404d268 100644 (file)
@@ -1261,6 +1261,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
                           "", PGOOptions::NoAction, PGOOptions::CSIRInstr,
                           CodeGenOpts.DebugInfoForProfiling);
   }
+  if (TM)
+    TM->setPGOOption(PGOOpt);
 
   PipelineTuningOptions PTO;
   PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
new file mode 100644 (file)
index 0000000..7872daf
--- /dev/null
@@ -0,0 +1,81 @@
+//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the supoorting functions for machine level Sample FDO
+// loader. This is used in Flow Sensitive SampelFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+#include <cassert>
+
+namespace llvm {
+
+using namespace sampleprof;
+
+class MIRProfileLoader;
+class MIRProfileLoaderPass : public MachineFunctionPass {
+  MachineFunction *MF;
+  std::string ProfileFileName;
+  FSDiscriminatorPass P;
+  unsigned LowBit;
+  unsigned HighBit;
+
+public:
+  static char ID;
+  /// FS bits will only use the '1' bits in the Mask.
+  MIRProfileLoaderPass(std::string FileName = "",
+                       std::string RemappingFileName = "",
+                       FSDiscriminatorPass P = FSDiscriminatorPass::Pass1)
+      : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+        MIRSampleLoader(
+            std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+    LowBit = getFSPassBitBegin(P);
+    HighBit = getFSPassBitEnd(P);
+    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+  }
+
+  /// getMachineFunction - Return the last machine function computed.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+private:
+  void init(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &) override;
+  bool doInitialization(Module &M) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  std::unique_ptr<MIRProfileLoader> MIRSampleLoader;
+  /// Hold the information of the basic block frequency.
+  MachineBlockFrequencyInfo *MBFI;
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H
index 46bf73c..00bfa1a 100644 (file)
@@ -112,6 +112,12 @@ public:
     return DT->dominates(A, B);
   }
 
+  void getDescendants(MachineBasicBlock *A,
+                      SmallVectorImpl<MachineBasicBlock *> &Result) {
+    applySplitCriticalEdges();
+    DT->getDescendants(A, Result);
+  }
+
   bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
     applySplitCriticalEdges();
     return DT->dominates(A, B);
index 8cc5909..285b858 100644 (file)
@@ -118,6 +118,12 @@ public:
       : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
                                       PassName, RemarkName, Loc, MBB) {}
 
+  MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+                                    const MachineInstr *MI)
+      : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
+                                      PassName, RemarkName, MI->getDebugLoc(),
+                                      MI->getParent()) {}
+
   static bool classof(const DiagnosticInfo *DI) {
     return DI->getKind() == DK_MachineOptimizationRemarkAnalysis;
   }
index da1bab7..09aad65 100644 (file)
@@ -171,6 +171,9 @@ namespace llvm {
   /// This pass adds flow sensitive discriminators.
   extern char &MIRAddFSDiscriminatorsID;
 
+  /// This pass reads flow sensitive profile.
+  extern char &MIRProfileLoaderPassID;
+
   /// FastRegisterAllocation Pass - This pass register allocates as fast as
   /// possible. It is best suited for debug code where live ranges are short.
   ///
@@ -513,6 +516,11 @@ namespace llvm {
   FunctionPass *
   createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
 
+  /// Read Flow Sensitive Profile.
+  FunctionPass *createMIRProfileLoaderPass(std::string File,
+                                           std::string RemappingFile,
+                                           sampleprof::FSDiscriminatorPass P);
+
   /// Creates MIR Debugify pass. \see MachineDebugify.cpp
   ModulePass *createDebugifyMachineModulePass();
 
index b59a7b3..2f116ec 100644 (file)
@@ -2212,7 +2212,8 @@ unsigned DILocation::getCopyIdentifier() const {
   return getCopyIdentifierFromDiscriminator(getDiscriminator());
 }
 
-Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *>
+DILocation::cloneWithBaseDiscriminator(unsigned D) const {
   unsigned BD, DF, CI;
 
   if (EnableFSDiscriminator) {
@@ -2230,7 +2231,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
   return None;
 }
 
-Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *>
+DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
   assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
 
   DF *= getDuplicationFactor();
index 365240d..02e2e95 100644 (file)
@@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
 void initializeAAResultsWrapperPassPass(PassRegistry&);
 void initializeADCELegacyPassPass(PassRegistry&);
 void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
+void initializeAddFSDiscriminatorsPass(PassRegistry &);
 void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
 void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&);
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
 void initializeHardwareLoopsPass(PassRegistry&);
+void initializeMIRProfileLoaderPassPass(PassRegistry &);
 void initializeMemProfilerLegacyPassPass(PassRegistry &);
 void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
 void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
index 9ab7bd4..943ad31 100644 (file)
@@ -20,6 +20,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/Instrumentation.h"
@@ -32,49 +33,6 @@ class AAManager;
 class TargetMachine;
 class ModuleSummaryIndex;
 
-/// A struct capturing PGO tunables.
-struct PGOOptions {
-  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
-  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
-  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
-             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
-             CSPGOAction CSAction = NoCSAction,
-             bool DebugInfoForProfiling = false,
-             bool PseudoProbeForProfiling = false)
-      : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
-        ProfileRemappingFile(ProfileRemappingFile), Action(Action),
-        CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
-                                                  (Action == SampleUse &&
-                                                   !PseudoProbeForProfiling)),
-        PseudoProbeForProfiling(PseudoProbeForProfiling) {
-    // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
-    // callback with IRUse action without ProfileFile.
-
-    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
-    assert(this->CSAction == NoCSAction ||
-           (this->Action != IRInstr && this->Action != SampleUse));
-
-    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
-    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
-
-    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
-    // a profile.
-    assert(this->CSAction != CSIRUse || this->Action == IRUse);
-
-    // If neither Action nor CSAction, DebugInfoForProfiling or
-    // PseudoProbeForProfiling needs to be true.
-    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
-           this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
-  }
-  std::string ProfileFile;
-  std::string CSProfileGenFile;
-  std::string ProfileRemappingFile;
-  PGOAction Action;
-  CSPGOAction CSAction;
-  bool DebugInfoForProfiling;
-  bool PseudoProbeForProfiling;
-};
-
 /// Tunable parameters for passes in the default pipelines.
 class PipelineTuningOptions {
 public:
diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
new file mode 100644 (file)
index 0000000..2141e21
--- /dev/null
@@ -0,0 +1,65 @@
+//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Define option tunables for PGO.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PGOOPTIONS_H
+#define LLVM_SUPPORT_PGOOPTIONS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+/// A struct capturing PGO tunables.
+struct PGOOptions {
+  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+             CSPGOAction CSAction = NoCSAction,
+             bool DebugInfoForProfiling = false,
+             bool PseudoProbeForProfiling = false)
+      : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+        ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+        CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
+                                                  (Action == SampleUse &&
+                                                   !PseudoProbeForProfiling)),
+        PseudoProbeForProfiling(PseudoProbeForProfiling) {
+    // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
+    // callback with IRUse action without ProfileFile.
+
+    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+    assert(this->CSAction == NoCSAction ||
+           (this->Action != IRInstr && this->Action != SampleUse));
+
+    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+    // a profile.
+    assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+    // If neither Action nor CSAction, DebugInfoForProfiling or
+    // PseudoProbeForProfiling needs to be true.
+    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+           this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+  }
+  std::string ProfileFile;
+  std::string CSProfileGenFile;
+  std::string ProfileRemappingFile;
+  PGOAction Action;
+  CSPGOAction CSAction;
+  bool DebugInfoForProfiling;
+  bool PseudoProbeForProfiling;
+};
+} // namespace llvm
+
+#endif
index dd17af4..9e88538 100644 (file)
@@ -13,6 +13,7 @@
 #ifndef LLVM_TARGET_TARGETMACHINE_H
 #define LLVM_TARGET_TARGETMACHINE_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
@@ -20,6 +21,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetOptions.h"
 #include <string>
@@ -110,6 +112,9 @@ protected: // Can only create subclasses.
   unsigned RequireStructuredCFG : 1;
   unsigned O0WantsFastISel : 1;
 
+  // PGO related tunables.
+  Optional<PGOOptions> PGOOption = None;
+
 public:
   const TargetOptions DefaultOptions;
   mutable TargetOptions Options;
@@ -303,6 +308,9 @@ public:
     return false;
   }
 
+  void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; }
+  const Optional<PGOOptions> &getPGOOption() const { return PGOOption; }
+
   /// If the specified generic pointer could be assumed as a pointer to a
   /// specific address space, return that address space.
   ///
index e0759d3..2a510e6 100644 (file)
@@ -56,15 +56,20 @@ template <> struct IRTraits<BasicBlock> {
   using FunctionT = Function;
   using BlockFrequencyInfoT = BlockFrequencyInfo;
   using LoopT = Loop;
-  using LoopInfoT = LoopInfo;
+  using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
+  using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
+  using PostDominatorTreeT = PostDominatorTree;
+  using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
   using OptRemarkEmitterT = OptimizationRemarkEmitter;
   using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
-  using DominatorTreeT = DominatorTree;
-  using PostDominatorTreeT = PostDominatorTree;
+  using PredRangeT = pred_range;
+  using SuccRangeT = succ_range;
   static Function &getFunction(Function &F) { return F; }
   static const BasicBlock *getEntryBB(const Function *F) {
     return &F->getEntryBlock();
   }
+  static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
+  static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
 };
 
 } // end namespace afdo_detail
@@ -76,7 +81,8 @@ extern cl::opt<bool> NoWarnSampleUnused;
 
 template <typename BT> class SampleProfileLoaderBaseImpl {
 public:
-  SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {}
+  SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
+      : Filename(Name), RemappingFilename(RemapName) {}
   void dump() { Reader->dump(); }
 
   using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
@@ -85,14 +91,19 @@ public:
       typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
   using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
   using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
-  using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT;
+  using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
+  using DominatorTreePtrT =
+      typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
+  using PostDominatorTreePtrT =
+      typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
+  using PostDominatorTreeT =
+      typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
   using OptRemarkEmitterT =
       typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
   using OptRemarkAnalysisT =
       typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
-  using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT;
-  using PostDominatorTreeT =
-      typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
+  using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
+  using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
 
   using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
   using EquivalenceClassMap =
@@ -112,6 +123,12 @@ protected:
   const BasicBlockT *getEntryBB(const FunctionT *F) {
     return afdo_detail::IRTraits<BT>::getEntryBB(F);
   }
+  PredRangeT getPredecessors(BasicBlockT *BB) {
+    return afdo_detail::IRTraits<BT>::getPredecessors(BB);
+  }
+  SuccRangeT getSuccessors(BasicBlockT *BB) {
+    return afdo_detail::IRTraits<BT>::getSuccessors(BB);
+  }
 
   unsigned getFunctionLoc(FunctionT &Func);
   virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
@@ -129,12 +146,11 @@ protected:
   void findEquivalencesFor(BasicBlockT *BB1,
                            ArrayRef<BasicBlockT *> Descendants,
                            PostDominatorTreeT *DomTree);
-
   void propagateWeights(FunctionT &F);
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(FunctionT &F);
   bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
-  void clearFunctionData();
+  void clearFunctionData(bool ResetDT = true);
   void computeDominanceAndLoopInfo(FunctionT &F);
   bool
   computeAndPropagateWeights(FunctionT &F,
@@ -168,9 +184,9 @@ protected:
   EquivalenceClassMap EquivalenceClass;
 
   /// Dominance, post-dominance and loop information.
-  std::unique_ptr<DominatorTreeT> DT;
-  std::unique_ptr<PostDominatorTreeT> PDT;
-  std::unique_ptr<LoopInfoT> LI;
+  DominatorTreePtrT DT;
+  PostDominatorTreePtrT PDT;
+  LoopInfoPtrT LI;
 
   /// Predecessors for each basic block in the CFG.
   BlockEdgeMap Predecessors;
@@ -190,6 +206,9 @@ protected:
   /// Name of the profile file to load.
   std::string Filename;
 
+  /// Name of the profile remapping file to load.
+  std::string RemappingFilename;
+
   /// Profile Summary Info computed from sample profile.
   ProfileSummaryInfo *PSI = nullptr;
 
@@ -199,15 +218,17 @@ protected:
 
 /// Clear all the per-function data used to load samples and propagate weights.
 template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() {
+void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
   BlockWeights.clear();
   EdgeWeights.clear();
   VisitedBlocks.clear();
   VisitedEdges.clear();
   EquivalenceClass.clear();
-  DT = nullptr;
-  PDT = nullptr;
-  LI = nullptr;
+  if (ResetDT) {
+    DT = nullptr;
+    PDT = nullptr;
+    LI = nullptr;
+  }
   Predecessors.clear();
   Successors.clear();
   CoverageTracker.clear();
@@ -475,7 +496,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
     // class by making BB2's equivalence class be BB1.
     DominatedBBs.clear();
     DT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+    findEquivalencesFor(BB1, DominatedBBs, &*PDT);
 
     LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
   }
@@ -692,7 +713,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
     SmallPtrSet<BasicBlockT *, 16> Visited;
     if (!Predecessors[B1].empty())
       llvm_unreachable("Found a stale predecessors list in a basic block.");
-    for (BasicBlockT *B2 : predecessors(B1))
+    for (auto *B2 : getPredecessors(B1))
       if (Visited.insert(B2).second)
         Predecessors[B1].push_back(B2);
 
@@ -700,7 +721,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
     Visited.clear();
     if (!Successors[B1].empty())
       llvm_unreachable("Found a stale successors list in a basic block.");
-    for (BasicBlockT *B2 : successors(B1))
+    for (auto *B2 : getSuccessors(B1))
       if (Visited.insert(B2).second)
         Successors[B1].push_back(B2);
   }
@@ -911,12 +932,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
 template <typename BT>
 void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
     FunctionT &F) {
-  DT.reset(new DominatorTreeT);
+  DT.reset(new DominatorTree);
   DT->recalculate(F);
 
   PDT.reset(new PostDominatorTree(F));
 
-  LI.reset(new LoopInfoT);
+  LI.reset(new LoopInfo);
   LI->analyze(*DT);
 }
 
index 398253f..605aee3 100644 (file)
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
   MachineTraceMetrics.cpp
   MachineVerifier.cpp
   MIRFSDiscriminator.cpp
+  MIRSampleProfile.cpp
   MIRYamlMapping.cpp
   ModuloSchedule.cpp
   MultiHazardRecognizer.cpp
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644 (file)
index 0000000..f4f8d13
--- /dev/null
@@ -0,0 +1,335 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+    "show-fs-branchprob", cl::Hidden, cl::init(false),
+    cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+    "fs-profile-debug-prob-diff-threshold", cl::init(10),
+    cl::desc("Only show debug message if the branch probility is greater than "
+             "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+    "fs-profile-debug-bw-threshold", cl::init(10000),
+    cl::desc("Only show debug message if the source branch weight is greater "
+             " than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+                                   cl::init(false),
+                                   cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+                                  cl::init(false),
+                                  cl::desc("View BFI after MIR loader"));
+
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+                      "Load MIR Sample Profile",
+                      /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+                    /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
+                                               std::string RemappingFile,
+                                               FSDiscriminatorPass P) {
+  return new MIRProfileLoaderPass(File, RemappingFile, P);
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+  using InstructionT = MachineInstr;
+  using BasicBlockT = MachineBasicBlock;
+  using FunctionT = MachineFunction;
+  using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+  using LoopT = MachineLoop;
+  using LoopInfoPtrT = MachineLoopInfo *;
+  using DominatorTreePtrT = MachineDominatorTree *;
+  using PostDominatorTreePtrT = MachinePostDominatorTree *;
+  using PostDominatorTreeT = MachinePostDominatorTree;
+  using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+  using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+  using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+  using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+  static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+  static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+    return GraphTraits<const MachineFunction *>::getEntryNode(F);
+  }
+  static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+    return BB->predecessors();
+  }
+  static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+    return BB->successors();
+  }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+    : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+public:
+  void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+                   MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+                   MachineOptimizationRemarkEmitter *MORE) {
+    DT = MDT;
+    PDT = MPDT;
+    LI = MLI;
+    BFI = MBFI;
+    ORE = MORE;
+  }
+  void setFSPass(FSDiscriminatorPass Pass) {
+    P = Pass;
+    LowBit = getFSPassBitBegin(P);
+    HighBit = getFSPassBitEnd(P);
+    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+  }
+
+  MIRProfileLoader(StringRef Name, StringRef RemapName)
+      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
+  }
+
+  void setBranchProbs(MachineFunction &F);
+  bool runOnFunction(MachineFunction &F);
+  bool doInitialization(Module &M);
+  bool isValid() const { return ProfileIsValid; }
+
+protected:
+  friend class SampleCoverageTracker;
+
+  /// Hold the information of the basic block frequency.
+  MachineBlockFrequencyInfo *BFI;
+
+  /// PassNum is the sequence number this pass is called, start from 1.
+  FSDiscriminatorPass P;
+
+  // LowBit in the FS discriminator used by this instance. Note the number is
+  // 0-based. Base discrimnator use bit 0 to bit 11.
+  unsigned LowBit;
+  // HighwBit in the FS discriminator used by this instance. Note the number
+  // is 0-based.
+  unsigned HighBit;
+
+  bool ProfileIsValid = true;
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<
+    MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+  for (auto &BI : F) {
+    MachineBasicBlock *BB = &BI;
+    if (BB->succ_size() < 2)
+      continue;
+    const MachineBasicBlock *EC = EquivalenceClass[BB];
+    uint64_t BBWeight = BlockWeights[EC];
+    uint64_t SumEdgeWeight = 0;
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                          SE = BB->succ_end();
+         SI != SE; ++SI) {
+      MachineBasicBlock *Succ = *SI;
+      Edge E = std::make_pair(BB, Succ);
+      SumEdgeWeight += EdgeWeights[E];
+    }
+
+    if (BBWeight != SumEdgeWeight) {
+      LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
+                        << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+                        << "\n");
+      BBWeight = SumEdgeWeight;
+    }
+    if (BBWeight == 0) {
+      LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+      continue;
+    }
+
+#ifndef NDEBUG
+    uint64_t BBWeightOrig = BBWeight;
+#endif
+    uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+    uint32_t Factor = 1;
+    if (BBWeight > MaxWeight) {
+      Factor = BBWeight / MaxWeight + 1;
+      BBWeight /= Factor;
+      LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+    }
+
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                          SE = BB->succ_end();
+         SI != SE; ++SI) {
+      MachineBasicBlock *Succ = *SI;
+      Edge E = std::make_pair(BB, Succ);
+      uint64_t EdgeWeight = EdgeWeights[E];
+      EdgeWeight /= Factor;
+
+      assert(BBWeight >= EdgeWeight &&
+             "BBweight is larger than EdgeWeight -- should not happen.\n");
+
+      BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+      BranchProbability NewProb(EdgeWeight, BBWeight);
+      if (OldProb == NewProb)
+        continue;
+      BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+      if (!ShowFSBranchProb)
+        continue;
+      bool Show = false;
+      BranchProbability Diff;
+      if (OldProb > NewProb)
+        Diff = OldProb - NewProb;
+      else
+        Diff = NewProb - OldProb;
+      Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+      Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+      auto DIL = BB->findBranchDebugLoc();
+      auto SuccDIL = Succ->findBranchDebugLoc();
+      if (Show) {
+        dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+               << Succ->getNumber() << "): ";
+        if (DIL)
+          dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+                 << DIL->getColumn();
+        if (SuccDIL)
+          dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+                 << ":" << SuccDIL->getColumn();
+        dbgs() << " W=" << BBWeightOrig << "  " << OldProb << " --> " << NewProb
+               << "\n";
+      }
+#endif
+    }
+  }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+  auto &Ctx = M.getContext();
+
+  auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
+                                                             RemappingFilename);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    std::string Msg = "Could not open profile: " + EC.message();
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+    return false;
+  }
+
+  Reader = std::move(ReaderOrErr.get());
+  Reader->setModule(&M);
+  ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  Reader->getSummary();
+
+  return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+  Function &Func = MF.getFunction();
+  clearFunctionData(false);
+  Samples = Reader->getSamplesFor(Func);
+  if (!Samples || Samples->empty())
+    return false;
+
+  if (getFunctionLoc(MF) == 0)
+    return false;
+
+  DenseSet<GlobalValue::GUID> InlinedGUIDs;
+  bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+  // Set the new BPI, BFI.
+  setBranchProbs(MF);
+
+  return Changed;
+}
+
+} // namespace llvm
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+  if (!MIRSampleLoader->isValid())
+    return false;
+
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+                    << MF.getFunction().getName() << "\n");
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  MIRSampleLoader->setInitVals(
+      &getAnalysis<MachineDominatorTree>(),
+      &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+      MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+  MF.RenumberBlocks();
+  if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+  }
+
+  bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+  if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+  }
+
+  return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+                    << "\n");
+
+  MIRSampleLoader->setFSPass(P);
+  return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
index c31dece..2a90c31 100644 (file)
@@ -172,6 +172,24 @@ static cl::opt<bool>
     FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
                      cl::desc("Do not insert FS-AFDO discriminators before "
                               "emit."));
+// Disable MIRProfileLoader before RegAlloc. This is for for debugging and
+// tuning purpose.
+static cl::opt<bool> DisableRAFSProfileLoader(
+    "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+    cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BloackPlacement. This is for for debugging
+// and tuning purpose.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+    "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+    cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+    FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+                  cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+    "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+    cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
 
 // Temporary option to allow experimenting with MachineScheduler as a post-RA
 // scheduler. Targets can "properly" enable this with
@@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
   return TargetID;
 }
 
+// Find the FSProfile file name. The internal option takes the precedence
+// before getting from TargetMachine.
+static const std::string getFSProfileFile(const TargetMachine *TM) {
+  if (!FSProfileFile.empty())
+    return FSProfileFile.getValue();
+  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileFile;
+}
+
+// Find the Profile remapping file name. The internal option takes the
+// precedence before getting from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) {
+  if (!FSRemappingFile.empty())
+    return FSRemappingFile.getValue();
+  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileRemappingFile;
+}
+
 //===---------------------------------------------------------------------===//
 /// TargetPassConfig
 //===---------------------------------------------------------------------===//
@@ -1115,9 +1155,15 @@ void TargetPassConfig::addMachinePasses() {
 
   // Add a FSDiscriminator pass right before RA, so that we could get
   // more precise SampleFDO profile for RA.
-  if (EnableFSDiscriminator)
+  if (EnableFSDiscriminator) {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::Pass1));
+    const std::string ProfileFile = getFSProfileFile(TM);
+    if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+      addPass(
+          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+                                     sampleprof::FSDiscriminatorPass::Pass1));
+  }
 
   // Run register allocation and passes that are tightly coupled with it,
   // including phi elimination and scheduling.
@@ -1471,9 +1517,15 @@ bool TargetPassConfig::addGCPasses() {
 
 /// Add standard basic block placement passes.
 void TargetPassConfig::addBlockPlacement() {
-  if (EnableFSDiscriminator)
+  if (EnableFSDiscriminator) {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::Pass2));
+    const std::string ProfileFile = getFSProfileFile(TM);
+    if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+      addPass(
+          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+                                     sampleprof::FSDiscriminatorPass::Pass2));
+  }
   if (addPass(&MachineBlockPlacementID)) {
     // Run a separate pass to collect block placement statistics.
     if (EnableBlockPlacementStats)
index 6563af0..73a48a7 100644 (file)
@@ -230,6 +230,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
     PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
                         PGOOptions::NoCSAction, true);
   }
+  if (TM)
+    TM->setPGOOption(PGOOpt);
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
index 8e9c79f..e3e06a2 100644 (file)
@@ -358,10 +358,10 @@ public:
       std::function<AssumptionCache &(Function &)> GetAssumptionCache,
       std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
       std::function<const TargetLibraryInfo &(Function &)> GetTLI)
-      : SampleProfileLoaderBaseImpl(std::string(Name)),
+      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
         GetAC(std::move(GetAssumptionCache)),
         GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
-        RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+        LTOPhase(LTOPhase) {}
 
   bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -417,9 +417,6 @@ protected:
   /// Profile tracker for different context.
   std::unique_ptr<SampleContextTracker> ContextTracker;
 
-  /// Name of the profile remapping file to load.
-  std::string RemappingFilename;
-
   /// Flag indicating whether input profile is context-sensitive
   bool ProfileIsCS = false;
 
diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader.afdo
new file mode 100644 (file)
index 0000000..debfcd3
--- /dev/null
@@ -0,0 +1,35 @@
+work:42380966:1346190
+ 1: 1246499
+ 5: 1246499
+foo:28798256:4267
+ 0: 4267
+ 2.1: 255999
+ 4: 264627 bar:250018
+ 4.512: 269485 bar:278102
+ 4.4608: 280297 bar:280933
+ 4.12288: 278916 bar:267752
+ 5: 264627
+ 5.4096: 269485
+ 5.8192: 260670
+ 5.8704: 278916
+ 6: 11541
+ 6.3584: 278916 work:284547
+ 6.4096: 260670 work:249428
+ 6.8704: 11541
+ 7: 272442
+ 7.512: 283590
+ 7.4608: 234082
+ 7.9728: 279149
+ 8: 11541
+ 8.11776: 283590 work:305061
+ 8.12288: 279149 work:281368
+ 8.13824: 234082 work:225786
+ 10: 4050
+bar:9504180:1076805
+ 2: 1056020
+ 3: 1056020
+main:20360:0
+ 0: 0
+ 2.1: 4045
+ 3: 4156 foo:4267
+ 5: 0
index 7695f3e..35e4d46 100644 (file)
@@ -1,4 +1,7 @@
 ; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo
+; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER
+;
 ;;
 ;; C source code for the test (compiler at -O3):
 ;; // A test case for loop unroll.
 ; CHECK: .byte   1
 ; CHECK: .size   __llvm_fs_discriminator__, 1
 
+;; Check that new branch probs are generated.
+; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (5 -> 8): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x021c112e / 0x80000000 = 1.65%
+; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35%
+; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57%
+; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43%
+; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
+; LOADER: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
+; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46%
+; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54%
+
+
 target triple = "x86_64-unknown-linux-gnu"
 
 @sum = dso_local local_unnamed_addr global i32 0, align 4
index d88636a..6c11da5 100644 (file)
@@ -284,6 +284,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
       P->CSAction = PGOOptions::CSIRUse;
     }
   }
+  if (TM)
+    TM->setPGOOption(P);
+
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;