[llvm-profgen] Clean up code dealing with multiple binaries
author wlei <wlei@fb.com>
Thu, 12 Aug 2021 01:01:37 +0000 (18:01 -0700)
committer wlei <wlei@fb.com>
Tue, 17 Aug 2021 19:16:07 +0000 (12:16 -0700)
As we decided to support only one binary at a time, this patch cleans up the related code dealing with multiple binaries. We can use `llvm-profdata` to merge profiles from multiple binaries.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D108002

llvm/tools/llvm-profgen/PerfReader.cpp
llvm/tools/llvm-profgen/PerfReader.h
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/tools/llvm-profgen/ProfileGenerator.h
llvm/tools/llvm-profgen/ProfiledBinary.h
llvm/tools/llvm-profgen/llvm-profgen.cpp

index 4cff3a9..f996753 100644 (file)
@@ -245,8 +245,7 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
 }
 
 void PerfReaderBase::validateCommandLine(
-    cl::list<std::string> &BinaryFilenames,
-    cl::list<std::string> &PerfTraceFilenames) {
+    StringRef BinaryPath, cl::list<std::string> &PerfTraceFilenames) {
   // Allow the invalid perfscript if we only use to show binary disassembly
   if (!ShowDisassemblyOnly) {
     for (auto &File : PerfTraceFilenames) {
@@ -256,20 +255,12 @@ void PerfReaderBase::validateCommandLine(
       }
     }
   }
-  if (BinaryFilenames.size() > 1) {
-    // TODO: remove this if everything is ready to support multiple binaries.
-    exitWithError(
-        "Currently only support one input binary, multiple binaries' "
-        "profile will be merged in one profile and make profile "
-        "summary info inaccurate. Please use `llvm-perfdata` to merge "
-        "profiles from multiple binaries.");
-  }
-  for (auto &Binary : BinaryFilenames) {
-    if (!llvm::sys::fs::exists(Binary)) {
-      std::string Msg = "Input binary(" + Binary + ") doesn't exist!";
-      exitWithError(Msg);
-    }
+
+  if (!llvm::sys::fs::exists(BinaryPath)) {
+    std::string Msg = "Input binary(" + BinaryPath.str() + ") doesn't exist!";
+    exitWithError(Msg);
   }
+
   if (CSProfileGenerator::MaxCompressionSize < -1) {
     exitWithError("Value of --compress-recursion should >= -1");
   }
@@ -280,14 +271,14 @@ void PerfReaderBase::validateCommandLine(
 }
 
 std::unique_ptr<PerfReaderBase>
-PerfReaderBase::create(cl::list<std::string> &BinaryFilenames,
+PerfReaderBase::create(StringRef BinaryPath,
                        cl::list<std::string> &PerfTraceFilenames) {
-  validateCommandLine(BinaryFilenames, PerfTraceFilenames);
+  validateCommandLine(BinaryPath, PerfTraceFilenames);
 
   PerfScriptType PerfType = extractPerfType(PerfTraceFilenames);
   std::unique_ptr<PerfReaderBase> PerfReader;
   if (PerfType == PERF_LBR_STACK) {
-    PerfReader.reset(new HybridPerfReader(BinaryFilenames));
+    PerfReader.reset(new HybridPerfReader(BinaryPath));
   } else if (PerfType == PERF_LBR) {
     // TODO:
     exitWithError("Unsupported perfscript!");
@@ -298,77 +289,48 @@ PerfReaderBase::create(cl::list<std::string> &BinaryFilenames,
   return PerfReader;
 }
 
-PerfReaderBase::PerfReaderBase(cl::list<std::string> &BinaryFilenames) {
-  // Load the binaries.
-  for (auto Filename : BinaryFilenames)
-    loadBinary(Filename, /*AllowNameConflict*/ false);
+PerfReaderBase::PerfReaderBase(StringRef BinaryPath) {
+  // Load the binary.
+  loadBinary(BinaryPath);
 }
 
-ProfiledBinary &PerfReaderBase::loadBinary(const StringRef BinaryPath,
-                                           bool AllowNameConflict) {
-  // The binary table is currently indexed by the binary name not the full
-  // binary path. This is because the user-given path may not match the one
-  // that was actually executed.
-  StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
-
+void PerfReaderBase::loadBinary(const StringRef BinaryPath) {
   // Call to load the binary in the ctor of ProfiledBinary.
-  auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)});
-
-  if (!Ret.second && !AllowNameConflict) {
-    std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
-                           " and " + Ret.first->second.getPath().str() + " \n";
-    exitWithError(ErrorMsg);
-  }
-
+  Binary = new ProfiledBinary(BinaryPath);
   // Initialize the base address to preferred address.
-  ProfiledBinary &B = Ret.first->second;
-  uint64_t PreferredAddr = B.getPreferredBaseAddress();
-  AddrToBinaryMap[PreferredAddr] = &B;
-  B.setBaseAddress(PreferredAddr);
-
-  return B;
+  Binary->setBaseAddress(Binary->getPreferredBaseAddress());
 }
 
 void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {
-  // Load the binary.
-  StringRef BinaryPath = Event.BinaryPath;
-  StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
-
-  auto I = BinaryTable.find(BinaryName);
-  // Drop the event which doesn't belong to user-provided binaries
-  if (I == BinaryTable.end())
+  // Drop the event which doesn't belong to user-provided binary
+  StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath);
+  if (Binary->getName() != BinaryName)
     return;
 
-  ProfiledBinary &Binary = I->second;
   // Drop the event if its image is loaded at the same address
-  if (Event.Address == Binary.getBaseAddress()) {
-    Binary.setIsLoadedByMMap(true);
+  if (Event.Address == Binary->getBaseAddress()) {
+    Binary->setIsLoadedByMMap(true);
     return;
   }
 
-  if (Event.Offset == Binary.getTextSegmentOffset()) {
+  if (Event.Offset == Binary->getTextSegmentOffset()) {
     // A binary image could be unloaded and then reloaded at different
-    // place, so update the address map here.
+    // place, so update binary load address.
     // Only update for the first executable segment and assume all other
     // segments are loaded at consecutive memory addresses, which is the case on
     // X64.
-    AddrToBinaryMap.erase(Binary.getBaseAddress());
-    AddrToBinaryMap[Event.Address] = &Binary;
-
-    // Update binary load address.
-    Binary.setBaseAddress(Event.Address);
-
-    Binary.setIsLoadedByMMap(true);
+    Binary->setBaseAddress(Event.Address);
+    Binary->setIsLoadedByMMap(true);
   } else {
     // Verify segments are loaded consecutively.
-    const auto &Offsets = Binary.getTextSegmentOffsets();
+    const auto &Offsets = Binary->getTextSegmentOffsets();
     auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset);
     if (It != Offsets.end() && *It == Event.Offset) {
       // The event is for loading a separate executable segment.
       auto I = std::distance(Offsets.begin(), It);
-      const auto &PreferredAddrs = Binary.getPreferredTextSegmentAddresses();
-      if (PreferredAddrs[I] - Binary.getPreferredBaseAddress() !=
-          Event.Address - Binary.getBaseAddress())
+      const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses();
+      if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() !=
+          Event.Address - Binary->getBaseAddress())
         exitWithError("Executable segments not loaded consecutively");
     } else {
       if (It == Offsets.begin())
@@ -378,23 +340,13 @@ void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {
         // via multiple mmap calls with consecutive memory addresses.
         --It;
         assert(*It < Event.Offset);
-        if (Event.Offset - *It != Event.Address - Binary.getBaseAddress())
+        if (Event.Offset - *It != Event.Address - Binary->getBaseAddress())
           exitWithError("Segment not loaded by consecutive mmaps");
       }
     }
   }
 }
 
-ProfiledBinary *PerfReaderBase::getBinary(uint64_t Address) {
-  auto Iter = AddrToBinaryMap.lower_bound(Address);
-  if (Iter == AddrToBinaryMap.end() || Iter->first != Address) {
-    if (Iter == AddrToBinaryMap.begin())
-      return nullptr;
-    Iter--;
-  }
-  return Iter->second;
-}
-
 // Use ordered map to make the output deterministic
 using OrderedCounterForPrint = std::map<std::string, RangeSample>;
 
@@ -448,20 +400,16 @@ static void printBranchCounter(ContextSampleCounterMap &Counter,
 }
 
 void HybridPerfReader::printUnwinderOutput() {
-  for (auto I : BinarySampleCounters) {
-    const ProfiledBinary *Binary = I.first;
     outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
-    printRangeCounter(I.second, Binary);
+    printRangeCounter(SampleCounters, Binary);
     outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";
-    printBranchCounter(I.second, Binary);
-  }
+    printBranchCounter(SampleCounters, Binary);
 }
 
 void HybridPerfReader::unwindSamples() {
   for (const auto &Item : AggregatedSamples) {
     const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
-    VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
-                             Sample->Binary);
+    VirtualUnwinder Unwinder(&SampleCounters, Binary);
     Unwinder.unwind(Sample, Item.second);
   }
 
@@ -470,8 +418,7 @@ void HybridPerfReader::unwindSamples() {
 }
 
 bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,
-                                     SmallVectorImpl<LBREntry> &LBRStack,
-                                     ProfiledBinary *Binary) {
+                                     SmallVectorImpl<LBREntry> &LBRStack) {
   // The raw format of LBR stack is like:
   // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
   //                           ... 0x4005c8/0x4005dc/P/-/-/0
@@ -572,7 +519,6 @@ bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
   // It's in bottom-up order with each frame in one line.
 
   // Extract stack frames from sample
-  ProfiledBinary *Binary = nullptr;
   while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
     StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
     uint64_t FrameAddr = 0;
@@ -583,16 +529,6 @@ bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
       return false;
     }
     TraceIt.advance();
-    if (!Binary) {
-      Binary = getBinary(FrameAddr);
-      // we might have addr not match the MMAP, skip it
-      if (!Binary) {
-        if (AddrToBinaryMap.size() == 0)
-          WithColor::warning() << "No MMAP event in the perfscript, create it "
-                                  "with '--show-mmap-events'\n";
-        break;
-      }
-    }
     // Currently intermixed frame from different binaries is not supported.
     // Ignore bottom frames not from binary of interest.
     if (!Binary->addressIsCode(FrameAddr))
@@ -635,7 +571,7 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
   // 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
   //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
   //
-  std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>();
+  std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>(Binary);
 
   // Parsing call stack and populate into HybridSample.CallStack
   if (!extractCallstack(TraceIt, Sample->CallStack)) {
@@ -644,19 +580,17 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
       TraceIt.advance();
     return;
   }
-  // Set the binary current sample belongs to
-  ProfiledBinary *PB = getBinary(Sample->CallStack.front());
-  Sample->Binary = PB;
-  if (!PB->getMissingMMapWarned() && !PB->getIsLoadedByMMap()) {
+
+  if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
     WithColor::warning() << "No relevant mmap event is matched, will use "
                             "preferred address as the base loading address!\n";
     // Avoid redundant warning, only warn at the first unmatched sample.
-    PB->setMissingMMapWarned(true);
+    Binary->setMissingMMapWarned(true);
   }
 
   if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
     // Parsing LBR stack and populate into HybridSample.LBRStack
-    if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) {
+    if (extractLBRStack(TraceIt, Sample->LBRStack)) {
       // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
       // ranges
       Sample->CallStack.front() = Sample->LBRStack[0].Target;
index 76f5b09..9fd7fd8 100644 (file)
@@ -163,7 +163,7 @@ struct HybridSample : public PerfSample {
   // LBR stack recorded in FIFO order
   SmallVector<LBREntry, 16> LBRStack;
 
-  HybridSample() : PerfSample(PK_HybridSample){};
+  HybridSample(ProfiledBinary *B) : PerfSample(PK_HybridSample), Binary(B){};
   static bool classof(const PerfSample *K) {
     return K->getKind() == PK_HybridSample;
   }
@@ -531,24 +531,13 @@ private:
   const ProfiledBinary *Binary;
 };
 
-// Filename to binary map
-using BinaryMap = StringMap<ProfiledBinary>;
-// Address to binary map for fast look-up
-using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
-// Binary to ContextSampleCounters Map to support multiple binary, we may have
-// same binary loaded at different addresses, they should share the same sample
-// counter
-using BinarySampleCounterMap =
-    std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>;
-
 // Load binaries and read perf trace to parse the events and samples
 class PerfReaderBase {
 public:
-  PerfReaderBase(cl::list<std::string> &BinaryFilenames);
+  PerfReaderBase(StringRef BinaryPath);
   virtual ~PerfReaderBase() = default;
   static std::unique_ptr<PerfReaderBase>
-  create(cl::list<std::string> &BinaryFilenames,
-         cl::list<std::string> &PerfTraceFilenames);
+  create(StringRef BinaryPath, cl::list<std::string> &PerfTraceFilenames);
 
   // A LBR sample is like:
   // 0x5c6313f/0x5c63170/P/-/-/0  0x5c630e7/0x5c63130/P/-/-/0 ...
@@ -604,22 +593,20 @@ public:
     StringRef BinaryPath;
   };
 
-  /// Load symbols and disassemble the code of a give binary.
-  /// Also register the binary in the binary table.
-  ///
-  ProfiledBinary &loadBinary(const StringRef BinaryPath,
-                             bool AllowNameConflict = true);
+  /// Load symbols and disassemble the code of a given binary.
+  void loadBinary(const StringRef BinaryPath);
   void updateBinaryAddress(const MMapEvent &Event);
+  ProfiledBinary *getBinary() const { return Binary; }
   PerfScriptType getPerfScriptType() const { return PerfType; }
   // Entry of the reader to parse multiple perf traces
   void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
-  const BinarySampleCounterMap &getBinarySampleCounters() const {
-    return BinarySampleCounters;
+  const ContextSampleCounterMap &getSampleCounters() const {
+    return SampleCounters;
   }
 
 protected:
   /// Validate the command line input
-  static void validateCommandLine(cl::list<std::string> &BinaryFilenames,
+  static void validateCommandLine(StringRef BinaryPath,
                                   cl::list<std::string> &PerfTraceFilenames);
   static PerfScriptType
   extractPerfType(cl::list<std::string> &PerfTraceFilenames);
@@ -636,8 +623,7 @@ protected:
                         SmallVectorImpl<uint64_t> &CallStack);
   // Extract LBR stack from one perf trace line
   bool extractLBRStack(TraceStream &TraceIt,
-                       SmallVectorImpl<LBREntry> &LBRStack,
-                       ProfiledBinary *Binary);
+                       SmallVectorImpl<LBREntry> &LBRStack);
   uint64_t parseAggregatedCount(TraceStream &TraceIt);
   // Parse one sample from multiple perf lines, override this for different
   // sample type
@@ -648,13 +634,10 @@ protected:
   // Post process the profile after trace aggregation, we will do simple range
   // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
   virtual void generateRawProfile() = 0;
-  // Helper function for looking up binary in AddressBinaryMap
-  ProfiledBinary *getBinary(uint64_t Address);
 
-  BinaryMap BinaryTable;
-  AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
+  ProfiledBinary *Binary = nullptr;
 
-  BinarySampleCounterMap BinarySampleCounters;
+  ContextSampleCounterMap SampleCounters;
   // Samples with the repeating time generated by the perf reader
   AggregatedCounter AggregatedSamples;
   PerfScriptType PerfType = PERF_UNKNOWN;
@@ -671,8 +654,7 @@ protected:
 */
 class HybridPerfReader : public PerfReaderBase {
 public:
-  HybridPerfReader(cl::list<std::string> &BinaryFilenames)
-      : PerfReaderBase(BinaryFilenames) {
+  HybridPerfReader(StringRef BinaryPath) : PerfReaderBase(BinaryPath) {
     PerfType = PERF_LBR_STACK;
   };
   // Parse the hybrid sample including the call and LBR line
index 071c4e0..eccb88d 100644 (file)
@@ -73,22 +73,17 @@ int32_t CSProfileGenerator::MaxCompressionSize = -1;
 
 int CSProfileGenerator::MaxContextDepth = -1;
 
-static bool
-usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
-  return BinarySampleCounters.size() &&
-         BinarySampleCounters.begin()->first->usePseudoProbes();
-}
-
 std::unique_ptr<ProfileGenerator>
-ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
+ProfileGenerator::create(ProfiledBinary *Binary,
+                         const ContextSampleCounterMap &SampleCounters,
                          enum PerfScriptType SampleType) {
   std::unique_ptr<ProfileGenerator> ProfileGenerator;
   if (SampleType == PERF_LBR_STACK) {
-    if (usePseudoProbes(BinarySampleCounters)) {
+    if (Binary->usePseudoProbes()) {
       ProfileGenerator.reset(
-          new PseudoProbeCSProfileGenerator(BinarySampleCounters));
+          new PseudoProbeCSProfileGenerator(Binary, SampleCounters));
     } else {
-      ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
+      ProfileGenerator.reset(new CSProfileGenerator(Binary, SampleCounters));
     }
   } else {
     // TODO:
@@ -229,23 +224,19 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
 
 void CSProfileGenerator::generateProfile() {
   FunctionSamples::ProfileIsCS = true;
-  for (const auto &BI : BinarySampleCounters) {
-    ProfiledBinary *Binary = BI.first;
-    for (const auto &CI : BI.second) {
-      const StringBasedCtxKey *CtxKey =
-          dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
-      StringRef ContextId(CtxKey->Context);
-      // Get or create function profile for the range
-      FunctionSamples &FunctionProfile =
-          getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
-
-      // Fill in function body samples
-      populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
-                                  Binary);
-      // Fill in boundary sample counts as well as call site samples for calls
-      populateFunctionBoundarySamples(ContextId, FunctionProfile,
-                                      CI.second.BranchCounter, Binary);
-    }
+  for (const auto &CI : SampleCounters) {
+    const StringBasedCtxKey *CtxKey =
+        dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
+    StringRef ContextId(CtxKey->Context);
+    // Get or create function profile for the range
+    FunctionSamples &FunctionProfile =
+        getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
+
+    // Fill in function body samples
+    populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter);
+    // Fill in boundary sample counts as well as call site samples for calls
+    populateFunctionBoundarySamples(ContextId, FunctionProfile,
+                                    CI.second.BranchCounter);
   }
   // Fill in call site value sample for inlined calls and also use context to
   // infer missing samples. Since we don't have call count for inlined
@@ -274,8 +265,7 @@ void CSProfileGenerator::updateBodySamplesforFunctionProfile(
 }
 
 void CSProfileGenerator::populateFunctionBodySamples(
-    FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
-    ProfiledBinary *Binary) {
+    FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
   // Compute disjoint ranges first, so we can use MAX
   // for calculating count for each location.
   RangeSample Ranges;
@@ -314,7 +304,7 @@ void CSProfileGenerator::populateFunctionBodySamples(
 
 void CSProfileGenerator::populateFunctionBoundarySamples(
     StringRef ContextId, FunctionSamples &FunctionProfile,
-    const BranchSample &BranchCounters, ProfiledBinary *Binary) {
+    const BranchSample &BranchCounters) {
 
   for (auto Entry : BranchCounters) {
     uint64_t SourceOffset = Entry.first.first;
@@ -450,8 +440,8 @@ void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
 // Helper function to extract context prefix string stack
 // Extract context stack for reusing, leaf context stack will
 // be added compressed while looking up function profile
-static void
-extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
+static void extractPrefixContextStack(
+    SmallVectorImpl<std::string> &ContextStrStack,
     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
     ProfiledBinary *Binary) {
   for (const auto *P : Probes) {
@@ -463,29 +453,23 @@ void PseudoProbeCSProfileGenerator::generateProfile() {
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
   FunctionSamples::ProfileIsCS = true;
-  for (const auto &BI : BinarySampleCounters) {
-    ProfiledBinary *Binary = BI.first;
-    for (const auto &CI : BI.second) {
-      const ProbeBasedCtxKey *CtxKey =
-          dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
-      SmallVector<std::string, 16> ContextStrStack;
-      extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
-      // Fill in function body samples from probes, also infer caller's samples
-      // from callee's probe
-      populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
-                                    Binary);
-      // Fill in boundary samples for a call probe
-      populateBoundarySamplesWithProbes(CI.second.BranchCounter,
-                                        ContextStrStack, Binary);
-    }
+  for (const auto &CI : SampleCounters) {
+    const ProbeBasedCtxKey *CtxKey =
+        dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
+    SmallVector<std::string, 16> ContextStrStack;
+    extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
+    // Fill in function body samples from probes, also infer caller's samples
+    // from callee's probe
+    populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack);
+    // Fill in boundary samples for a call probe
+    populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStrStack);
   }
 
   postProcessProfiles();
 }
 
 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
-    const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
-    ProfiledBinary *Binary) {
+    const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter) {
   RangeSample Ranges;
   findDisjointRanges(Ranges, RangeCounter);
   for (const auto &Range : Ranges) {
@@ -524,11 +508,11 @@ void PseudoProbeCSProfileGenerator::extractProbesFromRange(
 
 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
     const RangeSample &RangeCounter,
-    SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
+    SmallVectorImpl<std::string> &ContextStrStack) {
   ProbeCounterMap ProbeCounter;
   // Extract the top frame probes by looking up each address among the range in
   // the Address2ProbeMap
-  extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
+  extractProbesFromRange(RangeCounter, ProbeCounter);
   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
                      std::unordered_set<FunctionSamples *>>
       FrameSamples;
@@ -536,7 +520,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
     const MCDecodedPseudoProbe *Probe = PI.first;
     uint64_t Count = PI.second;
     FunctionSamples &FunctionProfile =
-        getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
+        getFunctionProfileForLeafProbe(ContextStrStack, Probe);
     // Record the current frame and FunctionProfile whenever samples are
     // collected for non-danglie probes. This is for reporting all of the
     // zero count probes of the frame later.
@@ -585,7 +569,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
 
 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
     const BranchSample &BranchCounter,
-    SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
+    SmallVectorImpl<std::string> &ContextStrStack) {
   for (auto BI : BranchCounter) {
     uint64_t SourceOffset = BI.first.first;
     uint64_t TargetOffset = BI.first.second;
@@ -596,7 +580,7 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
     if (CallProbe == nullptr)
       continue;
     FunctionSamples &FunctionProfile =
-        getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
+        getFunctionProfileForLeafProbe(ContextStrStack, CallProbe);
     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
     FunctionProfile.addTotalSamples(Count);
     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
@@ -639,7 +623,7 @@ FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
 
 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
     SmallVectorImpl<std::string> &ContextStrStack,
-    const MCDecodedPseudoProbe *LeafProbe, ProfiledBinary *Binary) {
+    const MCDecodedPseudoProbe *LeafProbe) {
 
   // Explicitly copy the context for appending the leaf context
   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
index dae6c3a..5804961 100644 (file)
@@ -25,10 +25,10 @@ namespace sampleprof {
 class ProfileGenerator {
 
 public:
-  ProfileGenerator(){};
+  ProfileGenerator(ProfiledBinary *B) : Binary(B){};
   virtual ~ProfileGenerator() = default;
   static std::unique_ptr<ProfileGenerator>
-  create(const BinarySampleCounterMap &BinarySampleCounters,
+  create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
          enum PerfScriptType SampleType);
   virtual void generateProfile() = 0;
   // Use SampleProfileWriter to serialize profile map
@@ -57,15 +57,18 @@ protected:
 
   // Used by SampleProfileWriter
   StringMap<FunctionSamples> ProfileMap;
+
+  ProfiledBinary *Binary = nullptr;
 };
 
 class CSProfileGenerator : public ProfileGenerator {
 protected:
-  const BinarySampleCounterMap &BinarySampleCounters;
+  const ContextSampleCounterMap &SampleCounters;
 
 public:
-  CSProfileGenerator(const BinarySampleCounterMap &Counters)
-      : BinarySampleCounters(Counters){};
+  CSProfileGenerator(ProfiledBinary *Binary,
+                     const ContextSampleCounterMap &Counters)
+      : ProfileGenerator(Binary), SampleCounters(Counters){};
 
 public:
   void generateProfile() override;
@@ -210,12 +213,10 @@ private:
                                            const FrameLocation &LeafLoc,
                                            uint64_t Count);
   void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
-                                   const RangeSample &RangeCounters,
-                                   ProfiledBinary *Binary);
+                                   const RangeSample &RangeCounters);
   void populateFunctionBoundarySamples(StringRef ContextId,
                                        FunctionSamples &FunctionProfile,
-                                       const BranchSample &BranchCounters,
-                                       ProfiledBinary *Binary);
+                                       const BranchSample &BranchCounters);
   void populateInferredFunctionSamples();
 
 public:
@@ -231,25 +232,24 @@ using ProbeCounterMap =
 class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
 
 public:
-  PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
-      : CSProfileGenerator(Counters) {}
+  PseudoProbeCSProfileGenerator(ProfiledBinary *Binary,
+                                const ContextSampleCounterMap &Counters)
+      : CSProfileGenerator(Binary, Counters) {}
   void generateProfile() override;
 
 private:
   // Go through each address from range to extract the top frame probe by
   // looking up in the Address2ProbeMap
   void extractProbesFromRange(const RangeSample &RangeCounter,
-                              ProbeCounterMap &ProbeCounter,
-                              ProfiledBinary *Binary);
+                              ProbeCounterMap &ProbeCounter);
   // Fill in function body samples from probes
   void
   populateBodySamplesWithProbes(const RangeSample &RangeCounter,
-                                SmallVectorImpl<std::string> &ContextStrStack,
-                                ProfiledBinary *Binary);
+                                SmallVectorImpl<std::string> &ContextStrStack);
   // Fill in boundary samples for a call probe
   void populateBoundarySamplesWithProbes(
       const BranchSample &BranchCounter,
-      SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
+      SmallVectorImpl<std::string> &ContextStrStack);
   // Helper function to get FunctionSamples for the leaf inlined context
   FunctionSamples &
   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
@@ -258,8 +258,7 @@ private:
   // Helper function to get FunctionSamples for the leaf probe
   FunctionSamples &
   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
-                                 const MCDecodedPseudoProbe *LeafProbe,
-                                 ProfiledBinary *Binary);
+                                 const MCDecodedPseudoProbe *LeafProbe);
 };
 
 } // end namespace sampleprof
index eba36d4..73a01ce 100644 (file)
@@ -183,7 +183,7 @@ class ProfiledBinary {
   }
 
 public:
-  ProfiledBinary(StringRef Path) : Path(Path), ProEpilogTracker(this) {
+  ProfiledBinary(const StringRef Path) : Path(Path), ProEpilogTracker(this) {
     setupSymbolizer();
     load();
   }
index 4045a26..8ce7d51 100644 (file)
@@ -27,11 +27,10 @@ static cl::list<std::string> PerfTraceFilenames(
              "`script` command(the raw perf.data should be profiled with -b)"),
     cl::cat(ProfGenCategory));
 
-static cl::list<std::string>
-    BinaryFilenames("binary", cl::value_desc("binary"), cl::OneOrMore,
-                    llvm::cl::MiscFlags::CommaSeparated,
-                    cl::desc("Path of profiled binary files"),
-                    cl::cat(ProfGenCategory));
+static cl::opt<std::string> BinaryPath(
+    "binary", cl::value_desc("binary"), cl::Required,
+    cl::desc("Path of profiled binary, only one binary is supported."),
+    cl::cat(ProfGenCategory));
 
 extern cl::opt<bool> ShowDisassemblyOnly;
 
@@ -50,19 +49,18 @@ int main(int argc, const char *argv[]) {
   cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
 
   if (ShowDisassemblyOnly) {
-    for (auto BinaryPath : BinaryFilenames) {
-      (void)ProfiledBinary(BinaryPath);
-    }
+    (void)ProfiledBinary(BinaryPath);
     return EXIT_SUCCESS;
   }
 
   // Load binaries and parse perf events and samples
   std::unique_ptr<PerfReaderBase> Reader =
-      PerfReaderBase::create(BinaryFilenames, PerfTraceFilenames);
+      PerfReaderBase::create(BinaryPath, PerfTraceFilenames);
   Reader->parsePerfTraces(PerfTraceFilenames);
 
-  std::unique_ptr<ProfileGenerator> Generator = ProfileGenerator::create(
-      Reader->getBinarySampleCounters(), Reader->getPerfScriptType());
+  std::unique_ptr<ProfileGenerator> Generator =
+      ProfileGenerator::create(Reader->getBinary(), Reader->getSampleCounters(),
+                               Reader->getPerfScriptType());
   Generator->generateProfile();
   Generator->write();