[BOLT] Add support for the latest perf tool
authorMaksim Panchenko <maks@fb.com>
Thu, 21 Jul 2022 01:51:08 +0000 (18:51 -0700)
committerMaksim Panchenko <maks@fb.com>
Fri, 22 Jul 2022 14:56:15 +0000 (07:56 -0700)
The latest perf tool can return non-empty buffer when executing
buildid-list command, even when perf.data was recorded with -B flag.
Some binaries will be listed without the ID, while others may have a
recorded ID. Allow invalid entires on the input, while checking the
valid ones for the match.

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D130223

bolt/include/bolt/Profile/DataAggregator.h
bolt/include/bolt/Profile/DataReader.h
bolt/lib/Profile/DataAggregator.cpp
bolt/unittests/CMakeLists.txt
bolt/unittests/Profile/CMakeLists.txt [new file with mode: 0644]
bolt/unittests/Profile/DataAggregator.cpp [new file with mode: 0644]

index 48755c3..9d45f54 100644 (file)
@@ -278,7 +278,7 @@ private:
   /// Parser helpers
   /// Return false if we exhausted our parser buffer and finished parsing
   /// everything
-  bool hasData();
+  bool hasData() const { return !ParsingBuf.empty(); }
 
   /// Print heat map based on LBR samples.
   std::error_code printLBRHeatMap();
@@ -359,10 +359,6 @@ private:
   /// Parse a single pair of binary full path and associated build-id
   Optional<std::pair<StringRef, StringRef>> parseNameBuildIDPair();
 
-  /// Parse the output generated by "perf buildid-list" to extract build-ids
-  /// and return a file name matching a given \p FileBuildID.
-  Optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
-
   /// Coordinate reading and parsing of pre-aggregated file
   ///
   /// The regular perf2bolt aggregation job is to read perf output directly.
@@ -470,6 +466,17 @@ private:
   void dump(const LBREntry &LBR) const;
   void dump(const PerfBranchSample &Sample) const;
   void dump(const PerfMemSample &Sample) const;
+
+public:
+  /// If perf.data was collected without build ids, the buildid-list may contain
+  /// incomplete entries. Return true if the buffer containing
+  /// "perf buildid-list" output has only valid entries and is non- empty.
+  /// Return false otherwise.
+  bool hasAllBuildIDs();
+
+  /// Parse the output generated by "perf buildid-list" to extract build-ids
+  /// and return a file name matching a given \p FileBuildID.
+  Optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
 };
 } // namespace bolt
 } // namespace llvm
index d47457a..60cedfe 100644 (file)
@@ -502,6 +502,9 @@ protected:
   /// Maps of common LTO names to possible matching profiles.
   StringMap<std::vector<FuncBranchData *>> LTOCommonNameMap;
   StringMap<std::vector<FuncMemData *>> LTOCommonNameMemMap;
+
+public:
+  void setParsingBuffer(StringRef Buffer) { ParsingBuf = Buffer; }
 };
 
 } // namespace bolt
index ff0ed2e..39f17c4 100644 (file)
@@ -77,7 +77,7 @@ MaxSamples("max-samples",
   cl::Hidden,
   cl::cat(AggregatorCategory));
 
-static cl::opt<bool> ReadPreAggregated(
+cl::opt<bool> ReadPreAggregated(
     "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
     cl::cat(AggregatorCategory));
 
@@ -294,23 +294,22 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
 
   FileBuf = std::move(*MB);
   ParsingBuf = FileBuf->getBuffer();
-  if (ParsingBuf.empty()) {
-    errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
-              "data was recorded without it\n";
-    return;
-  }
 
-  Col = 0;
-  Line = 1;
   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
   if (!FileName) {
-    errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
-              "This indicates the input binary supplied for data aggregation "
-              "is not the same recorded by perf when collecting profiling "
-              "data, or there were no samples recorded for the binary. "
-              "Use -ignore-build-id option to override.\n";
-    if (!opts::IgnoreBuildID)
-      abort();
+    if (hasAllBuildIDs()) {
+      errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
+                "This indicates the input binary supplied for data aggregation "
+                "is not the same recorded by perf when collecting profiling "
+                "data, or there were no samples recorded for the binary. "
+                "Use -ignore-build-id option to override.\n";
+      if (!opts::IgnoreBuildID)
+        abort();
+    } else {
+      errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
+                "data was recorded without it\n";
+      return;
+    }
   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
     BuildIDBinaryName = std::string(*FileName);
@@ -1274,13 +1273,6 @@ DataAggregator::parseAggregatedLBREntry() {
                             Type};
 }
 
-bool DataAggregator::hasData() {
-  if (ParsingBuf.size() == 0)
-    return false;
-
-  return true;
-}
-
 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
   return opts::IgnoreInterruptLBR &&
          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
@@ -2152,6 +2144,11 @@ DataAggregator::parseNameBuildIDPair() {
   if (std::error_code EC = BuildIDStr.getError())
     return NoneType();
 
+  // If one of the strings is missing, don't issue a parsing error, but still
+  // do not return a value.
+  if (ParsingBuf[0] == '\n')
+    return NoneType();
+
   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
   if (std::error_code EC = NameStr.getError())
     return NoneType();
@@ -2160,16 +2157,48 @@ DataAggregator::parseNameBuildIDPair() {
   return std::make_pair(NameStr.get(), BuildIDStr.get());
 }
 
+bool DataAggregator::hasAllBuildIDs() {
+  const StringRef SavedParsingBuf = ParsingBuf;
+
+  if (!hasData())
+    return false;
+
+  bool HasInvalidEntries = false;
+  while (hasData()) {
+    if (!parseNameBuildIDPair()) {
+      HasInvalidEntries = true;
+      break;
+    }
+  }
+
+  ParsingBuf = SavedParsingBuf;
+
+  return !HasInvalidEntries;
+}
+
 Optional<StringRef>
 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
+  const StringRef SavedParsingBuf = ParsingBuf;
+
+  StringRef FileName;
   while (hasData()) {
     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
-    if (!IDPair)
-      return NoneType();
+    if (!IDPair) {
+      consumeRestOfLine();
+      continue;
+    }
 
-    if (IDPair->second.startswith(FileBuildID))
-      return sys::path::filename(IDPair->first);
+    if (IDPair->second.startswith(FileBuildID)) {
+      FileName = sys::path::filename(IDPair->first);
+      break;
+    }
   }
+
+  ParsingBuf = SavedParsingBuf;
+
+  if (!FileName.empty())
+    return FileName;
+
   return NoneType();
 }
 
index b5c7f17..77159e9 100644 (file)
@@ -6,3 +6,4 @@ function(add_bolt_unittest test_dirname)
 endfunction()
 
 add_subdirectory(Core)
+add_subdirectory(Profile)
diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b0f1ec0
--- /dev/null
@@ -0,0 +1,9 @@
+add_bolt_unittest(ProfileTests
+  DataAggregator.cpp
+  )
+
+target_link_libraries(ProfileTests
+  PRIVATE
+  LLVMBOLTProfile
+  )
+
diff --git a/bolt/unittests/Profile/DataAggregator.cpp b/bolt/unittests/Profile/DataAggregator.cpp
new file mode 100644 (file)
index 0000000..a6e394d
--- /dev/null
@@ -0,0 +1,51 @@
+//===- bolt/unittests/Profile/DataAggregator.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/Support/CommandLine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+namespace opts {
+extern cl::opt<bool> ReadPreAggregated;
+}
+
+TEST(DataAggregatorTest, buildID) {
+  // Avoid looking for perf tool.
+  opts::ReadPreAggregated = true;
+
+  DataAggregator DA("<pseudo input>");
+  Optional<StringRef> FileName;
+
+  DA.setParsingBuffer("");
+  ASSERT_FALSE(DA.hasAllBuildIDs());
+  FileName = DA.getFileNameForBuildID("1234");
+  ASSERT_FALSE(FileName);
+
+  StringRef PartialValidBuildIDs = "     File0\n"
+                                   "1111 File1\n"
+                                   "     File2\n";
+  DA.setParsingBuffer(PartialValidBuildIDs);
+  ASSERT_FALSE(DA.hasAllBuildIDs());
+  FileName = DA.getFileNameForBuildID("0000");
+  ASSERT_FALSE(FileName);
+  FileName = DA.getFileNameForBuildID("1111");
+  ASSERT_EQ(*FileName, "File1");
+
+  StringRef AllValidBuildIDs = "0000 File0\n"
+                               "1111 File1\n"
+                               "2222 File2\n";
+  DA.setParsingBuffer(AllValidBuildIDs);
+  ASSERT_TRUE(DA.hasAllBuildIDs());
+  FileName = DA.getFileNameForBuildID("1234");
+  ASSERT_FALSE(FileName);
+  FileName = DA.getFileNameForBuildID("2222");
+  ASSERT_EQ(*FileName, "File2");
+}