[Object] Refactor build ID parsing into Object lib.
authorDaniel Thornburgh <dthorn@google.com>
Mon, 3 Apr 2023 18:15:31 +0000 (11:15 -0700)
committerDaniel Thornburgh <dthorn@google.com>
Wed, 5 Apr 2023 18:25:26 +0000 (11:25 -0700)
This makes parsing for build IDs in the markup filter slightly more
permissive, in line with fromHex.

It also removes the distinction between missing build ID and empty build
ID; empty build IDs aren't a useful concept, since their purpose is to
uniquely identify a binary. This removes a layer of indirection wherever
build IDs are obtained.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D147485

llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
llvm/include/llvm/Object/BuildID.h
llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
llvm/lib/Debuginfod/Debuginfod.cpp
llvm/lib/Object/BuildID.cpp
llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
llvm/lib/ProfileData/RawMemProfReader.cpp
llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
llvm/tools/llvm-objdump/llvm-objdump.cpp
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

index 5342556..429f9cb 100644 (file)
 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
 
-#include "Markup.h"
-
-#include <map>
-
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
+#include <map>
 
 namespace llvm {
 namespace symbolize {
@@ -116,7 +115,7 @@ private:
   std::optional<uint64_t> parseAddr(StringRef Str) const;
   std::optional<uint64_t> parseModuleID(StringRef Str) const;
   std::optional<uint64_t> parseSize(StringRef Str) const;
-  std::optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
+  object::BuildID parseBuildID(StringRef Str) const;
   std::optional<std::string> parseMode(StringRef Str) const;
   std::optional<PCType> parsePCType(StringRef Str) const;
   std::optional<uint64_t> parseFrameNumber(StringRef Str) const;
index 91c247b..b20f32b 100644 (file)
@@ -29,8 +29,11 @@ typedef ArrayRef<uint8_t> BuildIDRef;
 
 class ObjectFile;
 
+/// Parses a build ID from a hex string.
+BuildID parseBuildID(StringRef Str);
+
 /// Returns the build ID, if any, contained in the given object file.
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj);
+BuildIDRef getBuildID(const ObjectFile *Obj);
 
 /// BuildIDFetcher searches local cache directories for debug info.
 class BuildIDFetcher {
index 5e9d8ac..48833a7 100644 (file)
@@ -513,8 +513,9 @@ MarkupFilter::parseModule(const MarkupNode &Element) const {
   }
   if (!checkNumFields(Element, 4))
     return std::nullopt;
-  ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID,
-                        parseBuildID(Element.Fields[3]));
+  SmallVector<uint8_t> BuildID = parseBuildID(Element.Fields[3]);
+  if (BuildID.empty())
+    return std::nullopt;
   return Module{ID, Name.str(), std::move(BuildID)};
 }
 
@@ -597,16 +598,11 @@ std::optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
 }
 
 // Parse a build ID (%x in the spec).
-std::optional<SmallVector<uint8_t>>
-MarkupFilter::parseBuildID(StringRef Str) const {
-  std::string Bytes;
-  if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) {
+object::BuildID MarkupFilter::parseBuildID(StringRef Str) const {
+  object::BuildID BID = llvm::object::parseBuildID(Str);
+  if (BID.empty())
     reportTypeError(Str, "build ID");
-    return std::nullopt;
-  }
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+  return BID;
 }
 
 // Parses the mode string for an mmap element.
index a92dfbc..499ceae 100644 (file)
@@ -363,12 +363,10 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
                                                 const ELFObjectFileBase *Obj,
                                                 const std::string &ArchName) {
   auto BuildID = getBuildID(Obj);
-  if (!BuildID)
-    return nullptr;
-  if (BuildID->size() < 2)
+  if (BuildID.size() < 2)
     return nullptr;
   std::string DebugBinaryPath;
-  if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath))
+  if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
     return nullptr;
   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
   if (!DbgObjOrErr) {
index af84fe4..394f2b2 100644 (file)
@@ -412,11 +412,11 @@ Error DebuginfodCollection::findBinaries(StringRef Path) {
         if (!Object)
           continue;
 
-        std::optional<BuildIDRef> ID = getBuildID(Object);
-        if (!ID)
+        BuildIDRef ID = getBuildID(Object);
+        if (ID.empty())
           continue;
 
-        std::string IDString = buildIDToString(*ID);
+        std::string IDString = buildIDToString(ID);
         if (Object->hasDebugInfo()) {
           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
index 795c22e..887798a 100644 (file)
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
-namespace llvm {
-namespace object {
+using namespace llvm;
+using namespace llvm::object;
 
 namespace {
 
-template <typename ELFT>
-std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
+template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) {
   auto PhdrsOrErr = Obj.program_headers();
   if (!PhdrsOrErr) {
     consumeError(PhdrsOrErr.takeError());
@@ -45,15 +44,24 @@ std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
 
 } // namespace
 
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj) {
+BuildID llvm::object::parseBuildID(StringRef Str) {
+  std::string Bytes;
+  if (!tryGetFromHex(Str, Bytes))
+    return {};
+  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+                            Bytes.size());
+  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
+BuildIDRef llvm::object::getBuildID(const ObjectFile *Obj) {
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   return std::nullopt;
 }
 
@@ -88,6 +96,3 @@ std::optional<std::string> BuildIDFetcher::fetch(BuildIDRef BuildID) const {
   }
   return std::nullopt;
 }
-
-} // namespace object
-} // namespace llvm
index c83fb04..0573732 100644 (file)
@@ -955,7 +955,7 @@ static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
 static Expected<std::unique_ptr<BinaryCoverageReader>>
 loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
                  StringRef CompilationDir = "",
-                 std::optional<object::BuildIDRef> *BinaryID = nullptr) {
+                 object::BuildIDRef *BinaryID = nullptr) {
   std::unique_ptr<ObjectFile> OF;
   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
     // If we have a universal binary, try to look up the object for the
@@ -1151,14 +1151,14 @@ BinaryCoverageReader::create(
     return std::move(Readers);
   }
 
-  std::optional<object::BuildIDRef> BinaryID;
+  object::BuildIDRef BinaryID;
   auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch, CompilationDir,
                                       BinaryIDs ? &BinaryID : nullptr);
   if (!ReaderOrErr)
     return ReaderOrErr.takeError();
   Readers.push_back(std::move(ReaderOrErr.get()));
-  if (BinaryID)
-    BinaryIDs->push_back(*BinaryID);
+  if (!BinaryID.empty())
+    BinaryIDs->push_back(BinaryID);
   return std::move(Readers);
 }
 
index 0c14ab4..1c7323e 100644 (file)
@@ -337,12 +337,11 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
 
 Error RawMemProfReader::setupForSymbolization() {
   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
-  auto BuildIdOr = object::getBuildID(Object);
-  if (!BuildIdOr.has_value())
+  object::BuildIDRef BinaryId = object::getBuildID(Object);
+  if (BinaryId.empty())
     return make_error<StringError>(Twine("No build id found in binary ") +
                                        Binary.getBinary()->getFileName(),
                                    inconvertibleErrorCode());
-  llvm::ArrayRef<uint8_t> BinaryId = BuildIdOr.value();
 
   int NumMatched = 0;
   for (const auto &Entry : SegmentInfo) {
index 13e1d7f..c9aff84 100644 (file)
@@ -2,25 +2,25 @@ RUN: split-file %s %t
 RUN: llvm-symbolizer --filter-markup < %t/log 2> %t.err
 RUN: FileCheck %s -input-file=%t.err --match-full-lines
 
-CHECK-NOT: '0x4f'
-CHECK-NOT: '00'
-CHECK: error: expected address; found ''
-CHECK: error: expected address; found '42'
-CHECK: error: expected address; found '0xgg'
+CHECK-NOT: error: expected address; found '0x4f'
+CHECK-NOT: error: expected address; found '00'
+CHECK:     error: expected address; found ''
+CHECK:     error: expected address; found '42'
+CHECK:     error: expected address; found '0xgg'
 
-CHECK-NOT: '0'
-CHECK: error: expected module ID; found ''
-CHECK: error: expected module ID; found '-1'
-CHECK-NOT: '077'
-CHECK: error: expected module ID; found '079'
-CHECK-NOT: '0xff'
-CHECK: error: expected module ID; found '0xfg'
-CHECK: error: expected module ID; found '0x'
+CHECK-NOT: error: expected module ID; found '0'
+CHECK:     error: expected module ID; found ''
+CHECK:     error: expected module ID; found '-1'
+CHECK-NOT: error: expected module ID; found '077'
+CHECK:     error: expected module ID; found '079'
+CHECK-NOT: error: expected module ID; found '0xff'
+CHECK:     error: expected module ID; found '0xfg'
+CHECK:     error: expected module ID; found '0x'
 
-CHECK: error: expected build ID; found ''
-CHECK: error: expected build ID; found '0'
-CHECK-NOT: '0xff'
-CHECK: error: expected build ID; found 'fg'
+CHECK:     error: expected build ID; found ''
+CHECK-NOT: error: expected build ID; found '0'
+CHECK-NOT: error: expected build ID; found '0xff'
+CHECK:     error: expected build ID; found 'fg'
 
 ;--- log
 {{{mmap:0x4f:1:unknown}}}
index e3d1419..5abcdcc 100644 (file)
@@ -1285,10 +1285,10 @@ static void createFakeELFSections(ObjectFile &Obj) {
 // Build ID. Returns std::nullopt if nothing was found.
 static std::optional<OwningBinary<Binary>>
 fetchBinaryByBuildID(const ObjectFile &Obj) {
-  std::optional<object::BuildIDRef> BuildID = getBuildID(&Obj);
-  if (!BuildID)
+  object::BuildIDRef BuildID = getBuildID(&Obj);
+  if (BuildID.empty())
     return std::nullopt;
-  std::optional<std::string> Path = BIDFetcher->fetch(*BuildID);
+  std::optional<std::string> Path = BIDFetcher->fetch(BuildID);
   if (!Path)
     return std::nullopt;
   Expected<OwningBinary<Binary>> DebugBinary = createBinary(*Path);
@@ -2943,13 +2943,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
 
 static object::BuildID parseBuildIDArg(const opt::Arg *A) {
   StringRef V(A->getValue());
-  std::string Bytes;
-  if (!tryGetFromHex(V, Bytes))
+  object::BuildID BID = parseBuildID(V);
+  if (BID.empty())
     reportCmdLineError(A->getSpelling() + ": expected a build ID, but got '" +
                        V + "'");
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return object::BuildID(BuildID.begin(), BuildID.end());
+  return BID;
 }
 
 void objdump::invalidArgValue(const opt::Arg *A) {
index dbec2a9..419f998 100644 (file)
@@ -125,15 +125,6 @@ static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
   HTTPClient::initialize();
 }
 
-static object::BuildID parseBuildID(StringRef Str) {
-  std::string Bytes;
-  if (!tryGetFromHex(Str, Bytes))
-    return {};
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return object::BuildID(BuildID.begin(), BuildID.end());
-}
-
 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
                          StringRef InputString, Command &Cmd,
                          std::string &ModuleName, object::BuildID &BuildID,