[memprof] Record BuildIDs in the raw profile.
authorSnehasish Kumar <snehasishk@google.com>
Tue, 28 Feb 2023 21:33:30 +0000 (21:33 +0000)
committerSnehasish Kumar <snehasishk@google.com>
Mon, 13 Mar 2023 19:28:38 +0000 (19:28 +0000)
This patch adds support for recording BuildIds using the sanitizer
ListOfModules API. We add another entry to the SegmentEntry struct and
change the memprof raw version.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D145190

26 files changed:
clang/test/CodeGen/Inputs/memprof.exe
clang/test/CodeGen/Inputs/memprof.memprofraw
compiler-rt/include/profile/MemProfData.inc
compiler-rt/lib/memprof/memprof_allocator.cpp
compiler-rt/lib/memprof/memprof_rawprofile.cpp
compiler-rt/lib/memprof/memprof_rawprofile.h
llvm/include/llvm/ProfileData/MemProfData.inc
llvm/lib/ProfileData/RawMemProfReader.cpp
llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw
llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe
llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe [new file with mode: 0755]
llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw [new file with mode: 0644]
llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw
llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe
llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw
llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe
llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw
llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
llvm/test/tools/llvm-profdata/memprof-basic.test
llvm/test/tools/llvm-profdata/memprof-buildid.test [new file with mode: 0644]
llvm/test/tools/llvm-profdata/memprof-inline.test
llvm/test/tools/llvm-profdata/memprof-multi.test

index ad7a041..c03b8b6 100755 (executable)
Binary files a/clang/test/CodeGen/Inputs/memprof.exe and b/clang/test/CodeGen/Inputs/memprof.exe differ
index e64214a..c3e0818 100644 (file)
Binary files a/clang/test/CodeGen/Inputs/memprof.memprofraw and b/clang/test/CodeGen/Inputs/memprof.memprofraw differ
index c533073..b82a4ba 100644 (file)
@@ -19,6 +19,7 @@
  * synced up.
  *
 \*===----------------------------------------------------------------------===*/
+#include <string.h>
 
 #ifdef _MSC_VER
 #define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
@@ -32,7 +33,9 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 2ULL
+#define MEMPROF_RAW_VERSION 3ULL
+
+#define MEMPROF_BUILDID_MAX_SIZE 32ULL
 
 namespace llvm {
 namespace memprof {
@@ -46,37 +49,40 @@ PACKED(struct Header {
   uint64_t StackOffset;
 });
 
-
 // A struct describing the information necessary to describe a /proc/maps
 // segment entry for a particular binary/library identified by its build id.
 PACKED(struct SegmentEntry {
   uint64_t Start;
   uint64_t End;
   uint64_t Offset;
-  // This field is unused until sanitizer procmaps support for build ids for
-  // Linux-Elf is implemented.
-  uint8_t BuildId[32] = {0};
+  uint64_t BuildIdSize;
+  uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
 
-  SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
-    Start(S), End(E), Offset(O) {}
+  // This constructor is only used in tests so don't set the BuildId.
+  SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
+      : Start(S), End(E), Offset(O), BuildIdSize(0) {}
 
   SegmentEntry(const SegmentEntry& S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
+    BuildIdSize = S.BuildIdSize;
+    memcpy(BuildId, S.BuildId, S.BuildIdSize);
   }
 
   SegmentEntry& operator=(const SegmentEntry& S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
+    BuildIdSize = S.BuildIdSize;
+    memcpy(BuildId, S.BuildId, S.BuildIdSize);
     return *this;
   }
 
   bool operator==(const SegmentEntry& S) const {
-    return Start == S.Start &&
-           End == S.End &&
-           Offset == S.Offset;
+    return Start == S.Start && End == S.End && Offset == S.Offset &&
+           BuildIdSize == S.BuildIdSize &&
+           memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
   }
 });
 
index 6e3fa7f..751e4c4 100644 (file)
 #include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 
 #include <sched.h>
@@ -295,8 +295,9 @@ struct Allocator {
       // memprof_rawprofile.h.
       char *Buffer = nullptr;
 
-      MemoryMappingLayout Layout(/*cache_enabled=*/true);
-      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Layout, Buffer);
+      __sanitizer::ListOfModules Modules;
+      Modules.init();
+      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
       CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
       report_file.Write(Buffer, BytesSerialized);
     }
index 88f3c34..d1447e7 100644 (file)
@@ -33,12 +33,14 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock *const &MIB,
 }
 } // namespace
 
-u64 SegmentSizeBytes(MemoryMappingLayoutBase &Layout) {
+u64 SegmentSizeBytes(__sanitizer::ListOfModules &Modules) {
   u64 NumSegmentsToRecord = 0;
-  MemoryMappedSegment segment;
-  for (Layout.Reset(); Layout.Next(&segment);)
-    if (segment.IsReadable() && segment.IsExecutable())
-      NumSegmentsToRecord++;
+  for (const auto &Module : Modules) {
+    for (const auto &Segment : Module.ranges()) {
+      if (Segment.executable)
+        NumSegmentsToRecord++;
+    }
+  }
 
   return sizeof(u64) // A header which stores the number of records.
          + sizeof(SegmentEntry) * NumSegmentsToRecord;
@@ -51,28 +53,31 @@ u64 SegmentSizeBytes(MemoryMappingLayoutBase &Layout) {
 // Start
 // End
 // Offset
-// BuildID 32B
+// UuidSize
+// Uuid 32B
 // ----------
 // ...
-void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout,
+void SerializeSegmentsToBuffer(__sanitizer::ListOfModules &Modules,
                                const u64 ExpectedNumBytes, char *&Buffer) {
   char *Ptr = Buffer;
   // Reserve space for the final count.
   Ptr += sizeof(u64);
 
   u64 NumSegmentsRecorded = 0;
-  MemoryMappedSegment segment;
-
-  for (Layout.Reset(); Layout.Next(&segment);) {
-    if (segment.IsReadable() && segment.IsExecutable()) {
-      // TODO: Record segment.uuid when it is implemented for Linux-Elf.
-      SegmentEntry Entry(segment.start, segment.end, segment.offset);
-      memcpy(Ptr, &Entry, sizeof(SegmentEntry));
-      Ptr += sizeof(SegmentEntry);
-      NumSegmentsRecorded++;
+
+  for (const auto &Module : Modules) {
+    for (const auto &Segment : Module.ranges()) {
+      if (Segment.executable) {
+        SegmentEntry Entry(Segment.beg, Segment.end, Module.base_address());
+        CHECK(Module.uuid_size() <= MEMPROF_BUILDID_MAX_SIZE);
+        Entry.BuildIdSize = Module.uuid_size();
+        memcpy(Entry.BuildId, Module.uuid(), Module.uuid_size());
+        memcpy(Ptr, &Entry, sizeof(SegmentEntry));
+        Ptr += sizeof(SegmentEntry);
+        NumSegmentsRecorded++;
+      }
     }
   }
-
   // Store the number of segments we recorded in the space we reserved.
   *((u64 *)Buffer) = NumSegmentsRecorded;
   CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
@@ -198,11 +203,11 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
 // ----------
 // Optional Padding Bytes
 // ...
-u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
+u64 SerializeToRawProfile(MIBMapTy &MIBMap, __sanitizer::ListOfModules &Modules,
                           char *&Buffer) {
   // Each section size is rounded up to 8b since the first entry in each section
   // is a u64 which holds the number of entries in the section by convention.
-  const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Layout), 8);
+  const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Modules), 8);
 
   Vector<u64> StackIds;
   MIBMap.ForEach(RecordStackId, reinterpret_cast<void *>(&StackIds));
@@ -232,7 +237,7 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
                 sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
   Ptr = WriteBytes(header, Ptr);
 
-  SerializeSegmentsToBuffer(Layout, NumSegmentBytes, Ptr);
+  SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
   Ptr += NumSegmentBytes;
 
   SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
index 575104e..6a54b05 100644 (file)
@@ -2,13 +2,13 @@
 #define MEMPROF_RAWPROFILE_H_
 
 #include "memprof_mibmap.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_common.h"
 
 namespace __memprof {
 // Serialize the in-memory representation of the memprof profile to the raw
 // binary format. The format itself is documented memprof_rawprofile.cpp.
-u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout,
-                          char *&Buffer);
+u64 SerializeToRawProfile(MIBMapTy &BlockCache,
+                          __sanitizer::ListOfModules &Modules, char *&Buffer);
 } // namespace __memprof
 
 #endif // MEMPROF_RAWPROFILE_H_
index c533073..b82a4ba 100644 (file)
@@ -19,6 +19,7 @@
  * synced up.
  *
 \*===----------------------------------------------------------------------===*/
+#include <string.h>
 
 #ifdef _MSC_VER
 #define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
@@ -32,7 +33,9 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 2ULL
+#define MEMPROF_RAW_VERSION 3ULL
+
+#define MEMPROF_BUILDID_MAX_SIZE 32ULL
 
 namespace llvm {
 namespace memprof {
@@ -46,37 +49,40 @@ PACKED(struct Header {
   uint64_t StackOffset;
 });
 
-
 // A struct describing the information necessary to describe a /proc/maps
 // segment entry for a particular binary/library identified by its build id.
 PACKED(struct SegmentEntry {
   uint64_t Start;
   uint64_t End;
   uint64_t Offset;
-  // This field is unused until sanitizer procmaps support for build ids for
-  // Linux-Elf is implemented.
-  uint8_t BuildId[32] = {0};
+  uint64_t BuildIdSize;
+  uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
 
-  SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
-    Start(S), End(E), Offset(O) {}
+  // This constructor is only used in tests so don't set the BuildId.
+  SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
+      : Start(S), End(E), Offset(O), BuildIdSize(0) {}
 
   SegmentEntry(const SegmentEntry& S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
+    BuildIdSize = S.BuildIdSize;
+    memcpy(BuildId, S.BuildId, S.BuildIdSize);
   }
 
   SegmentEntry& operator=(const SegmentEntry& S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
+    BuildIdSize = S.BuildIdSize;
+    memcpy(BuildId, S.BuildId, S.BuildIdSize);
     return *this;
   }
 
   bool operator==(const SegmentEntry& S) const {
-    return Start == S.Start &&
-           End == S.End &&
-           Offset == S.Offset;
+    return Start == S.Start && End == S.End && Offset == S.Offset &&
+           BuildIdSize == S.BuildIdSize &&
+           memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
   }
 });
 
index 958bbc5..dd62a2f 100644 (file)
@@ -158,15 +158,13 @@ bool isRuntimePath(const StringRef Path) {
 }
 
 std::string getBuildIdString(const SegmentEntry &Entry) {
-  constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
-  constexpr uint8_t Zeros[Size] = {0};
   // If the build id is unset print a helpful string instead of all zeros.
-  if (memcmp(Entry.BuildId, Zeros, Size) == 0)
+  if (Entry.BuildIdSize == 0)
     return "<None>";
 
   std::string Str;
   raw_string_ostream OS(Str);
-  for (size_t I = 0; I < Size; I++) {
+  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
     OS << format_hex_no_prefix(Entry.BuildId[I], 2);
   }
   return OS.str();
index 0ed4016..b10c2f9 100755 (executable)
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe differ
index 7511ca7..790249a 100644 (file)
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw differ
index 04330fe..d5160e7 100644 (file)
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw differ
index d321a05..9db8618 100755 (executable)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe differ
index 612eaa0..66210b4 100644 (file)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw differ
diff --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe
new file mode 100755 (executable)
index 0000000..103667b
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe differ
diff --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw
new file mode 100644 (file)
index 0000000..11579b2
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw differ
index 6a5594e..3bc3ff2 100755 (executable)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe differ
index 8e0ad19..dad562e 100644 (file)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw differ
index 02100d0..65ce444 100755 (executable)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe differ
index 8756698..651e625 100644 (file)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw differ
index 900a453..8a3ddeb 100755 (executable)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe differ
index b56b9a9..4ef5eee 100644 (file)
Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw differ
index e709254..5365a0b 100755 (executable)
@@ -72,6 +72,7 @@ INPUTS["basic"]="BASIC"
 INPUTS["inline"]="INLINE"
 INPUTS["multi"]="MULTI"
 INPUTS["pic"]="BASIC;-pie"
+INPUTS["buildid"]="BASIC;-Wl,-build-id=sha1"
 
 for name in "${!INPUTS[@]}"; do
   IFS=";" read -r src flags <<< "${INPUTS[$name]}"
index 2d17251..8eaa2fa 100644 (file)
@@ -8,17 +8,17 @@ additional allocations which do not originate from the main binary are pruned.
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:   Summary:
-CHECK-NEXT:     Version: 2
+CHECK-NEXT:     Version: 3
 CHECK-NEXT:     NumSegments: {{[0-9]+}}
 CHECK-NEXT:     NumMibInfo: 2
 CHECK-NEXT:     NumAllocFunctions: 1
 CHECK-NEXT:     NumStackOffsets: 2
 CHECK-NEXT:   Segments:
 CHECK-NEXT:   -
-CHECK-NEXT:     BuildId: <None>
-CHECK-NEXT:     Start: 0x{{[0-9]+}}
-CHECK-NEXT:     End: 0x{{[0-9]+}}
-CHECK-NEXT:     Offset: 0x{{[0-9]+}}
+CHECK-NEXT:     BuildId: {{[[:xdigit:]]+}}
+CHECK-NEXT:     Start: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     End: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
diff --git a/llvm/test/tools/llvm-profdata/memprof-buildid.test b/llvm/test/tools/llvm-profdata/memprof-buildid.test
new file mode 100644 (file)
index 0000000..9b055d3
--- /dev/null
@@ -0,0 +1,12 @@
+REQUIRES: x86_64-linux
+
+To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang
+RUN: llvm-readelf --notes %p/Inputs/buildid.memprofexe > %t1.txt
+RUN: llvm-profdata show --memory %p/Inputs/buildid.memprofraw --profiled-binary %p/Inputs/buildid.memprofexe -o -  > %t2.txt
+RUN: cat %t1.txt %t2.txt | FileCheck %s
+
+COM: First extract the id from the llvm-readelf output.
+CHECK: Build ID: [[ID:[[:xdigit:]]+]]
+
+COM: Then match it with the profdata output.
+CHECK: BuildId: {{.*}}[[ID]]
index 571beb9..dd842c0 100644 (file)
@@ -5,17 +5,17 @@ RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:  Summary:
-CHECK-NEXT:    Version: 2
+CHECK-NEXT:    Version: 3
 CHECK-NEXT:    NumSegments: {{[0-9]+}}
 CHECK-NEXT:    NumMibInfo: 2
 CHECK-NEXT:    NumAllocFunctions: 2
 CHECK-NEXT:    NumStackOffsets: 1
 CHECK-NEXT:  Segments:
 CHECK-NEXT:  -
-CHECK-NEXT:    BuildId: <None>
-CHECK-NEXT:    Start: 0x{{[0-9]+}}
-CHECK-NEXT:    End: 0x{{[0-9]+}}
-CHECK-NEXT:    Offset: 0x{{[0-9]+}}
+CHECK-NEXT:    BuildId: {{[[:xdigit:]]+}}
+CHECK-NEXT:    Start: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:    End: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:    Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:  -
 
 CHECK:  Records:
index 5918a95..f3cdbd1 100644 (file)
@@ -7,7 +7,7 @@ We expect 2 MIB entries, 1 each for the malloc calls in the program.
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:  Summary:
-CHECK-NEXT:    Version: 2
+CHECK-NEXT:    Version: 3
 CHECK-NEXT:    NumSegments: {{[0-9]+}}
 CHECK-NEXT:    NumMibInfo: 2
 CHECK-NEXT:    NumAllocFunctions: 1