Add hashing of the .text section to ProcessMinidump.
author Greg Clayton <gclayton@fb.com>
Mon, 10 Aug 2020 22:07:47 +0000 (15:07 -0700)
committer Greg Clayton <gclayton@fb.com>
Mon, 24 Aug 2020 18:43:50 +0000 (11:43 -0700)
Breakpad will always have a UUID for binaries when it creates minidump files. If an ELF file has a GNU build ID, it will use that. If it doesn't, it will create one by hashing up to the first 4096 bytes of the .text section. LLDB was not able to load these binaries even when we had the right binary because the UUID didn't match. LLDB will use the GNU build ID first as the main UUID for a binary and fall back to an 8 byte CRC if a binary doesn't have one. With this fix, we will check for the Breakpad hash or the Facebook hash (a modified version of the breakpad hash that collides a bit less) and accept binaries when these hashes match.

Differential Revision: https://reviews.llvm.org/D86261

lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py
lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-overflow.yaml [new file with mode: 0644]
lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad.yaml [new file with mode: 0644]
lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-breakpad-uuid-match.yaml [new file with mode: 0644]
lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-facebook-uuid-match.yaml [new file with mode: 0644]

index fc8ee34..af378ea 100644 (file)
@@ -121,6 +121,72 @@ private:
   lldb::addr_t m_base;
   lldb::addr_t m_size;
 };
+
+/// Duplicate the HashElfTextSection() from the breakpad sources.
+///
+/// Breakpad, a Google crash log reporting tool suite, creates minidump files
+/// for many different architectures. When using Breakpad to create ELF
+/// minidumps, it will check for a GNU build ID when creating a minidump file
+/// and if one doesn't exist in the file, it will say the UUID of the file is a
+/// checksum of up to the first 4096 bytes of the .text section. Facebook also
+/// uses breakpad and modified this hash to avoid collisions so we can
+/// calculate and check for this as well.
+///
+/// The breakpad code might end up hashing up to 15 bytes that immediately
+/// follow the .text section in the file, so this code must do exactly what it
+/// does so we can get an exact match for the UUID.
+///
+/// \param[in] module_sp The module to grab the .text section from.
+///
+/// \param[in/out] breakpad_uuid A vector that will receive the calculated
+///                breakpad .text hash.
+///
+/// \param[in/out] facebook_uuid A vector that will receive the calculated
+///                facebook .text hash.
+///
+void HashElfTextSection(ModuleSP module_sp, std::vector<uint8_t> &breakpad_uuid,
+                        std::vector<uint8_t> &facebook_uuid) {
+  SectionList *sect_list = module_sp->GetSectionList();
+  if (sect_list == nullptr)
+    return;
+  SectionSP sect_sp = sect_list->FindSectionByName(ConstString(".text"));
+  if (!sect_sp)
+    return;
+  constexpr size_t kMDGUIDSize = 16;
+  constexpr size_t kBreakpadPageSize = 4096;
+  // The breakpad code has a bug where it might access beyond the end of a
+  // .text section by up to 15 bytes, so we must ensure we round up to the
+  // next kMDGUIDSize byte boundary.
+  DataExtractor data;
+  const size_t text_size = sect_sp->GetFileSize();
+  const size_t read_size = std::min<size_t>(
+      llvm::alignTo(text_size, kMDGUIDSize), kBreakpadPageSize);
+  sect_sp->GetObjectFile()->GetData(sect_sp->GetFileOffset(), read_size, data);
+
+  breakpad_uuid.assign(kMDGUIDSize, 0);
+  facebook_uuid.assign(kMDGUIDSize, 0);
+
+  // The only difference between the breakpad hash and the facebook hash is the
+  // hashing of the text section size into the hash prior to hashing the .text
+  // contents.
+  for (size_t i = 0; i < kMDGUIDSize; i++)
+    facebook_uuid[i] ^= text_size % 255;
+
+  // This code carefully duplicates how the hash was created in Breakpad
+  // sources, including the error where it might has an extra 15 bytes past the
+  // end of the .text section if the .text section is less than a page size in
+  // length.
+  const uint8_t *ptr = data.GetDataStart();
+  const uint8_t *ptr_end = data.GetDataEnd();
+  while (ptr < ptr_end) {
+    for (unsigned i = 0; i < kMDGUIDSize; i++) {
+      breakpad_uuid[i] ^= ptr[i];
+      facebook_uuid[i] ^= ptr[i];
+    }
+    ptr += kMDGUIDSize;
+  }
+}
+
 } // namespace
 
 ConstString ProcessMinidump::GetPluginNameStatic() {
@@ -494,10 +560,33 @@ void ProcessMinidump::ReadModuleList() {
         const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
             mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
         if (!match) {
+          // Breakpad generates minidump files, and if there is no GNU build
+          // ID in the binary, it will calculate a UUID by hashing the first
+          // 4096 bytes of the .text section and using that as the UUID for a
+          // module in the minidump. Facebook uses a modified breakpad client
+          // that uses a slightly modified version of this hash to avoid
+          // collisions. Check for UUIDs from the minidump that match these
+          // cases and accept the module we find if they do match.
+          std::vector<uint8_t> breakpad_uuid;
+          std::vector<uint8_t> facebook_uuid;
+          HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
+          if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
+            LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
+          } else if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
+            LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
+          } else {
+            // The UUID wasn't a partial match and didn't match the .text hash
+            // so remove the module from the target, we will need to create a
+            // placeholder object file.
             GetTarget().GetImages().Remove(module_sp);
             module_sp.reset();
+          }
+        } else {
+          LLDB_LOG(log, "Partial uuid match for {0}.", name);
         }
       }
+    } else {
+      LLDB_LOG(log, "Full uuid match for {0}.", name);
     }
     if (module_sp) {
       // Watch out for place holder modules that have different paths, but the
index cc6d6fb..c4dcddb 100644 (file)
@@ -179,6 +179,69 @@ class MiniDumpUUIDTestCase(TestBase):
                            "/invalid/path/on/current/system/libuuidmismatch.so",
                            "7295E17C-6668-9E05-CBB5-DEE5003865D5")
 
+    def test_breakpad_hash_match(self):
+        """
+            Breakpad creates minidump files using CvRecord in each module whose
+            signature is set to PDB70 where the UUID is a hash generated by
+            breakpad of the .text section. This is only done when the
+            executable has no ELF build ID.
+
+            This test verifies that if we have a minidump with a 16 byte UUID,
+            that we are able to associate a symbol file with no ELF build ID
+            and match it up by hashing the .text section.
+        """
+        so_path = self.getBuildArtifact("libbreakpad.so")
+        self.yaml2obj("libbreakpad.yaml", so_path)
+        cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
+        self.dbg.HandleCommand(cmd)
+        modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
+        self.assertEqual(1, len(modules))
+        # LLDB makes up its own UUID as well when there is no build ID, so we
+        # will check that this matches.
+        self.verify_module(modules[0], so_path, "D9C480E8")
+
+    def test_breakpad_overflow_hash_match(self):
+        """
+            This is similar to test_breakpad_hash_match, but it verifies that
+            if the .text section does not end on a 16 byte boundary, then the
+            hash will overflow into the next section's data by up to 15 bytes.
+            This verifies that we are able to match what breakpad does, since
+            breakpad hashes those extra bytes as well.
+        """
+        so_path = self.getBuildArtifact("libbreakpad.so")
+        self.yaml2obj("libbreakpad-overflow.yaml", so_path)
+        cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
+        self.dbg.HandleCommand(cmd)
+        modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
+        self.assertEqual(1, len(modules))
+        # LLDB makes up its own UUID as well when there is no build ID, so we
+        # will check that this matches.
+        self.verify_module(modules[0], so_path, "48EB9FD7")
+
+
+    def test_facebook_hash_match(self):
+        """
+            Breakpad creates minidump files using CvRecord in each module whose
+            signature is set to PDB70 where the UUID is a hash generated by
+            breakpad of the .text section and Facebook modified this hash to
+            avoid collisions. This is only done when the executable has no ELF
+            build ID.
+
+            This test verifies that if we have a minidump with a 16 byte UUID,
+            that we are able to associate a symbol file with no ELF build ID
+            and match it up by hashing the .text section like Facebook does.
+        """
+        so_path = self.getBuildArtifact("libbreakpad.so")
+        self.yaml2obj("libbreakpad.yaml", so_path)
+        cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
+        self.dbg.HandleCommand(cmd)
+        modules = self.get_minidump_modules("linux-arm-facebook-uuid-match.yaml")
+        self.assertEqual(1, len(modules))
+        # LLDB makes up its own UUID as well when there is no build ID, so we
+        # will check that this matches.
+        self.verify_module(modules[0], so_path, "D9C480E8")
+
+
     def test_relative_module_name(self):
         old_cwd = os.getcwd()
         self.addTearDownHook(lambda: os.chdir(old_cwd))
diff --git a/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-overflow.yaml b/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-overflow.yaml
new file mode 100644 (file)
index 0000000..807a468
--- /dev/null
@@ -0,0 +1,21 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS32
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_ARM
+  Flags:           [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
+Sections:
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0000000000010000
+    AddressAlign:    0x0000000000000001
+    Content:         04
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_WRITE ]
+    Address:         0x0000000000010001
+    AddressAlign:    0x0000000000000001
+    Content:         0000001400000003000000474E5500
diff --git a/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad.yaml b/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad.yaml
new file mode 100644 (file)
index 0000000..53e96f6
--- /dev/null
@@ -0,0 +1,15 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS32
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_ARM
+  Flags:           [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
+Sections:
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0000000000010000
+    AddressAlign:    0x0000000000000004
+    Content:         040000001400000003000000474E5500
diff --git a/lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-breakpad-uuid-match.yaml b/lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-breakpad-uuid-match.yaml
new file mode 100644 (file)
index 0000000..3784898
--- /dev/null
@@ -0,0 +1,15 @@
+--- !minidump
+Streams:
+  - Type:            SystemInfo
+    Processor Arch:  ARM
+    Platform ID:     Linux
+    CSD Version:     '15E216'
+    CPU:
+      CPUID:           0x00000000
+  - Type:            ModuleList
+    Modules:
+      - Base of Image:   0x0000000000001000
+        Size of Image:   0x00001000
+        Module Name:     '/invalid/path/on/current/system/libbreakpad.so'
+        CodeView Record: 52534453040000001400000003000000474e55000000000000
+...
diff --git a/lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-facebook-uuid-match.yaml b/lldb/test/API/functionalities/postmortem/minidump-new/linux-arm-facebook-uuid-match.yaml
new file mode 100644 (file)
index 0000000..203fc66
--- /dev/null
@@ -0,0 +1,15 @@
+--- !minidump
+Streams:
+  - Type:            SystemInfo
+    Processor Arch:  ARM
+    Platform ID:     Linux
+    CSD Version:     '15E216'
+    CPU:
+      CPUID:           0x00000000
+  - Type:            ModuleList
+    Modules:
+      - Base of Image:   0x0000000000001000
+        Size of Image:   0x00001000
+        Module Name:     '/invalid/path/on/current/system/libbreakpad.so'
+        CodeView Record: 52534453141010100410101013101010575e45100000000000
+...