Add a new report_load_commands option to jGetLoadedDynamicLibrariesInfos
author Jason Molenda <jason@molenda.com>
Tue, 9 May 2023 03:28:52 +0000 (20:28 -0700)
committer Jason Molenda <jason@molenda.com>
Tue, 9 May 2023 03:34:58 +0000 (20:34 -0700)
jGetLoadedDynamicLibrariesInfos has a mode where it will list
every binary in the process - the load address and filepath from dyld
SPI, and the mach-o header and load commands from a scan by debugserver
for perf reasons.  With a large enough number of libraries, creating
that StructuredData representation of all of this, and formatting it
into an ascii string to send up to lldb, can grow debugserver's heap
size too large for some environments.

This patch adds a new report_load_commands:false boolean to the
jGetLoadedDynamicLibrariesInfos packet, where debugserver will now
only report the dyld SPI load address and filepath for all of the
binaries.  lldb can then ask for the detailed information on
the process binaries in smaller chunks, and avoid debugserver
having ever growing heap use as the number of binaries inevitably
increases.

This patch also removes a version of jGetLoadedDynamicLibrariesInfos
for pre-iOS 10 and pre-macOS 10.12 systems where we did not use
dyld SPI.  We can't back compile to those OS builds any longer
with modern Xcode.

Finally, it removes a requirement in DynamicLoaderMacOS that the
JSON reply from jGetLoadedDynamicLibrariesInfos include the
mod_date field for each binary.  This has always been reported as
0 in modern dyld, and is another reason for packet growth in
the reply.  debugserver still puts the mod_date field in its replies
for interop with existing lldb's, but we will be able to remove
the field from debugserver's output after the next release cycle
when this patch has had time to circulate.

I'll add lldb support for requesting the load addresses only
and splitting the request up into chunks in a separate patch.

Differential Revision: https://reviews.llvm.org/D150158
rdar://107848326

lldb/docs/lldb-gdb-remote.txt
lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h
lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
lldb/tools/debugserver/source/DNB.cpp
lldb/tools/debugserver/source/DNB.h
lldb/tools/debugserver/source/MacOSX/MachProcess.h
lldb/tools/debugserver/source/MacOSX/MachProcess.mm
lldb/tools/debugserver/source/RNBRemote.cpp

index b426978..28180df 100644 (file)
@@ -2001,19 +2001,16 @@ for this region.
 //  This packet asks the remote debug stub to send the details about libraries
 //  being added/removed from the process as a performance optimization.
 //
-//  There are three ways this packet can be used.  All three return a dictionary of
+//  There are two ways this packet can be used.  Both return a dictionary of
 //  binary images formatted the same way.
 //
-//  On OS X 10.11, iOS 9, tvOS 9, watchOS 2 and earlier, the packet is used like
-//       jGetLoadedDynamicLibrariesInfos:{"image_count":1,"image_list_address":140734800075128}
-//  where the image_list_address is an array of {void* load_addr, void* mod_date, void* pathname}
-//  in the inferior process memory (and image_count is the number of elements in this array).
-//  lldb is using information from the dyld_all_image_infos structure to make these requests to
-//  debugserver.  This use is not supported on macOS 10.12, iOS 10, tvOS 10, watchOS 3 or newer.
-//
-//  On macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer, there are two calls.  One requests information
-//  on all shared libraries:
+//  One requests information on all shared libraries:
 //       jGetLoadedDynamicLibrariesInfos:{"fetch_all_solibs":true}
+//  with an optional `"report_load_commands":false` which can be added, asking
+//  that only the dyld SPI information (load addresses, filenames) be returned.
+//  The default behavior is that debugserver scans the mach-o header and load 
+//  commands of each binary, and returns it in the JSON reply.
+//
 //  And the second requests information about a list of shared libraries, given their load addresses:
 //       jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[8382824135,3258302053,830202858503]}
 //
index 6506d00..8bcc935 100644 (file)
@@ -372,7 +372,6 @@ bool DynamicLoaderDarwin::JSONImageInformationIntoImageInfo(
     // clang-format off
     if (!image->HasKey("load_address") ||
         !image->HasKey("pathname") ||
-        !image->HasKey("mod_date") ||
         !image->HasKey("mach_header") ||
         image->GetValueForKey("mach_header")->GetAsDictionary() == nullptr ||
         !image->HasKey("segments") ||
@@ -383,8 +382,6 @@ bool DynamicLoaderDarwin::JSONImageInformationIntoImageInfo(
     // clang-format on
     image_infos[i].address =
         image->GetValueForKey("load_address")->GetAsInteger()->GetValue();
-    image_infos[i].mod_date =
-        image->GetValueForKey("mod_date")->GetAsInteger()->GetValue();
     image_infos[i].file_spec.SetFile(
         image->GetValueForKey("pathname")->GetAsString()->GetValue(),
         FileSpec::Style::native);
@@ -811,11 +808,11 @@ void DynamicLoaderDarwin::ImageInfo::PutToLog(Log *log) const {
   if (!log)
     return;
   if (address == LLDB_INVALID_ADDRESS) {
-    LLDB_LOG(log, "modtime={0:x+8} uuid={1} path='{2}' (UNLOADED)", mod_date,
-             uuid.GetAsString(), file_spec.GetPath());
+    LLDB_LOG(log, "uuid={1} path='{2}' (UNLOADED)", uuid.GetAsString(),
+             file_spec.GetPath());
   } else {
-    LLDB_LOG(log, "address={0:x+16} modtime={1:x+8} uuid={2} path='{3}'",
-             address, mod_date, uuid.GetAsString(), file_spec.GetPath());
+    LLDB_LOG(log, "address={0:x+16} uuid={2} path='{3}'", address,
+             uuid.GetAsString(), file_spec.GetPath());
     for (uint32_t i = 0; i < segments.size(); ++i)
       segments[i].PutToLog(log, slide);
   }
index 0268505..8f9a29c 100644 (file)
@@ -100,8 +100,6 @@ protected:
     /// The amount to slide all segments by if there is a global
     /// slide.
     lldb::addr_t slide = 0;
-    /// Modification date for this dylib.
-    lldb::addr_t mod_date = 0;
     /// Resolved path for this dylib.
     lldb_private::FileSpec file_spec;
     /// UUID for this dylib if it has one, else all zeros.
@@ -128,7 +126,6 @@ protected:
       if (!load_cmd_data_only) {
         address = LLDB_INVALID_ADDRESS;
         slide = 0;
-        mod_date = 0;
         file_spec.Clear();
         ::memset(&header, 0, sizeof(header));
       }
@@ -142,8 +139,7 @@ protected:
 
     bool operator==(const ImageInfo &rhs) const {
       return address == rhs.address && slide == rhs.slide &&
-             mod_date == rhs.mod_date && file_spec == rhs.file_spec &&
-             uuid == rhs.uuid &&
+             file_spec == rhs.file_spec && uuid == rhs.uuid &&
              memcmp(&header, &rhs.header, sizeof(header)) == 0 &&
              segments == rhs.segments && os_type == rhs.os_type &&
              os_env == rhs.os_env;
index 64b035c..8df3e8e 100644 (file)
@@ -693,7 +693,7 @@ bool DynamicLoaderMacOSXDYLD::ReadImageInfos(
          i++) {
       image_infos[i].address = info_data_ref.GetAddress(&info_data_offset);
       lldb::addr_t path_addr = info_data_ref.GetAddress(&info_data_offset);
-      image_infos[i].mod_date = info_data_ref.GetAddress(&info_data_offset);
+      info_data_ref.GetAddress(&info_data_offset); // mod_date, unused
 
       char raw_path[PATH_MAX];
       m_process->ReadCStringFromMemory(path_addr, raw_path, sizeof(raw_path),
index bab5b4a..8a8b1ee 100644 (file)
@@ -1023,20 +1023,11 @@ DNBGetTSDAddressForThread(nub_process_t pid, nub_thread_t tid,
   return INVALID_NUB_ADDRESS;
 }
 
-JSONGenerator::ObjectSP DNBGetLoadedDynamicLibrariesInfos(
-    nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) {
-  MachProcessSP procSP;
-  if (GetProcessSP(pid, procSP)) {
-    return procSP->GetLoadedDynamicLibrariesInfos(pid, image_list_address,
-                                                  image_count);
-  }
-  return JSONGenerator::ObjectSP();
-}
-
-JSONGenerator::ObjectSP DNBGetAllLoadedLibrariesInfos(nub_process_t pid) {
+JSONGenerator::ObjectSP
+DNBGetAllLoadedLibrariesInfos(nub_process_t pid, bool report_load_commands) {
   MachProcessSP procSP;
   if (GetProcessSP(pid, procSP)) {
-    return procSP->GetAllLoadedLibrariesInfos(pid);
+    return procSP->GetAllLoadedLibrariesInfos(pid, report_load_commands);
   }
   return JSONGenerator::ObjectSP();
 }
index 3108f1a..d8ccdea 100644 (file)
@@ -210,9 +210,8 @@ DNBGetTSDAddressForThread(nub_process_t pid, nub_thread_t tid,
                           uint64_t plo_pthread_tsd_base_address_offset,
                           uint64_t plo_pthread_tsd_base_offset,
                           uint64_t plo_pthread_tsd_entry_size);
-JSONGenerator::ObjectSP DNBGetLoadedDynamicLibrariesInfos(
-    nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count);
-JSONGenerator::ObjectSP DNBGetAllLoadedLibrariesInfos(nub_process_t pid);
+JSONGenerator::ObjectSP
+DNBGetAllLoadedLibrariesInfos(nub_process_t pid, bool report_load_commands);
 JSONGenerator::ObjectSP
 DNBGetLibrariesInfoForAddresses(nub_process_t pid,
                                 std::vector<uint64_t> &macho_addresses);
index bcdd0f6..a84bfb9 100644 (file)
@@ -72,12 +72,11 @@ public:
   struct binary_image_information {
     std::string filename;
     uint64_t load_address;
-    uint64_t mod_date; // may not be available - 0 if so
     struct mach_o_information macho_info;
     bool is_valid_mach_header;
 
     binary_image_information()
-        : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0),
+        : filename(), load_address(INVALID_NUB_ADDRESS),
           is_valid_mach_header(false) {}
   };
 
@@ -259,7 +258,8 @@ public:
                                      int wordsize,
                                      struct mach_o_information &inf);
   JSONGenerator::ObjectSP FormatDynamicLibrariesIntoJSON(
-      const std::vector<struct binary_image_information> &image_infos);
+      const std::vector<struct binary_image_information> &image_infos,
+      bool report_load_commands);
   uint32_t GetPlatform();
   /// Get the runtime platform from DYLD via SPI.
   uint32_t GetProcessPlatformViaDYLDSPI();
@@ -271,12 +271,12 @@ public:
   /// command details.
   void GetAllLoadedBinariesViaDYLDSPI(
       std::vector<struct binary_image_information> &image_infos);
-  JSONGenerator::ObjectSP GetLoadedDynamicLibrariesInfos(
-      nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count);
   JSONGenerator::ObjectSP
   GetLibrariesInfoForAddresses(nub_process_t pid,
                                std::vector<uint64_t> &macho_addresses);
-  JSONGenerator::ObjectSP GetAllLoadedLibrariesInfos(nub_process_t pid);
+  JSONGenerator::ObjectSP
+  GetAllLoadedLibrariesInfos(nub_process_t pid,
+                             bool fetch_report_load_commands);
   JSONGenerator::ObjectSP GetSharedCacheInfo(nub_process_t pid);
 
   nub_size_t GetNumThreads() const;
index b6e5e57..dcc1579 100644 (file)
@@ -912,22 +912,34 @@ bool MachProcess::GetMachOInformationFromMemory(
 // create a JSONGenerator object
 // with all the details we want to send to lldb.
 JSONGenerator::ObjectSP MachProcess::FormatDynamicLibrariesIntoJSON(
-    const std::vector<struct binary_image_information> &image_infos) {
+    const std::vector<struct binary_image_information> &image_infos,
+    bool report_load_commands) {
 
   JSONGenerator::ArraySP image_infos_array_sp(new JSONGenerator::Array());
 
   const size_t image_count = image_infos.size();
 
   for (size_t i = 0; i < image_count; i++) {
-    if (!image_infos[i].is_valid_mach_header)
+    // If we should report the Mach-O header and load commands,
+    // and those were unreadable, don't report anything about this
+    // binary.
+    if (report_load_commands && !image_infos[i].is_valid_mach_header)
       continue;
     JSONGenerator::DictionarySP image_info_dict_sp(
         new JSONGenerator::Dictionary());
     image_info_dict_sp->AddIntegerItem("load_address",
                                        image_infos[i].load_address);
-    image_info_dict_sp->AddIntegerItem("mod_date", image_infos[i].mod_date);
+    // TODO: lldb currently rejects a response without this, but it
+    // is always zero from dyld.  It can be removed once the lldb's
+    // that require it to be present are obsolete.
+    image_info_dict_sp->AddIntegerItem("mod_date", 0);
     image_info_dict_sp->AddStringItem("pathname", image_infos[i].filename);
 
+    if (!report_load_commands) {
+      image_infos_array_sp->AddItem(image_info_dict_sp);
+      continue;
+    }
+
     uuid_string_t uuidstr;
     uuid_unparse_upper(image_infos[i].macho_info.uuid, uuidstr);
     image_info_dict_sp->AddStringItem("uuid", uuidstr);
@@ -1000,109 +1012,6 @@ JSONGenerator::ObjectSP MachProcess::FormatDynamicLibrariesIntoJSON(
   return reply_sp;
 }
 
-// Get the shared library information using the old (pre-macOS 10.12, pre-iOS
-// 10, pre-tvOS 10, pre-watchOS 3)
-// code path.  We'll be given the address of an array of structures in the form
-// {void* load_addr, void* mod_date, void* pathname}
-//
-// In macOS 10.12 etc and newer, we'll use SPI calls into dyld to gather this
-// information.
-JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos(
-    nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) {
-
-  JSONGenerator::ObjectSP empty_reply_sp(new JSONGenerator::Dictionary());
-  int pointer_size = GetInferiorAddrSize(pid);
-
-  std::vector<struct binary_image_information> image_infos;
-  size_t image_infos_size = image_count * 3 * pointer_size;
-
-  uint8_t *image_info_buf = (uint8_t *)malloc(image_infos_size);
-  if (image_info_buf == NULL) {
-    return empty_reply_sp;
-  }
-  if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
-      image_infos_size) {
-    return empty_reply_sp;
-  }
-
-  /// First the image_infos array with (load addr, pathname, mod date)
-  /// tuples
-
-  for (size_t i = 0; i < image_count; i++) {
-    struct binary_image_information info;
-    nub_addr_t pathname_address;
-    if (pointer_size == 4) {
-      uint32_t load_address_32;
-      uint32_t pathname_address_32;
-      uint32_t mod_date_32;
-      ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
-      ::memcpy(&pathname_address_32,
-               image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
-      ::memcpy(&mod_date_32,
-               image_info_buf + (i * 3 * pointer_size) + pointer_size +
-                   pointer_size,
-               4);
-      info.load_address = load_address_32;
-      info.mod_date = mod_date_32;
-      pathname_address = pathname_address_32;
-    } else {
-      uint64_t load_address_64;
-      uint64_t pathname_address_64;
-      uint64_t mod_date_64;
-      ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
-      ::memcpy(&pathname_address_64,
-               image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
-      ::memcpy(&mod_date_64,
-               image_info_buf + (i * 3 * pointer_size) + pointer_size +
-                   pointer_size,
-               8);
-      info.load_address = load_address_64;
-      info.mod_date = mod_date_64;
-      pathname_address = pathname_address_64;
-    }
-    char strbuf[17];
-    info.filename = "";
-    uint64_t pathname_ptr = pathname_address;
-    bool still_reading = true;
-    while (still_reading && ReadMemory(pathname_ptr, sizeof(strbuf) - 1,
-                                       strbuf) == sizeof(strbuf) - 1) {
-      strbuf[sizeof(strbuf) - 1] = '\0';
-      info.filename += strbuf;
-      pathname_ptr += sizeof(strbuf) - 1;
-      // Stop if we found nul byte indicating the end of the string
-      for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
-        if (strbuf[i] == '\0') {
-          still_reading = false;
-          break;
-        }
-      }
-    }
-    uuid_clear(info.macho_info.uuid);
-    image_infos.push_back(info);
-  }
-  if (image_infos.size() == 0) {
-    return empty_reply_sp;
-  }
-
-  free(image_info_buf);
-
-  ///  Second, read the mach header / load commands for all the dylibs
-
-  for (size_t i = 0; i < image_count; i++) {
-    // The SPI to provide platform is not available on older systems.
-    uint32_t platform = 0;
-    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
-                                      pointer_size,
-                                      image_infos[i].macho_info)) {
-      image_infos[i].is_valid_mach_header = true;
-    }
-  }
-
-  ///  Third, format all of the above in the JSONGenerator object.
-
-  return FormatDynamicLibrariesIntoJSON(image_infos);
-}
-
 /// From dyld SPI header dyld_process_info.h
 typedef void *dyld_process_info;
 struct dyld_process_cache_info {
@@ -1162,21 +1071,24 @@ void MachProcess::GetAllLoadedBinariesViaDYLDSPI(
 // in
 // macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
 JSONGenerator::ObjectSP
-MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
+MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid,
+                                        bool report_load_commands) {
 
   int pointer_size = GetInferiorAddrSize(pid);
   std::vector<struct binary_image_information> image_infos;
   GetAllLoadedBinariesViaDYLDSPI(image_infos);
-  uint32_t platform = GetPlatform();
-  const size_t image_count = image_infos.size();
-  for (size_t i = 0; i < image_count; i++) {
-    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
-                                      pointer_size,
-                                      image_infos[i].macho_info)) {
-      image_infos[i].is_valid_mach_header = true;
+  if (report_load_commands) {
+    uint32_t platform = GetPlatform();
+    const size_t image_count = image_infos.size();
+    for (size_t i = 0; i < image_count; i++) {
+      if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                        pointer_size,
+                                        image_infos[i].macho_info)) {
+        image_infos[i].is_valid_mach_header = true;
+      }
     }
   }
-    return FormatDynamicLibrariesIntoJSON(image_infos);
+  return FormatDynamicLibrariesIntoJSON(image_infos, report_load_commands);
 }
 
 // Fetch information about the shared libraries at the given load addresses
@@ -1226,7 +1138,8 @@ JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses(
         image_infos[i].is_valid_mach_header = true;
       }
     }
-    return FormatDynamicLibrariesIntoJSON(image_infos);
+    return FormatDynamicLibrariesIntoJSON(image_infos,
+                                          /* report_load_commands =  */ true);
 }
 
 // From dyld's internal podyld_process_info.h:
index a9bda15..db0dc5f 100644 (file)
@@ -5927,21 +5927,17 @@ rnb_err_t RNBRemote::HandlePacket_jThreadExtendedInfo(const char *p) {
   return SendPacket("OK");
 }
 
-//  This packet may be called in one of three ways:
-//
-//  jGetLoadedDynamicLibrariesInfos:{"image_count":40,"image_list_address":4295244704}
-//      Look for an array of the old dyld_all_image_infos style of binary infos
-//      at the image_list_address.
-//      This an array of {void* load_addr, void* mod_date, void* pathname}
+//  This packet may be called in one of two ways:
 //
 //  jGetLoadedDynamicLibrariesInfos:{"fetch_all_solibs":true}
-//      Use the new style (macOS 10.12, tvOS 10, iOS 10, watchOS 3) dyld SPI to
-//      get a list of all the
-//      libraries loaded
+//      Use the new dyld SPI to get a list of all the libraries loaded.
+//      If "report_load_commands":false is present, only the dyld SPI
+//      provided information (load address, filepath) is returned.
+//      lldb can ask for the mach-o header/load command details in a
+//      separate packet.
 //
 //  jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[8382824135,3258302053,830202858503]}
-//      Use the new style (macOS 10.12, tvOS 10, iOS 10, watchOS 3) dyld SPI to
-//      get the information
+//      Use the dyld SPI and Mach-O parsing in memory to get the information
 //      about the libraries loaded at these addresses.
 //
 rnb_err_t
@@ -5964,24 +5960,17 @@ RNBRemote::HandlePacket_jGetLoadedDynamicLibrariesInfos(const char *p) {
 
     std::vector<uint64_t> macho_addresses;
     bool fetch_all_solibs = false;
+    bool report_load_commands = true;
+    get_boolean_value_for_key_name_from_json("report_load_commands", p,
+                                             report_load_commands);
+
     if (get_boolean_value_for_key_name_from_json("fetch_all_solibs", p,
                                                  fetch_all_solibs) &&
         fetch_all_solibs) {
-      json_sp = DNBGetAllLoadedLibrariesInfos(pid);
+      json_sp = DNBGetAllLoadedLibrariesInfos(pid, report_load_commands);
     } else if (get_array_of_ints_value_for_key_name_from_json(
                    "solib_addresses", p, macho_addresses)) {
       json_sp = DNBGetLibrariesInfoForAddresses(pid, macho_addresses);
-    } else {
-      nub_addr_t image_list_address =
-          get_integer_value_for_key_name_from_json("image_list_address", p);
-      nub_addr_t image_count =
-          get_integer_value_for_key_name_from_json("image_count", p);
-
-      if (image_list_address != INVALID_NUB_ADDRESS &&
-          image_count != INVALID_NUB_ADDRESS) {
-        json_sp = DNBGetLoadedDynamicLibrariesInfos(pid, image_list_address,
-                                                    image_count);
-      }
     }
 
     if (json_sp.get()) {