Fix parsing of object files with "early" section headers
author Pavel Labath <labath@google.com>
Mon, 5 Feb 2018 17:25:40 +0000 (17:25 +0000)
committer Pavel Labath <labath@google.com>
Mon, 5 Feb 2018 17:25:40 +0000 (17:25 +0000)
ObjectFileELF::GetModuleSpecifications contained a lot of tip-toeing code
which was trying to avoid loading the full object file into memory. It
did this by trying to load data only up to the offset it was accessing.
However, in practice this was useless, as 99% of object files we
encounter have section headers at the end, so we would load the whole
file as soon as we start parsing the section headers.

In fact, this would break as soon as we encounter a file which does
*not* have section headers at the end (yaml2obj produces these), as the
access to .strtab (which we need to get the section names) was not
guarded by this offset check.

As this strategy was completely ineffective anyway, I do not attempt to
proliferate it further by guarding the .strtab accesses. Instead I just
load the full file as soon as we are reasonably sure that we are indeed
processing an elf file.

If we really care about the load size here, we would need to reimplement
this to just load the bits of the object file we need, instead of
loading everything from the start of the object file to the given
offset. However, given that the OS will do this for us for free when
using mmap, I don't think this is really necessary.

For testing this I check in a (tiny) SO file instead of yaml2obj-ing it
because the fact that the section headers come out first is an
implementation detail of yaml2obj that can change in the future.

llvm-svn: 324254

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
lldb/unittests/ObjectFile/ELF/CMakeLists.txt
lldb/unittests/ObjectFile/ELF/Inputs/early-section-headers.so [new file with mode: 0644]
lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp

index 614c76b..9d60606 100644 (file)
@@ -674,29 +674,16 @@ size_t ObjectFileELF::GetModuleSpecifications(
                           __FUNCTION__, file.GetPath().c_str());
           }
 
+          data_sp = MapFileData(file, -1, file_offset);
+          if (data_sp)
+            data.SetData(data_sp);
           // In case there is header extension in the section #0, the header
           // we parsed above could have sentinel values for e_phnum, e_shnum,
           // and e_shstrndx.  In this case we need to reparse the header
           // with a bigger data source to get the actual values.
-          size_t section_header_end = header.e_shoff + header.e_shentsize;
-          if (header.HasHeaderExtension() &&
-            section_header_end > data_sp->GetByteSize()) {
-            data_sp = MapFileData(file, section_header_end, file_offset);
-            if (data_sp) {
-              data.SetData(data_sp);
-              lldb::offset_t header_offset = data_offset;
-              header.Parse(data, &header_offset);
-            }
-          }
-
-          // Try to get the UUID from the section list. Usually that's at the
-          // end, so map the file in if we don't have it already.
-          section_header_end =
-              header.e_shoff + header.e_shnum * header.e_shentsize;
-          if (section_header_end > data_sp->GetByteSize()) {
-            data_sp = MapFileData(file, section_header_end, file_offset);
-            if (data_sp)
-              data.SetData(data_sp);
+          if (header.HasHeaderExtension()) {
+            lldb::offset_t header_offset = data_offset;
+            header.Parse(data, &header_offset);
           }
 
           uint32_t gnu_debuglink_crc = 0;
@@ -733,39 +720,14 @@ size_t ObjectFileELF::GetModuleSpecifications(
               // contents crc32 would be too much of luxury.  Thus we will need
               // to fallback to something simpler.
               if (header.e_type == llvm::ELF::ET_CORE) {
-                size_t program_headers_end =
-                    header.e_phoff + header.e_phnum * header.e_phentsize;
-                if (program_headers_end > data_sp->GetByteSize()) {
-                  data_sp = MapFileData(file, program_headers_end, file_offset);
-                  if (data_sp)
-                    data.SetData(data_sp);
-                }
                 ProgramHeaderColl program_headers;
                 GetProgramHeaderInfo(program_headers, data, header);
 
-                size_t segment_data_end = 0;
-                for (ProgramHeaderCollConstIter I = program_headers.begin();
-                     I != program_headers.end(); ++I) {
-                  segment_data_end = std::max<unsigned long long>(
-                      I->p_offset + I->p_filesz, segment_data_end);
-                }
-
-                if (segment_data_end > data_sp->GetByteSize()) {
-                  data_sp = MapFileData(file, segment_data_end, file_offset);
-                  if (data_sp)
-                    data.SetData(data_sp);
-                }
-
                 core_notes_crc =
                     CalculateELFNotesSegmentsCRC32(program_headers, data);
               } else {
-                // Need to map entire file into memory to calculate the crc.
-                data_sp = MapFileData(file, -1, file_offset);
-                if (data_sp) {
-                  data.SetData(data_sp);
-                  gnu_debuglink_crc = calc_gnu_debuglink_crc32(
-                      data.GetDataStart(), data.GetByteSize());
-                }
+                gnu_debuglink_crc = calc_gnu_debuglink_crc32(
+                    data.GetDataStart(), data.GetByteSize());
               }
             }
             if (gnu_debuglink_crc) {
index 652c221..3d53f54 100644 (file)
@@ -13,6 +13,7 @@ add_dependencies(ObjectFileELFTests yaml2obj)
 add_definitions(-DYAML2OBJ="$<TARGET_FILE:yaml2obj>")
 
 set(test_inputs
+  early-section-headers.so
   sections-resolve-consistently.yaml
   )
 add_unittest_inputs(ObjectFileELFTests "${test_inputs}")
diff --git a/lldb/unittests/ObjectFile/ELF/Inputs/early-section-headers.so b/lldb/unittests/ObjectFile/ELF/Inputs/early-section-headers.so
new file mode 100644 (file)
index 0000000..4d52974
Binary files /dev/null and b/lldb/unittests/ObjectFile/ELF/Inputs/early-section-headers.so differ
index 056799e..fe8ea7c 100644 (file)
@@ -98,3 +98,17 @@ TEST_F(ObjectFileELFTest, SectionsResolveConsistently) {
   ASSERT_NE(nullptr, start);
   EXPECT_EQ(text_sp, start->GetAddress().GetSection());
 }
+
+// Test that GetModuleSpecifications works on an "atypical" object file which
+// has section headers right after the ELF header (instead of the more common
+// layout where the section headers are at the very end of the object file).
+TEST_F(ObjectFileELFTest, GetModuleSpecifications_EarlySectionHeaders) {
+  std::string SO = GetInputFilePath("early-section-headers.so");
+  ModuleSpecList Specs;
+  ASSERT_EQ(1u, ObjectFile::GetModuleSpecifications(FileSpec(SO, false), 0, 0, Specs));
+  ModuleSpec Spec;
+  ASSERT_TRUE(Specs.GetModuleSpecAtIndex(0, Spec)) ;
+  UUID Uuid;
+  Uuid.SetFromStringRef("1b8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9", 20);
+  EXPECT_EQ(Spec.GetUUID(), Uuid);
+}