1 /* Sniff out modules from ELF headers visible in memory segments.
2 Copyright (C) 2008-2012 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
30 #include "../libelf/libelfP.h" /* For NOTE_ALIGN. */
37 #include <sys/param.h>
43 /* A good size for the initial read from memory, if it's not too costly.
44 This more than covers the phdrs and note segment in the average 64-bit binary. */
47 #define INITIAL_READ 1024
49 #if __BYTE_ORDER == __LITTLE_ENDIAN
50 # define MY_ELFDATA ELFDATA2LSB
52 # define MY_ELFDATA ELFDATA2MSB
56 /* Return user segment index closest to ADDR but not above it.
57 If NEXT, return the closest to ADDR but not below it. */
/* The scan begins at lookup-table slot SEGMENT and only moves forward.
   NOTE(review): several lines of this function (return type, braces, the
   initial value of NDX, loop keywords, final return) are not visible in
   this excerpt; the comments below describe only the visible lines. */
59 addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr, bool next)
/* Remember the latest slot that carries a non-negative segment index. */
64 if (dwfl->lookup_segndx[segment] >= 0)
65 ndx = dwfl->lookup_segndx[segment];
/* Reaching the last lookup slot ends the search: report NDX, or NDX + 1
   when the caller asked for the segment at or after ADDR. */
66 if (++segment >= dwfl->lookup_elts - 1)
67 return next ? ndx + 1 : ndx;
/* Keep going while the current slot still starts below ADDR. */
69 while (dwfl->lookup_addr[segment] < addr);
/* Step past slots whose segment index is negative, then take the index
   of the slot where the stepping stopped. */
73 while (dwfl->lookup_segndx[segment] < 0)
74 if (++segment >= dwfl->lookup_elts - 1)
76 ndx = dwfl->lookup_segndx[segment];
83 dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
84 Dwfl_Memory_Callback *memory_callback,
85 void *memory_callback_arg,
86 Dwfl_Module_Callback *read_eagerly,
87 void *read_eagerly_arg,
88 const struct r_debug_info *r_debug_info)
92 if (segment >= dwfl->lookup_elts)
93 segment = dwfl->lookup_elts - 1;
96 && (dwfl->lookup_segndx[segment] > ndx
97 || dwfl->lookup_segndx[segment] == -1))
100 while (dwfl->lookup_segndx[segment] < ndx)
101 if (++segment == dwfl->lookup_elts)
104 GElf_Addr start = dwfl->lookup_addr[segment];
/* Forward a read request to MEMORY_CALLBACK, supplying DWFL and
   MEMORY_CALLBACK_ARG from the enclosing function along with SEGNDX,
   BUFFER, BUFFER_AVAILABLE, ADDR and MINREAD.  The callback's result is
   negated, so this returns true exactly when the callback returns false.
   NOTE(review): callers in this file appear to treat a true result as
   "read failed" -- confirm against the Dwfl_Memory_Callback contract. */
106 inline bool segment_read (int segndx,
107 void **buffer, size_t *buffer_available,
108 GElf_Addr addr, size_t minread)
110 return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available,
111 addr, minread, memory_callback_arg);
/* Hand BUFFER back by calling segment_read with segment index -1 and a
   zero address and minimum size; the result is deliberately ignored.
   NOTE(review): presumably -1 is the callback's "release" convention, and
   a guarding line before the call may be missing from this excerpt --
   confirm both. */
114 inline void release_buffer (void **buffer, size_t *buffer_available)
117 (void) segment_read (-1, buffer, buffer_available, 0, 0);
120 /* First read in the file header and check its sanity. */
123 size_t buffer_available = INITIAL_READ;
/* Common exit helper: release the initial-read BUFFER of the enclosing
   function.  NOTE(review): the return statement is not visible in this
   excerpt, so the value produced is not documented here. */
125 inline int finish (void)
127 release_buffer (&buffer, &buffer_available);
131 if (segment_read (ndx, &buffer, &buffer_available,
132 start, sizeof (Elf64_Ehdr))
133 || memcmp (buffer, ELFMAG, SELFMAG) != 0)
/* Make FILESZ bytes starting at address VADDR reachable through *DATA.
   FILESZ == 0 selects "string mode": the caller wants a NUL-terminated
   string starting at VADDR.  If the initial BUFFER (which begins at START
   and holds BUFFER_AVAILABLE bytes) does not cover the request, a fresh
   segment_read is issued and its result returned; otherwise *DATA is
   pointed into BUFFER.  Used for program headers, notes, the dynamic
   section and the soname string alike. */
136 inline bool read_portion (void **data, size_t *data_size,
137 GElf_Addr vaddr, size_t filesz)
139 if (vaddr - start + filesz > buffer_available
140 /* If we're in string mode, then don't consider the buffer we have
141 sufficient unless it contains the terminator of the string. */
142 || (filesz == 0 && memchr (vaddr - start + buffer, '\0',
143 buffer_available - (vaddr - start)) == NULL))
/* Not covered by the initial read: fetch it from whichever segment
   addr_segndx selects for VADDR. */
147 return segment_read (addr_segndx (dwfl, segment, vaddr, false),
148 data, data_size, vaddr, filesz);
151 /* We already have this whole portion from our initial read. */
152 *data = vaddr - start + buffer;
/* Counterpart of read_portion: give DATA back through release_buffer.
   NOTE(review): a line between the signature and this call is missing
   from the excerpt; it presumably skips the release when DATA points
   into the initial buffer -- confirm. */
157 inline void finish_portion (void **data, size_t *data_size)
160 release_buffer (data, data_size);
163 /* Extract the information we need from the file header. */
171 uint_fast16_t phentsize;
175 .d_type = ELF_T_EHDR,
176 .d_buf = (void *) buffer,
177 .d_version = EV_CURRENT,
181 .d_type = ELF_T_EHDR,
183 .d_size = sizeof ehdr,
184 .d_version = EV_CURRENT,
186 switch (((const unsigned char *) buffer)[EI_CLASS])
189 xlatefrom.d_size = sizeof (Elf32_Ehdr);
190 if (elf32_xlatetom (&xlateto, &xlatefrom,
191 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
193 phoff = ehdr.e32.e_phoff;
194 phnum = ehdr.e32.e_phnum;
195 phentsize = ehdr.e32.e_phentsize;
196 if (phentsize != sizeof (Elf32_Phdr))
198 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize;
202 xlatefrom.d_size = sizeof (Elf64_Ehdr);
203 if (elf64_xlatetom (&xlateto, &xlatefrom,
204 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
206 phoff = ehdr.e64.e_phoff;
207 phnum = ehdr.e64.e_phnum;
208 phentsize = ehdr.e64.e_phentsize;
209 if (phentsize != sizeof (Elf64_Phdr))
211 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize;
218 /* The file header tells where to find the program headers.
219 These are what we need to find the boundaries of the module.
220 Without them, we don't have a module to report. */
225 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
226 xlatefrom.d_size = phnum * phentsize;
228 void *ph_buffer = NULL;
229 size_t ph_buffer_size = 0;
230 if (read_portion (&ph_buffer, &ph_buffer_size,
231 start + phoff, xlatefrom.d_size))
234 xlatefrom.d_buf = ph_buffer;
238 Elf32_Phdr p32[phnum];
239 Elf64_Phdr p64[phnum];
242 xlateto.d_buf = &phdrs;
243 xlateto.d_size = sizeof phdrs;
245 /* Track the bounds of the file visible in memory. */
246 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */
247 GElf_Off file_end = 0; /* Rounded up to effective page size. */
248 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */
249 GElf_Off total_filesz = 0; /* Total size of data to read. */
251 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */
253 bool found_bias = false;
255 /* Collect the unbiased bounds of the module here. */
256 GElf_Addr module_start = -1l;
257 GElf_Addr module_end = 0;
258 GElf_Addr module_address_sync = 0;
260 /* If we see PT_DYNAMIC, record it here. */
261 GElf_Addr dyn_vaddr = 0;
262 GElf_Xword dyn_filesz = 0;
264 /* Collect the build ID bits here. */
265 void *build_id = NULL;
266 size_t build_id_len = 0;
267 GElf_Addr build_id_vaddr = 0;
269 /* Consider a PT_NOTE we've found in the image. */
/* Reads FILESZ bytes of note data at address VADDR and walks the note
   headers looking for an NT_GNU_BUILD_ID note named "GNU".  On a match it
   records BUILD_ID (a malloc'd copy of the descriptor), BUILD_ID_LEN and
   BUILD_ID_VADDR in the enclosing function's variables. */
270 inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz)
272 /* If we have already seen a build ID, we don't care any more. */
273 if (build_id != NULL || filesz == 0)
278 if (read_portion (&data, &data_size, vaddr, filesz))
/* The same header layout serves both ELF classes, so the 32-bit
   translator below can be used regardless of class. */
281 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
/* Compare the image's byte order with the host's (MY_ELFDATA); the
   malloc + xlatetom conversion below produces a host-order copy. */
284 if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA)
288 notes = malloc (filesz);
289 if (unlikely (notes == NULL))
291 xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR;
292 xlatefrom.d_buf = (void *) data;
293 xlatefrom.d_size = filesz;
294 xlateto.d_buf = notes;
295 xlateto.d_size = filesz;
296 if (elf32_xlatetom (&xlateto, &xlatefrom,
297 ehdr.e32.e_ident[EI_DATA]) == NULL)
/* Walk the notes: header, then name, then descriptor, each padded with
   NOTE_ALIGN. */
301 const GElf_Nhdr *nh = notes;
302 while ((const void *) nh < (const void *) notes + filesz)
304 const void *note_name = nh + 1;
305 const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz);
/* Stop if the descriptor would run past the end of the note data.
   NOTE(review): the loop test only checks that NH starts inside the data,
   and if NOTE_DESC already lies beyond the end the difference is negative
   and becomes a huge size_t, so this test would not trigger -- verify
   whether malformed n_namesz values are rejected elsewhere. */
306 if (unlikely ((size_t) ((const void *) notes + filesz
307 - note_desc) < nh->n_descsz))
310 if (nh->n_type == NT_GNU_BUILD_ID
312 && nh->n_namesz == sizeof "GNU"
313 && !memcmp (note_name, "GNU", sizeof "GNU"))
/* Translate the descriptor's position within NOTES back to an address
   relative to VADDR, and keep a private copy of the ID bytes. */
315 build_id_vaddr = note_desc - (const void *) notes + vaddr;
316 build_id_len = nh->n_descsz;
317 build_id = malloc (nh->n_descsz);
318 if (likely (build_id != NULL))
319 memcpy (build_id, note_desc, build_id_len);
/* Advance to the next note header. */
323 nh = note_desc + NOTE_ALIGN (nh->n_descsz);
329 finish_portion (&data, &data_size);
332 /* Consider each of the program headers we've read from the image. */
/* Callers pass p_type, p_vaddr, p_memsz, p_offset, p_filesz and p_align of
   one program header.  NOTE(review): the line declaring the final ALIGN
   parameter and the lines that dispatch on TYPE are not visible in this
   excerpt; the comments below describe only the visible statements. */
333 inline void consider_phdr (GElf_Word type,
334 GElf_Addr vaddr, GElf_Xword memsz,
335 GElf_Off offset, GElf_Xword filesz,
346 /* We calculate from the p_offset of the note segment,
347 because we don't yet know the bias for its p_vaddr. */
348 consider_notes (start + offset, filesz);
/* A dwfl->segment_align greater than 1 overrides the header's own
   alignment; otherwise use the header's value, or 1 if that is zero. */
352 align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1;
/* End of the memory image, rounded up to ALIGN (the mask arithmetic
   assumes ALIGN is a power of two -- TODO confirm). */
354 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
/* Where file-backed contents end: at VADDR + FILESZ when the file part is
   shorter than the memory image, else at the rounded-up end. */
355 GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end;
/* The same end point expressed as a file offset. */
356 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
358 if (file_trimmed_end < offset + filesz)
360 file_trimmed_end = offset + filesz;
362 /* Trim the last segment so we don't bother with zeros
363 in the last page that are off the end of the file.
364 However, if the extra bit in that page includes the
365 section headers, keep them. */
366 if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end)
368 filesz += shdrs_end - file_trimmed_end;
369 file_trimmed_end = shdrs_end;
/* Accumulate how many bytes would have to be read for this module. */
373 total_filesz += filesz;
/* Track the furthest file offset seen.  CONTIGUOUS advances only when
   this segment's file offset equals its distance from START in memory. */
375 if (file_end < filesz_offset)
377 file_end = filesz_offset;
378 if (filesz_vaddr - start == filesz_offset)
379 contiguous = file_end;
/* The segment whose offset rounds down to zero and whose file extent
   covers the program header table supplies the bias: START - VADDR. */
382 if (!found_bias && (offset & -align) == 0
383 && likely (filesz_offset >= phoff + phnum * phentsize))
385 bias = start - vaddr;
/* Lowest aligned start address so far; MODULE_ADDRESS_SYNC records the
   unrounded end of that same segment. */
389 if ((vaddr & -align) < module_start)
391 module_start = vaddr & -align;
392 module_address_sync = vaddr + memsz;
/* Highest rounded-up end address so far. */
395 if (module_end < vaddr_end)
396 module_end = vaddr_end;
400 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
402 if (elf32_xlatetom (&xlateto, &xlatefrom,
403 ehdr.e32.e_ident[EI_DATA]) == NULL)
404 found_bias = false; /* Trigger error check. */
406 for (uint_fast16_t i = 0; i < phnum; ++i)
407 consider_phdr (phdrs.p32[i].p_type,
408 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz,
409 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz,
410 phdrs.p32[i].p_align);
414 if (elf64_xlatetom (&xlateto, &xlatefrom,
415 ehdr.e32.e_ident[EI_DATA]) == NULL)
416 found_bias = false; /* Trigger error check. */
418 for (uint_fast16_t i = 0; i < phnum; ++i)
419 consider_phdr (phdrs.p64[i].p_type,
420 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz,
421 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz,
422 phdrs.p64[i].p_align);
425 finish_portion (&ph_buffer, &ph_buffer_size);
427 /* We must have seen the segment covering offset 0, or else the ELF
428 header we read at START was not produced by these program headers. */
429 if (unlikely (!found_bias))
435 /* Now we know enough to report a module for sure: its bounds. */
436 module_start += bias;
441 /* NAME found from link map has precedence over DT_SONAME possibly read below. */
443 bool name_is_final = false;
445 /* Try to match up DYN_VADDR against L_LD as found in the link map.
446 Segment sniffing may guess an invalid address, because the first read-only
447 memory mapping may not be dumped to the core file (if ELF headers are not
448 dumped) and the ELF header is instead dumped first with the read/write
449 mapping of the same file at higher addresses. */
450 if (r_debug_info != NULL)
451 for (const struct r_debug_info_module *module = r_debug_info->module;
452 module != NULL; module = module->next)
453 if (module_start <= module->l_ld && module->l_ld < module_end)
455 /* L_LD read from link map must be right while DYN_VADDR is unsafe.
456 Therefore subtract DYN_VADDR and add L_LD to get a possibly
457 corrective displacement for all addresses computed so far. */
458 GElf_Addr fixup = module->l_ld - dyn_vaddr;
459 if ((fixup & (dwfl->segment_align - 1)) == 0
460 && module_start + fixup <= module->l_ld
461 && module->l_ld < module_end + fixup)
463 module_start += fixup;
467 if (module->name[0] != '\0')
469 name = basename (module->name);
470 name_is_final = true;
476 if (r_debug_info != NULL)
478 bool skip_this_module = false;
479 for (struct r_debug_info_module *module = r_debug_info->module;
480 module != NULL; module = module->next)
481 if ((module_end > module->start && module_start < module->end)
482 || dyn_vaddr == module->l_ld)
484 bool close_elf = false;
485 if (! module->disk_file_has_build_id && build_id_len > 0)
487 /* Module found in segments with build-id is more reliable
488 than a module found via DT_DEBUG on disk without any build-id. */
490 if (module->elf != NULL)
493 if (module->elf != NULL
494 && module->disk_file_has_build_id && build_id_len > 0)
496 const void *elf_build_id;
497 GElf_Addr elf_build_id_elfaddr;
498 int elf_build_id_len;
500 if (__libdwfl_find_elf_build_id (NULL, module->elf,
502 &elf_build_id_elfaddr,
503 &elf_build_id_len) > 0)
505 if (build_id_len != (size_t) elf_build_id_len
506 || memcmp (build_id, elf_build_id, build_id_len) != 0)
512 elf_end (module->elf);
517 if (module->elf != NULL)
519 /* Ignore this found module if it would conflict in address
520 space with any already existing module of DWFL. */
521 skip_this_module = true;
524 if (skip_this_module)
531 /* Our return value now says to skip the segments contained
532 within the module. */
533 ndx = addr_segndx (dwfl, segment, module_end, true);
535 /* Examine its .dynamic section to get more interesting details.
536 If it has DT_SONAME, we'll use that as the module name.
537 If it has a DT_DEBUG, then it's actually a PIE rather than a DSO.
538 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
539 and they also tell us the essential portion of the file
540 for fetching symbols. */
541 GElf_Addr soname_stroff = 0;
542 GElf_Addr dynstr_vaddr = 0;
543 GElf_Xword dynstrsz = 0;
544 bool execlike = false;
/* Examine one dynamic-section entry (TAG, VAL).  Returns true once
   SONAME_STROFF, DYNSTR_VADDR and DYNSTRSZ are all nonzero.
   NOTE(review): the lines that inspect TAG and store VAL are not visible
   in this excerpt. */
545 inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val)
569 return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0;
572 const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32
573 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
574 void *dyn_data = NULL;
575 size_t dyn_data_size = 0;
576 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
577 && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz))
581 Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)];
582 Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)];
585 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
586 xlatefrom.d_buf = (void *) dyn_data;
587 xlatefrom.d_size = dyn_filesz;
588 xlateto.d_buf = &dyn;
589 xlateto.d_size = sizeof dyn;
591 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
593 if (elf32_xlatetom (&xlateto, &xlatefrom,
594 ehdr.e32.e_ident[EI_DATA]) != NULL)
595 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i)
596 if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val))
601 if (elf64_xlatetom (&xlateto, &xlatefrom,
602 ehdr.e32.e_ident[EI_DATA]) != NULL)
603 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i)
604 if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val))
608 finish_portion (&dyn_data, &dyn_data_size);
610 /* We'll use the name passed in, or a generic placeholder if there is no DT_SONAME. */
612 name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : execlike ? "[pie]" : "[dso]";
615 size_t soname_size = 0;
616 if (! name_is_final && dynstrsz != 0 && dynstr_vaddr != 0)
618 /* We know the bounds of the .dynstr section.
620 The DYNSTR_VADDR pointer comes from the .dynamic section
621 (DT_STRTAB, detected above). Ordinarily the dynamic linker
622 will have adjusted this pointer in place so it's now an
623 absolute address. But sometimes .dynamic is read-only (in
624 vDSOs and odd architectures), and sometimes the adjustment
625 just hasn't happened yet in the memory image we looked at.
626 So treat DYNSTR_VADDR as an absolute address if it falls
627 within the module bounds, or try applying the phdr bias
628 when that adjusts it to fall within the module bounds. */
630 if ((dynstr_vaddr < module_start || dynstr_vaddr >= module_end)
631 && dynstr_vaddr + bias >= module_start
632 && dynstr_vaddr + bias < module_end)
633 dynstr_vaddr += bias;
635 if (unlikely (dynstr_vaddr + dynstrsz > module_end))
638 /* Try to get the DT_SONAME string. */
639 if (soname_stroff != 0 && soname_stroff + 1 < dynstrsz
640 && ! read_portion (&soname, &soname_size,
641 dynstr_vaddr + soname_stroff, 0))
645 /* Now that we have chosen the module's name and bounds, report it.
646 If we found a build ID, report that too. */
648 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
649 module_start, module_end);
650 if (likely (mod != NULL) && build_id != NULL
651 && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
660 /* At this point we do not need BUILD_ID or NAME any more.
661 They have been copied. */
663 finish_portion (&soname, &soname_size);
665 if (unlikely (mod == NULL))
671 /* We have reported the module. Now let the caller decide whether we
672 should read the whole thing in right now. */
674 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
675 : buffer_available >= contiguous ? 0
676 : contiguous - buffer_available);
677 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
678 : dynstr_vaddr + dynstrsz - start);
679 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
682 if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
683 cost, worthwhile, whole, contiguous,
684 read_eagerly_arg, &elf)
687 /* The caller wants to read the whole file in right now, but hasn't
688 done it for us. Fill in a local image of the virtual file. */
690 void *contents = calloc (1, file_trimmed_end);
691 if (unlikely (contents == NULL))
/* Read SIZE bytes at address VADDR into the local file image CONTENTS,
   starting at byte OFFSET.  The result of segment_read is deliberately
   discarded; CONTENTS was obtained from calloc, so any span that is not
   filled in stays zero. */
694 inline void final_read (size_t offset, GElf_Addr vaddr, size_t size)
696 void *into = contents + offset;
697 size_t read_size = size;
698 (void) segment_read (addr_segndx (dwfl, segment, vaddr, false),
699 &into, &read_size, vaddr, size);
702 if (contiguous < file_trimmed_end)
704 /* We can't use the memory image verbatim as the file image.
705 So we'll be reading into a local image of the virtual file. */
/* Copy one program header's file contents into the local image: FILESZ
   bytes from the biased address VADDR + BIAS to file offset OFFSET.
   NOTE(review): the line(s) between the signature and this call, which
   presumably test TYPE, are not visible in this excerpt. */
707 inline void read_phdr (GElf_Word type, GElf_Addr vaddr,
708 GElf_Off offset, GElf_Xword filesz)
711 final_read (offset, vaddr + bias, filesz);
714 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
715 for (uint_fast16_t i = 0; i < phnum; ++i)
716 read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr,
717 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz);
719 for (uint_fast16_t i = 0; i < phnum; ++i)
720 read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr,
721 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz);
725 /* The whole file sits contiguous in memory,
726 but the caller did not read it in for us, so copy it ourselves. */
728 const size_t have = MIN (buffer_available, file_trimmed_end);
729 memcpy (contents, buffer, have);
731 if (have < file_trimmed_end)
732 final_read (have, start + have, file_trimmed_end - have);
735 elf = elf_memory (contents, file_trimmed_end);
736 if (unlikely (elf == NULL))
739 elf->flags |= ELF_F_MALLOCED;
744 /* Install the file in the module. */
746 mod->main.vaddr = module_start - bias;
747 mod->main.address_sync = module_address_sync;
748 mod->main_bias = bias;