1 /* Sniff out modules from ELF headers visible in memory segments.
2 Copyright (C) 2008-2012 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
30 #include "../libelf/libelfP.h" /* For NOTE_ALIGN. */
37 #include <sys/param.h>
42 /* A good size for the initial read from memory, if it's not too costly.
43 This more than covers the phdrs and note segment in the average 64-bit binary. */
46 #define INITIAL_READ 1024
48 #if __BYTE_ORDER == __LITTLE_ENDIAN
49 # define MY_ELFDATA ELFDATA2LSB
51 # define MY_ELFDATA ELFDATA2MSB
55 /* Return user segment index closest to ADDR but not above it.
56 If NEXT, return the closest to ADDR but not below it. */
58 addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr, bool next)
/* While scanning DWFL's lookup table forward from SEGMENT, remember the
   most recent non-negative segment index seen.  */
63 if (dwfl->lookup_segndx[segment] >= 0)
64 ndx = dwfl->lookup_segndx[segment];
/* Stepping onto (or past) the final lookup entry ends the scan: the
   result is the remembered index, or the one after it when NEXT is set.  */
65 if (++segment >= dwfl->lookup_elts - 1)
66 return next ? ndx + 1 : ndx;
/* Keep scanning as long as the entry's address is still below ADDR.  */
68 while (dwfl->lookup_addr[segment] < addr);
/* Step over entries whose recorded segment index is negative, again
   stopping at the final lookup entry.  */
72 while (dwfl->lookup_segndx[segment] < 0)
73 if (++segment >= dwfl->lookup_elts - 1)
75 ndx = dwfl->lookup_segndx[segment];
82 dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
83 Dwfl_Memory_Callback *memory_callback,
84 void *memory_callback_arg,
85 Dwfl_Module_Callback *read_eagerly,
86 void *read_eagerly_arg)
90 if (segment >= dwfl->lookup_elts)
91 segment = dwfl->lookup_elts - 1;
94 && (dwfl->lookup_segndx[segment] > ndx
95 || dwfl->lookup_segndx[segment] == -1))
98 while (dwfl->lookup_segndx[segment] < ndx)
99 if (++segment == dwfl->lookup_elts)
102 GElf_Addr start = dwfl->lookup_addr[segment];
/* Nested helper: pass a read request for MINREAD bytes at ADDR in segment
   SEGNDX to the caller-supplied MEMORY_CALLBACK, along with DWFL and
   MEMORY_CALLBACK_ARG.  The callback's result is negated, so this returns
   true exactly when the callback returns false (zero).  */
104 inline bool segment_read (int segndx,
105 void **buffer, size_t *buffer_available,
106 GElf_Addr addr, size_t minread)
108 return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available,
109 addr, minread, memory_callback_arg);
/* Nested helper: call segment_read on BUFFER with segment index -1 and a
   zero address and minimum size, discarding the result.
   NOTE(review): presumably index -1 asks the memory callback to release
   the buffer rather than read anything -- confirm against the callback's
   documented contract.  */
112 inline void release_buffer (void **buffer, size_t *buffer_available)
115 (void) segment_read (-1, buffer, buffer_available, 0, 0);
118 /* First read in the file header and check its sanity. */
121 size_t buffer_available = INITIAL_READ;
123 inline int finish (void)
125 release_buffer (&buffer, &buffer_available);
129 if (segment_read (ndx, &buffer, &buffer_available,
130 start, sizeof (Elf64_Ehdr))
131 || memcmp (buffer, ELFMAG, SELFMAG) != 0)
/* Nested helper: make FILESZ bytes at VADDR available through *DATA.
   A FILESZ of zero selects "string mode", where the wanted extent is a
   NUL-terminated string starting at VADDR.  If the request is not covered
   by the bytes already held in BUFFER (which begins at START), a fresh
   segment_read is issued and its result returned; otherwise *DATA is
   pointed into BUFFER.
   NOTE(review): VADDR - START + FILESZ is unsigned arithmetic and is not
   checked for wrap-around here -- confirm callers cannot pass values that
   wrap.  */
134 inline bool read_portion (void **data, size_t *data_size,
135 GElf_Addr vaddr, size_t filesz)
137 if (vaddr - start + filesz > buffer_available
138 /* If we're in string mode, then don't consider the buffer we have
139 sufficient unless it contains the terminator of the string. */
140 || (filesz == 0 && memchr (vaddr - start + buffer, '\0',
141 buffer_available - (vaddr - start)) == NULL))
/* Not satisfiable from BUFFER: read from whichever segment addr_segndx
   reports for VADDR.  */
145 return segment_read (addr_segndx (dwfl, segment, vaddr, false),
146 data, data_size, vaddr, filesz);
149 /* We already have this whole note segment from our initial read. */
150 *data = vaddr - start + buffer;
155 inline void finish_portion (void **data, size_t *data_size)
158 release_buffer (data, data_size);
161 /* Extract the information we need from the file header. */
169 uint_fast16_t phentsize;
173 .d_type = ELF_T_EHDR,
174 .d_buf = (void *) buffer,
175 .d_version = EV_CURRENT,
179 .d_type = ELF_T_EHDR,
181 .d_size = sizeof ehdr,
182 .d_version = EV_CURRENT,
184 switch (((const unsigned char *) buffer)[EI_CLASS])
187 xlatefrom.d_size = sizeof (Elf32_Ehdr);
188 if (elf32_xlatetom (&xlateto, &xlatefrom,
189 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
191 phoff = ehdr.e32.e_phoff;
192 phnum = ehdr.e32.e_phnum;
193 phentsize = ehdr.e32.e_phentsize;
194 if (phentsize != sizeof (Elf32_Phdr))
196 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize;
200 xlatefrom.d_size = sizeof (Elf64_Ehdr);
201 if (elf64_xlatetom (&xlateto, &xlatefrom,
202 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
204 phoff = ehdr.e64.e_phoff;
205 phnum = ehdr.e64.e_phnum;
206 phentsize = ehdr.e64.e_phentsize;
207 if (phentsize != sizeof (Elf64_Phdr))
209 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize;
216 /* The file header tells where to find the program headers.
217 These are what we need to find the boundaries of the module.
218 Without them, we don't have a module to report. */
223 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
224 xlatefrom.d_size = phnum * phentsize;
226 void *ph_buffer = NULL;
227 size_t ph_buffer_size = 0;
228 if (read_portion (&ph_buffer, &ph_buffer_size,
229 start + phoff, xlatefrom.d_size))
232 xlatefrom.d_buf = ph_buffer;
236 Elf32_Phdr p32[phnum];
237 Elf64_Phdr p64[phnum];
240 xlateto.d_buf = &phdrs;
241 xlateto.d_size = sizeof phdrs;
243 /* Track the bounds of the file visible in memory. */
244 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */
245 GElf_Off file_end = 0; /* Rounded up to effective page size. */
246 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */
247 GElf_Off total_filesz = 0; /* Total size of data to read. */
249 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */
251 bool found_bias = false;
253 /* Collect the unbiased bounds of the module here. */
254 GElf_Addr module_start = -1l;
255 GElf_Addr module_end = 0;
256 GElf_Addr module_address_sync = 0;
258 /* If we see PT_DYNAMIC, record it here. */
259 GElf_Addr dyn_vaddr = 0;
260 GElf_Xword dyn_filesz = 0;
262 /* Collect the build ID bits here. */
263 void *build_id = NULL;
264 size_t build_id_len = 0;
265 GElf_Addr build_id_vaddr = 0;
267 /* Consider a PT_NOTE we've found in the image.
   VADDR and FILESZ locate the note data to scan.  The scan looks for an
   NT_GNU_BUILD_ID note whose name is "GNU"; when one is found, its
   descriptor's address, length and a malloc'd copy of its bytes are
   recorded in BUILD_ID_VADDR, BUILD_ID_LEN and BUILD_ID.  */
268 inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz)
270 /* If we have already seen a build ID, we don't care any more. */
271 if (build_id != NULL || filesz == 0)
/* Obtain the note bytes through read_portion.  */
276 if (read_portion (&data, &data_size, vaddr, filesz))
/* The walk below uses a single note-header layout for both ELF classes,
   which is only valid if the 32- and 64-bit headers are the same size.  */
279 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
/* Compare the image's byte order with the host's (MY_ELFDATA).  */
282 if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA)
/* Byte-order conversion path: allocate FILESZ bytes and have
   elf32_xlatetom convert the note data into them.  */
286 notes = malloc (filesz);
287 if (unlikely (notes == NULL))
289 xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR;
290 xlatefrom.d_buf = (void *) data;
291 xlatefrom.d_size = filesz;
292 xlateto.d_buf = notes;
293 xlateto.d_size = filesz;
294 if (elf32_xlatetom (&xlateto, &xlatefrom,
295 ehdr.e32.e_ident[EI_DATA]) == NULL)
/* Walk the note records from NOTES up to NOTES + FILESZ.  */
299 const GElf_Nhdr *nh = notes;
300 while ((const void *) nh < (const void *) notes + filesz)
/* The name follows the header; the descriptor follows the name padded
   with NOTE_ALIGN.  */
302 const void *note_name = nh + 1;
303 const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz);
/* Check the descriptor size claimed by the header against the bytes that
   actually remain after NOTE_DESC.  */
304 if (unlikely ((size_t) ((const void *) notes + filesz
305 - note_desc) < nh->n_descsz))
/* A build-ID note has type NT_GNU_BUILD_ID and the exact name "GNU"
   (the size comparison includes the terminating NUL).  */
308 if (nh->n_type == NT_GNU_BUILD_ID
310 && nh->n_namesz == sizeof "GNU"
311 && !memcmp (note_name, "GNU", sizeof "GNU"))
/* Record where the descriptor lives (its offset within NOTES applied to
   VADDR) and keep a private copy of its bytes if malloc succeeds.  */
313 build_id_vaddr = note_desc - (const void *) notes + vaddr;
314 build_id_len = nh->n_descsz;
315 build_id = malloc (nh->n_descsz);
316 if (likely (build_id != NULL))
317 memcpy (build_id, note_desc, build_id_len);
/* Advance to the next record, past the NOTE_ALIGN-padded descriptor.  */
321 nh = note_desc + NOTE_ALIGN (nh->n_descsz);
/* Give back whatever read_portion handed out.  */
327 finish_portion (&data, &data_size);
330 /* Consider each of the program headers we've read from the image.
   The arguments are the fields of one program header.  The visible code
   passes note data to consider_notes and, for segments it measures,
   updates the running file bounds (FILE_TRIMMED_END, FILE_END, CONTIGUOUS,
   TOTAL_FILESZ), the START-to-p_vaddr BIAS, and the module bounds
   (MODULE_START, MODULE_END, MODULE_ADDRESS_SYNC).  */
331 inline void consider_phdr (GElf_Word type,
332 GElf_Addr vaddr, GElf_Xword memsz,
333 GElf_Off offset, GElf_Xword filesz,
344 /* We calculate from the p_offset of the note segment,
345 because we don't yet know the bias for its p_vaddr. */
346 consider_notes (start + offset, filesz);
/* Effective alignment: DWFL's segment_align when that is greater than 1,
   otherwise the header's own alignment, or 1 if that is zero.  */
350 align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1;
/* VADDR_END is the in-memory end rounded up to ALIGN.  FILESZ_VADDR is
   the end of the file-backed part: VADDR + FILESZ when FILESZ is smaller
   than MEMSZ, else the rounded end.  FILESZ_OFFSET is that same point
   expressed as a file offset.  */
352 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
353 GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end;
354 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
/* Extend the unrounded end of file data if this segment reaches past it.  */
356 if (file_trimmed_end < offset + filesz)
358 file_trimmed_end = offset + filesz;
360 /* Trim the last segment so we don't bother with zeros
361 in the last page that are off the end of the file.
362 However, if the extra bit in that page includes the
363 section headers, keep them. */
364 if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end)
366 filesz += shdrs_end - file_trimmed_end;
367 file_trimmed_end = shdrs_end;
/* Accumulate the number of bytes that would have to be read.  */
371 total_filesz += filesz;
/* Extend the rounded file end.  CONTIGUOUS follows it only when this
   segment lies at the same distance from START in memory as its data
   does from the beginning of the file.  */
373 if (file_end < filesz_offset)
375 file_end = filesz_offset;
376 if (filesz_vaddr - start == filesz_offset)
377 contiguous = file_end;
/* Until a bias has been found, a segment whose aligned-down file offset
   is 0 and whose file extent reaches past the program headers determines
   the bias between START and its p_vaddr.  */
380 if (!found_bias && (offset & -align) == 0
381 && likely (filesz_offset >= phoff + phnum * phentsize))
383 bias = start - vaddr;
/* Track the lowest aligned-down start address; MODULE_ADDRESS_SYNC is the
   unrounded end of that lowest segment.  */
387 if ((vaddr & -align) < module_start)
389 module_start = vaddr & -align;
390 module_address_sync = vaddr + memsz;
/* Track the highest rounded end address.  */
393 if (module_end < vaddr_end)
394 module_end = vaddr_end;
398 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
400 if (elf32_xlatetom (&xlateto, &xlatefrom,
401 ehdr.e32.e_ident[EI_DATA]) == NULL)
402 found_bias = false; /* Trigger error check. */
404 for (uint_fast16_t i = 0; i < phnum; ++i)
405 consider_phdr (phdrs.p32[i].p_type,
406 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz,
407 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz,
408 phdrs.p32[i].p_align);
412 if (elf64_xlatetom (&xlateto, &xlatefrom,
413 ehdr.e32.e_ident[EI_DATA]) == NULL)
414 found_bias = false; /* Trigger error check. */
416 for (uint_fast16_t i = 0; i < phnum; ++i)
417 consider_phdr (phdrs.p64[i].p_type,
418 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz,
419 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz,
420 phdrs.p64[i].p_align);
423 finish_portion (&ph_buffer, &ph_buffer_size);
425 /* We must have seen the segment covering offset 0, or else the ELF
426 header we read at START was not produced by these program headers. */
427 if (unlikely (!found_bias))
430 /* Now we know enough to report a module for sure: its bounds. */
431 module_start += bias;
436 /* Our return value now says to skip the segments contained
437 within the module. */
438 ndx = addr_segndx (dwfl, segment, module_end, true);
440 /* Examine its .dynamic section to get more interesting details.
441 If it has DT_SONAME, we'll use that as the module name.
442 If it has a DT_DEBUG, then it's actually a PIE rather than a DSO.
443 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
444 and they also tell us the essential portion of the file
445 for fetching symbols. */
446 GElf_Addr soname_stroff = 0;
447 GElf_Addr dynstr_vaddr = 0;
448 GElf_Xword dynstrsz = 0;
449 bool execlike = false;
450 inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val)
474 return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0;
477 const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32
478 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
479 void *dyn_data = NULL;
480 size_t dyn_data_size = 0;
481 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
482 && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz))
486 Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)];
487 Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)];
490 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
491 xlatefrom.d_buf = (void *) dyn_data;
492 xlatefrom.d_size = dyn_filesz;
493 xlateto.d_buf = &dyn;
494 xlateto.d_size = sizeof dyn;
496 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
498 if (elf32_xlatetom (&xlateto, &xlatefrom,
499 ehdr.e32.e_ident[EI_DATA]) != NULL)
500 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i)
501 if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val))
506 if (elf64_xlatetom (&xlateto, &xlatefrom,
507 ehdr.e32.e_ident[EI_DATA]) != NULL)
508 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i)
509 if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val))
513 finish_portion (&dyn_data, &dyn_data_size);
515 /* We'll use the name passed in or a stupid default if not DT_SONAME. */
517 name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : execlike ? "[pie]" : "[dso]";
520 size_t soname_size = 0;
521 if (dynstrsz != 0 && dynstr_vaddr != 0)
523 /* We know the bounds of the .dynstr section.
525 The DYNSTR_VADDR pointer comes from the .dynamic section
526 (DT_STRTAB, detected above). Ordinarily the dynamic linker
527 will have adjusted this pointer in place so it's now an
528 absolute address. But sometimes .dynamic is read-only (in
529 vDSOs and odd architectures), and sometimes the adjustment
530 just hasn't happened yet in the memory image we looked at.
531 So treat DYNSTR_VADDR as an absolute address if it falls
532 within the module bounds, or try applying the phdr bias
533 when that adjusts it to fall within the module bounds. */
535 if ((dynstr_vaddr < module_start || dynstr_vaddr >= module_end)
536 && dynstr_vaddr + bias >= module_start
537 && dynstr_vaddr + bias < module_end)
538 dynstr_vaddr += bias;
540 if (unlikely (dynstr_vaddr + dynstrsz > module_end))
543 /* Try to get the DT_SONAME string. */
544 if (soname_stroff != 0 && soname_stroff + 1 < dynstrsz
545 && ! read_portion (&soname, &soname_size,
546 dynstr_vaddr + soname_stroff, 0))
550 /* Now that we have chosen the module's name and bounds, report it.
551 If we found a build ID, report that too. */
553 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
554 module_start, module_end);
555 if (likely (mod != NULL) && build_id != NULL
556 && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
565 /* At this point we do not need BUILD_ID or NAME any more.
566 They have been copied. */
568 finish_portion (&soname, &soname_size);
570 if (unlikely (mod == NULL))
576 /* We have reported the module. Now let the caller decide whether we
577 should read the whole thing in right now. */
579 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
580 : buffer_available >= contiguous ? 0
581 : contiguous - buffer_available);
582 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
583 : dynstr_vaddr + dynstrsz - start);
584 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
587 if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
588 cost, worthwhile, whole, contiguous,
589 read_eagerly_arg, &elf)
592 /* The caller wants to read the whole file in right now, but hasn't
593 done it for us. Fill in a local image of the virtual file. */
595 void *contents = calloc (1, file_trimmed_end);
596 if (unlikely (contents == NULL))
/* Nested helper: request SIZE bytes at VADDR via segment_read, handing it
   a buffer pointer at CONTENTS + OFFSET with a capacity of SIZE.  The
   segment index is looked up with addr_segndx.  The outcome of the read
   is deliberately ignored (note the void cast).  */
599 inline void final_read (size_t offset, GElf_Addr vaddr, size_t size)
601 void *into = contents + offset;
602 size_t read_size = size;
603 (void) segment_read (addr_segndx (dwfl, segment, vaddr, false),
604 &into, &read_size, vaddr, size);
607 if (contiguous < file_trimmed_end)
609 /* We can't use the memory image verbatim as the file image.
610 So we'll be reading into a local image of the virtual file. */
612 inline void read_phdr (GElf_Word type, GElf_Addr vaddr,
613 GElf_Off offset, GElf_Xword filesz)
616 final_read (offset, vaddr + bias, filesz);
619 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
620 for (uint_fast16_t i = 0; i < phnum; ++i)
621 read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr,
622 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz);
624 for (uint_fast16_t i = 0; i < phnum; ++i)
625 read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr,
626 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz);
630 /* The whole file sits contiguous in memory,
631 but the caller didn't want to just do it. */
633 const size_t have = MIN (buffer_available, file_trimmed_end);
634 memcpy (contents, buffer, have);
636 if (have < file_trimmed_end)
637 final_read (have, start + have, file_trimmed_end - have);
640 elf = elf_memory (contents, file_trimmed_end);
641 if (unlikely (elf == NULL))
644 elf->flags |= ELF_F_MALLOCED;
649 /* Install the file in the module. */
651 mod->main.vaddr = module_start - bias;
652 mod->main.address_sync = module_address_sync;