1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2 Copyright (C) 2005-2011, 2013, 2014 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
29 /* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30 Everything we need here is fine if its declarations just come first. */
40 #include <stdio_ext.h>
43 #include <sys/utsname.h>
/* Name under which the kernel image itself is reported as a module.  */
48 #define KERNEL_MODNAME "kernel"
/* printf format for the module directory of a release; %s is the release.  */
50 #define MODULEDIRFMT "/lib/modules/%s"
/* Files read by the code below; in the ...FMT macros %s is a module name.  */
52 #define KNOTESFILE "/sys/kernel/notes"
53 #define MODNOTESFMT "/sys/module/%s/notes"
54 #define KSYMSFILE "/proc/kallsyms"
55 #define MODULELIST "/proc/modules"
56 #define SECADDRDIRFMT "/sys/module/%s/sections/"
57 #define MODULE_SECT_NAME_LEN 32 /* Minimum any linux/module.h has had. */
/* Table of file-name suffixes that try_kernel_name appends to the vmlinux
   path.  It is only compiled when at least one of USE_ZLIB, USE_BZLIB or
   USE_LZMA is defined.  The initializer is not visible in this listing.  */
60 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
61 static const char *vmlinux_suffixes[] =
75 /* Try to open the given file as it is or under the debuginfo directory. */
/* Summary of the visible steps: unless the debuginfo search path begins
   with ':', *FNAME is opened directly.  dwfl_standard_find_debuginfo is
   asked for the file using its basename and, when TRY_DEBUG is set and
   no descriptor was obtained, using the default debuglink name.  A name
   found that way replaces *FNAME.  When decompression support is
   configured, *FNAME with each vmlinux_suffixes entry appended is opened
   as well.  Callers use the result as a file descriptor that is negative
   on failure.
   NOTE(review): the lines joining these steps are missing from this
   listing, so the exact conditions between them should be confirmed.  */
77 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
82 /* Don't bother trying *FNAME itself here if the path will cause it to be
83 tried because we give its own basename as DEBUGLINK_FILE. */
84 int fd = ((((dwfl->callbacks->debuginfo_path
85 ? *dwfl->callbacks->debuginfo_path : NULL)
86 ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
87 : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
/* A stand-in module that only carries DWFL, used for the lookups below.  */
91 Dwfl_Module fakemod = { .dwfl = dwfl };
92 /* First try the file's unadorned basename as DEBUGLINK_FILE,
93 to look for "vmlinux" files. */
94 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
95 *fname, basename (*fname), 0,
97 if (fd < 0 && try_debug)
98 /* Next, let the call use the default of basename + ".debug",
99 to look for "vmlinux.debug" files. */
100 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
102 &fakemod.debug.name);
103 if (fakemod.debug.name != NULL)
106 *fname = fakemod.debug.name;
110 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
113 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
117 if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
119 fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
/* Return the release string of the running kernel, obtained from uname.
   The static utsname buffer doubles as a cache: uname is called only
   while its release field is still empty.  The value returned when uname
   fails is not visible in this listing; get_release treats NULL as
   failure.  */
140 static inline const char *
141 kernel_release (void)
143 /* Cache the `uname -r` string we'll use. */
144 static struct utsname utsname;
145 if (utsname.release[0] == '\0' && uname (&utsname) != 0)
147 return utsname.release;
/* Choose a vmlinux path for RELEASE and open it with try_kernel_name.
   A RELEASE starting with '/' names a directory and RELEASE/vmlinux is
   used.  Otherwise /boot/vmlinux-RELEASE is tried and, if that yields no
   descriptor, MODULEDIRFMT "/vmlinux".  The path is allocated with
   asprintf and handed back through *FNAME; callers use the result as a
   file descriptor.  */
151 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
153 if ((release[0] == '/'
154 ? asprintf (fname, "%s/vmlinux", release)
155 : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
158 int fd = try_kernel_name (dwfl, fname, true);
159 if (fd < 0 && release[0] != '/')
162 if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
164 fd = try_kernel_name (dwfl, fname, true);
/* Settle which kernel release string to use.  If RELEASE is NULL or
   points to NULL, the release of the running kernel is taken from
   kernel_release and stored into *RELEASE.  Callers treat a nonzero
   result as an error code.
   NOTE(review): the guards around the store and the return statements
   are missing from this listing.  */
171 get_release (Dwfl *dwfl, const char **release)
176 const char *release_string = release == NULL ? NULL : *release;
177 if (release_string == NULL)
179 release_string = kernel_release ();
180 if (release_string == NULL)
183 *release = release_string;
/* Report the kernel image found on disk for *RELEASE as the module
   KERNEL_MODNAME.  PREDICATE, when non-null, is called with a NULL file
   name while computing the failure result (zero if it declines,
   otherwise errno or ENOENT) and with the real file name once a file
   has been found.  The image is reported through dwfl_report_elf and its
   e_type is forced to ET_DYN.
   NOTE(review): *RELEASE is dereferenced right after get_release
   returns, so RELEASE itself has to be a valid pointer here.  */
190 report_kernel (Dwfl *dwfl, const char **release,
191 int (*predicate) (const char *module, const char *file))
193 int result = get_release (dwfl, release);
194 if (unlikely (result != 0))
198 int fd = find_kernel_elf (dwfl, *release, &fname);
201 result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
202 ? 0 : errno ?: ENOENT);
207 if (predicate != NULL)
209 /* Let the predicate decide whether to use this one. */
210 int want = (*predicate) (KERNEL_MODNAME, fname);
218 /* Note that on some architectures (e.g. x86_64) the vmlinux
219 is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
220 In both cases the phdr p_vaddr load address will be non-zero.
221 We want the image to be placed as if it was ET_DYN, so
222 pass true for add_p_vaddr which will do the right thing
223 (in combination with a zero base) in either case. */
224 Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
229 /* Whatever e_type the file has, always treat the kernel as relocatable. */
230 mod->e_type = ET_DYN;
235 if (!report || result < 0)
242 /* Look for a kernel debug archive. If we find one, report all its modules.
243 If not, return ENOENT. */
/* The archive is RELEASE/debug.a when *RELEASE starts with '/', otherwise
   MODULEDIRFMT "/debug.a".  It is opened with try_kernel_name with
   TRY_DEBUG false, so the ".debug" retry is not attempted.  After
   __libdwfl_report_offline has been called on the archive, the module
   list is searched for a module named "kernel" that is not marked gc
   and is not ET_REL.  */
245 report_kernel_archive (Dwfl *dwfl, const char **release,
246 int (*predicate) (const char *module, const char *file))
248 int result = get_release (dwfl, release);
249 if (unlikely (result != 0))
253 int res = (((*release)[0] == '/')
254 ? asprintf (&archive, "%s/debug.a", *release)
255 : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
256 if (unlikely (res < 0))
259 int fd = try_kernel_name (dwfl, &archive, false);
261 result = errno ?: ENOENT;
264 /* We have the archive file open! */
265 Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
267 if (unlikely (last == NULL))
271 /* Find the kernel and move it to the head of the list. */
272 Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
273 for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
274 if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
/* Test whether the name of F ends in a known module suffix.  For a
   candidate suffix SFX the test succeeds when the tail of the name equals
   SFX and, if NAMELEN is nonzero, the name is exactly NAMELEN characters
   followed by SFX; with NAMELEN zero at least one character has to
   precede SFX.  On a match the length of SFX is returned.  Callers use
   the result both as a truth value and as a length.
   NOTE(review): the macro wrapping the lines below and the list of
   suffixes it is applied to are not visible in this listing.  */
289 check_suffix (const FTSENT *f, size_t namelen)
292 if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1 \
293 : f->fts_namelen >= sizeof sfx) \
294 && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1), \
296 return sizeof sfx - 1
314 /* Report a kernel and all its modules found on disk, for offline use.
315 If RELEASE starts with '/', it names a directory to look in;
316 if not, it names a directory to find under /lib/modules/;
317 if null, /lib/modules/`uname -r` is used.
318 Returns zero on success, -1 if dwfl_report_module failed,
319 or an errno code if finding the files on disk failed. */
/* A debug archive is tried first through report_kernel_archive; only when
   that returns ENOENT are the kernel (report_kernel) and the module files
   below the modules directory reported one by one.  PREDICATE, when
   non-null, is consulted with each module name and file path.  */
321 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
322 int (*predicate) (const char *module,
325 int result = report_kernel_archive (dwfl, &release, predicate);
326 if (result != ENOENT)
329 /* First report the kernel. */
330 result = report_kernel (dwfl, &release, predicate);
333 /* Do "find /lib/modules/RELEASE -name *.ko". */
335 char *modulesdir[] = { NULL, NULL };
336 if (release[0] == '/')
337 modulesdir[0] = (char *) release;
340 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
344 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
/* Forget a borrowed RELEASE pointer so that the free calls below only
   ever release memory obtained from asprintf.  */
345 if (modulesdir[0] == (char *) release)
346 modulesdir[0] = NULL;
349 free (modulesdir[0]);
354 while ((f = fts_read (fts)) != NULL)
356 /* Skip a "source" subtree, which tends to be large.
357 This insane hard-coding of names is what depmod does too. */
358 if (f->fts_namelen == sizeof "source" - 1
359 && !strcmp (f->fts_name, "source"))
361 fts_set (fts, f, FTS_SKIP);
370 /* See if this file name matches "*.ko". */
371 const size_t suffix = check_suffix (f, 0);
374 /* We have a .ko file to report. Following the algorithm
375 by which the kernel makefiles set KBUILD_MODNAME, we
376 replace all ',' or '-' with '_' in the file name and
377 call that the module name. Modules could well be
378 built using different embedded names than their file
379 names. To handle that, we would have to look at the
380 __this_module.name contents in the module's text. */
382 char name[f->fts_namelen - suffix + 1];
/* Copy exactly the part of the file name that precedes the matched
   suffix.  The bound has to be SUFFIX rather than a fixed 3, because
   check_suffix can return other suffix lengths and NAME only has room
   for fts_namelen - suffix characters plus the terminator.  */
383 for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
384 if (f->fts_name[i] == '-' || f->fts_name[i] == ',')
387 name[i] = f->fts_name[i];
388 name[f->fts_namelen - suffix] = '\0';
390 if (predicate != NULL)
392 /* Let the predicate decide whether to use this one. */
393 int want = (*predicate) (name, f->fts_path);
403 if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
414 result = f->fts_errno;
422 /* We only get here in error cases. */
426 free (modulesdir[0]);
431 INTDEF (dwfl_linux_kernel_report_offline)
434 /* Grovel around to guess the bounds of the runtime kernel image. */
/* Reads KSYMSFILE one line at a time.  Each line is parsed as a hex
   address followed by a type field.  The first read loop ends at a
   symbol whose type is one of T, t, R or r (or when reading fails),
   with its address in *START.  Further lines keep updating *END, and a
   symbol named __start_notes is looked for while *NOTES is still zero.
   *START is then rounded down and *END rounded up to the page size, and
   the range is tested for being empty or shorter than one page.
   NOTE(review): read_address indexes line[n - 2] after only checking
   that n is at least 1, so a one-character line would be read before
   the start of the buffer.  */
436 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
438 FILE *f = fopen (KSYMSFILE, "r");
442 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
452 inline bool read_address (Dwarf_Addr *addr)
454 if ((n = getline (&line, &linesz, f)) < 1 || line[n - 2] == ']')
456 *addr = strtoull (line, &p, 16);
457 p += strspn (p, " \t");
458 type = strsep (&p, " \t\n");
461 return p != NULL && p != line;
466 result = read_address (start) ? 0 : -1;
467 while (result == 0 && strchr ("TtRr", *type) == NULL);
472 while (read_address (end))
473 if (*notes == 0 && !strcmp (p, "__start_notes\n"))
/* Round the bounds outward to whole pages.  */
476 Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
477 *start &= -(Dwarf_Addr) round_kernel;
478 *end += round_kernel - 1;
479 *end &= -(Dwarf_Addr) round_kernel;
480 if (*start >= *end || *end - *start < round_kernel)
486 result = ferror_unlocked (f) ? errno : ENOEXEC;
494 /* Look for a build ID note in NOTESFILE and associate the ID with MOD. */
/* NOTESFILE is read with a single read call into a buffer holding 8192
   data bytes and walked as a sequence of ELF notes.  An NT_GNU_BUILD_ID
   note whose name is "GNU" is handed to dwfl_module_report_build_id
   together with its address, VADDR plus the offset of the descriptor in
   the buffer.  The address of section SECNAME is looked up first with
   dwfl_linux_kernel_module_section_address (presumably only when SECNAME
   is non-null; that guard is not visible in this listing).  */
496 check_notes (Dwfl_Module *mod, const char *notesfile,
497 Dwarf_Addr vaddr, const char *secname)
499 int fd = open64 (notesfile, O_RDONLY);
/* The same header layout is relied on for 32-bit and 64-bit notes.  */
503 assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
504 assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
508 unsigned char data[8192];
511 ssize_t n = read (fd, buf.data, sizeof buf);
517 unsigned char *p = buf.data;
518 while (p < &buf.data[n])
520 /* No translation required since we are reading the native kernel. */
521 GElf_Nhdr *nhdr = (void *) p;
523 unsigned char *name = p;
/* Name and descriptor are each padded to a multiple of four bytes.  */
524 p += (nhdr->n_namesz + 3) & -4U;
525 unsigned char *bits = p;
526 p += (nhdr->n_descsz + 3) & -4U;
528 if (p <= &buf.data[n]
529 && nhdr->n_type == NT_GNU_BUILD_ID
530 && nhdr->n_namesz == sizeof "GNU"
531 && !memcmp (name, "GNU", sizeof "GNU"))
533 /* Found it. For a module we must figure out its VADDR now. */
536 && (INTUSE(dwfl_linux_kernel_module_section_address)
537 (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
538 || vaddr == (GElf_Addr) -1l))
542 vaddr += bits - buf.data;
543 return INTUSE(dwfl_module_report_build_id) (mod, bits,
544 nhdr->n_descsz, vaddr);
551 /* Look for a build ID for the kernel. */
/* Runs check_notes on KNOTESFILE with VADDR and no section name.  A
   negative result becomes -1 and every other result becomes 0.  */
553 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
555 return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
558 /* Look for a build ID for a loaded kernel module. */
/* Walks the tree at MODNOTESFMT for the name of MOD with fts and passes
   entries to check_notes, using the entry name as the section name and
   zero as the initial address.  A positive check_notes result means that
   entry held no build ID.
   NOTE(review): the filtering on the kind of fts entry is not visible in
   this listing.  */
560 check_module_notes (Dwfl_Module *mod)
562 char *dirs[2] = { NULL, NULL };
563 if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
566 FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
575 while ((f = fts_read (fts)) != NULL)
582 result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
583 if (result > 0) /* Nothing found. */
592 result = f->fts_errno;
601 /* We only get here when finished or in error cases. */
/* Report the running kernel as module KERNEL_MODNAME.  If a module of
   that name is already on the module list of DWFL it is reported again
   without recomputing anything.  Otherwise the bounds are guessed with
   intuit_kernel_bounds and, only when that fails with ENOENT, the kernel
   ELF file is searched for on disk with report_kernel.  The visible
   return statements yield -1 when reporting the module fails.  */
611 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
615 inline Dwfl_Module *report (void)
617 return INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end);
620 /* This is a bit of a kludge. If we already reported the kernel,
621 don't bother figuring it out again--it never changes. */
622 for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
623 if (!strcmp (m->name, KERNEL_MODNAME))
627 return report () == NULL ? -1 : 0;
630 /* Try to figure out the bounds of the kernel image without
631 looking for any vmlinux file. */
633 /* The compiler cannot deduce that if intuit_kernel_bounds returns
634 zero NOTES will be initialized. Fake the initialization. */
635 asm ("" : "=m" (notes));
636 int result = intuit_kernel_bounds (&start, &end, &notes);
639 Dwfl_Module *mod = report ();
640 return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
642 if (result != ENOENT)
645 /* Find the ELF file for the running kernel and dwfl_report_elf it. */
/* report_kernel dereferences its RELEASE argument, so hand it the
   address of a null string pointer; get_release then fills in the
   release of the running kernel.  */
const char *release = NULL;
646 return report_kernel (dwfl, &release, NULL);
648 INTDEF (dwfl_linux_kernel_report_kernel)
651 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules. */
/* When MOD already has a build ID, dwfl_build_id_find_elf is tried
   first.  If that does not settle the lookup, the kernel itself is found
   with find_kernel_elf, and any other module by walking MODULEDIRFMT for
   the running release, skipping "source" subtrees, until a file passes
   check_suffix for the length of MODULE_NAME and starts with MODULE_NAME
   or ALTERNATE_NAME.  That file is opened and a copy of its path is
   stored in *FILE_NAME.  */
654 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
655 void **userdata __attribute__ ((unused)),
656 const char *module_name,
657 Dwarf_Addr base __attribute__ ((unused)),
658 char **file_name, Elf **elfp)
660 if (mod->build_id_len > 0)
662 int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
664 if (fd >= 0 || mod->main.elf != NULL || errno != 0)
668 const char *release = kernel_release ();
672 if (!strcmp (module_name, KERNEL_MODNAME))
673 return find_kernel_elf (mod->dwfl, release, file_name);
675 /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko". */
677 char *modulesdir[] = { NULL, NULL };
678 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
681 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
684 free (modulesdir[0]);
688 size_t namelen = strlen (module_name);
690 /* This is a kludge. There is no actual necessary relationship between
691 the name of the .ko file installed and the module name the kernel
692 knows it by when it's loaded. The kernel's only idea of the module
693 name comes from the name embedded in the object's magic
694 .gnu.linkonce.this_module section.
696 In practice, these module names match the .ko file names except for
697 some using '_' and some using '-'. So our cheap kludge is to look for
698 two files when either a '_' or '-' appears in a module name, one using
699 only '_' and one only using '-'. */
/* ALTERNATE_NAME has room for MODULE_NAME plus its terminator; subst_name
   builds it from MODULE_NAME by working on occurrences of FROM.  */
701 char alternate_name[namelen + 1];
702 inline bool subst_name (char from, char to)
704 const char *n = memchr (module_name, from, namelen);
707 char *a = mempcpy (alternate_name, module_name, n - module_name);
711 while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
713 a = mempcpy (a, n, p - n);
717 memcpy (a, n, namelen - (n - module_name) + 1);
/* With neither '-' nor '_' in the name there is no alternate spelling.  */
720 if (!subst_name ('-', '_') && !subst_name ('_', '-'))
721 alternate_name[0] = '\0';
725 while ((f = fts_read (fts)) != NULL)
727 /* Skip a "source" subtree, which tends to be large.
728 This insane hard-coding of names is what depmod does too. */
729 if (f->fts_namelen == sizeof "source" - 1
730 && !strcmp (f->fts_name, "source"))
732 fts_set (fts, f, FTS_SKIP);
742 /* See if this file name is "MODULE_NAME.ko". */
743 if (check_suffix (f, namelen)
744 && (!memcmp (f->fts_name, module_name, namelen)
745 || !memcmp (f->fts_name, alternate_name, namelen)))
747 int fd = open64 (f->fts_accpath, O_RDONLY);
748 *file_name = strdup (f->fts_path);
750 free (modulesdir[0]);
753 else if (*file_name == NULL)
765 error = f->fts_errno;
775 free (modulesdir[0]);
779 INTDEF (dwfl_linux_kernel_find_elf)
782 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
783 We read the information from /sys/module directly. */
/* The address is read as a hex number from the file named by
   SECADDRDIRFMT for MODNAME followed by SECNAME.  When that file cannot
   be opened the code falls back to reporting -1 for sections known not
   to be loaded, to the "_init" spelling of ".init" names, and to
   truncated forms of long section names.  Failures to build a file name
   return DWARF_CB_ABORT.  */
786 dwfl_linux_kernel_module_section_address
787 (Dwfl_Module *mod __attribute__ ((unused)),
788 void **userdata __attribute__ ((unused)),
789 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
790 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
791 const GElf_Shdr *shdr __attribute__ ((unused)),
795 if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
796 return DWARF_CB_ABORT;
798 FILE *f = fopen (sysfile, "r");
805 /* The .modinfo and .data.percpu sections are never kept
806 loaded in the kernel. If the kernel was compiled without
807 CONFIG_MODULE_UNLOAD, the .exit.* sections are not
808 actually loaded at all.
810 Setting *ADDR to -1 tells the caller this section is
811 actually absent from memory. */
813 if (!strcmp (secname, ".modinfo")
814 || !strcmp (secname, ".data.percpu")
815 || !strncmp (secname, ".exit", 5))
817 *addr = (Dwarf_Addr) -1l;
821 /* The goofy PPC64 module_frob_arch_sections function tweaks
822 the section names as a way to control other kernel code's
823 behavior, and this cruft leaks out into the /sys information.
824 The file name for ".init*" may actually look like "_init*". */
826 const bool is_init = !strncmp (secname, ".init", 5);
829 if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
830 modname, &secname[1]) < 0)
832 f = fopen (sysfile, "r");
838 /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
839 In case that size increases in the future, look for longer
840 truncated names first. */
841 size_t namelen = strlen (secname);
842 if (namelen >= MODULE_SECT_NAME_LEN)
844 int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
847 return DWARF_CB_ABORT;
848 char *end = sysfile + len;
852 f = fopen (sysfile, "r");
/* For ".init" names also try the same path with the leading '.' of the
   section name replaced by '_', then restore the '.'.  */
853 if (is_init && f == NULL && errno == ENOENT)
855 sysfile[len - namelen] = '_';
856 f = fopen (sysfile, "r");
857 sysfile[len - namelen] = '.';
860 while (f == NULL && errno == ENOENT
861 && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
869 return DWARF_CB_ABORT;
873 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* One hex value is expected; otherwise the result is errno after a
   stream error, or ENOEXEC.  */
875 int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
876 : ferror_unlocked (f) ? errno : ENOEXEC);
883 return DWARF_CB_ABORT;
885 INTDEF (dwfl_linux_kernel_module_section_address)
/* Report the modules listed in MODULELIST.  Each line is parsed for the
   module name, its size and its load address; the module is reported as
   covering the range from the address to the address plus the size, and
   check_module_notes is then run on it.  The final status is taken from
   the stream: errno after a stream error, 0 at end of file, ENOEXEC
   otherwise.
   NOTE(review): the %128s conversion can store 129 bytes including the
   terminating NUL.  The declaration of modname is not visible in this
   listing, so confirm that it is at least that large.  */
888 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
890 FILE *f = fopen (MODULELIST, "r");
894 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
898 unsigned long int modsz;
902 /* We can't just use fscanf here because it's not easy to distinguish \n
903 from other whitespace so as to take the optional word following the
904 address but always stop at the end of the line. */
905 while (getline (&line, &linesz, f) > 0
906 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
907 modname, &modsz, &modaddr) == 3)
909 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
910 modaddr, modaddr + modsz);
917 result = check_module_notes (mod);
922 result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
928 INTDEF (dwfl_linux_kernel_report_modules)