1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2 Copyright (C) 2005-2011 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
29 /* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30 Everything we need here is fine if its declarations just come first. */
40 #include <stdio_ext.h>
43 #include <sys/utsname.h>
/* Name under which the kernel image itself is reported as a module. */
48 #define KERNEL_MODNAME "kernel"
/* Format of the per-release modules directory; %s is the release string. */
50 #define MODULEDIRFMT "/lib/modules/%s"
/* Files read to learn about the running kernel: the kernel's notes, the
   notes tree of one module (%s is the module name), the symbol list
   and the list of loaded modules. */
52 #define KNOTESFILE "/sys/kernel/notes"
53 #define MODNOTESFMT "/sys/module/%s/notes"
54 #define KSYMSFILE "/proc/kallsyms"
55 #define MODULELIST "/proc/modules"
/* Directory prefix of one module's section address files; %s is the
   module name, and callers append the section name. */
56 #define SECADDRDIRFMT "/sys/module/%s/sections/"
57 #define MODULE_SECT_NAME_LEN 32 /* Minimum any linux/module.h has had. */
/* Name suffixes that try_kernel_name appends to the candidate file name.
   Only compiled in when USE_ZLIB, USE_BZLIB or USE_LZMA is defined. */
60 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
61 static const char *vmlinux_suffixes[] =
75 /* Try to open the given file as it is or under the debuginfo directory. */
/* DWFL supplies the debuginfo search path through its callbacks.
   *FNAME is the candidate file name.  TRY_DEBUG additionally allows the
   second dwfl_standard_find_debuginfo lookup below. */
77 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
/* The direct open is skipped (fd = -1) when the effective search path,
   the callbacks' value or else DEFAULT_DEBUGINFO_PATH, starts with ':'. */
82 /* Don't bother trying *FNAME itself here if the path will cause it to be
83 tried because we give its own basename as DEBUGLINK_FILE. */
84 int fd = ((((dwfl->callbacks->debuginfo_path
85 ? *dwfl->callbacks->debuginfo_path : NULL)
86 ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
87 : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
91 char *debugfname = NULL;
/* A stand-in module carrying only the DWFL pointer, so that the standard
   debuginfo finder can be called without a real module. */
92 Dwfl_Module fakemod = { .dwfl = dwfl };
93 /* First try the file's unadorned basename as DEBUGLINK_FILE,
94 to look for "vmlinux" files. */
95 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
96 *fname, basename (*fname), 0,
98 if (fd < 0 && try_debug)
99 /* Next, let the call use the default of basename + ".debug",
100 to look for "vmlinux.debug" files. */
101 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
104 if (debugfname != NULL)
/* With a decompression library configured, also try *FNAME with each
   vmlinux_suffixes entry appended. */
111 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
114 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
118 if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
120 fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
/* Return the release string of the running kernel as given by uname.
   The struct is function-local static storage: uname is only called
   while the cached release string is still empty, and the returned
   pointer refers to that storage.  Callers in this file check the
   result against NULL. */
141 static inline const char *
142 kernel_release (void)
144 /* Cache the `uname -r` string we'll use. */
145 static struct utsname utsname;
146 if (utsname.release[0] == '\0' && uname (&utsname) != 0)
148 return utsname.release;
/* Locate and open the kernel ELF file for RELEASE with try_kernel_name.
   If RELEASE starts with '/', it is taken as a directory and
   "RELEASE/vmlinux" is the only candidate.  Otherwise
   "/boot/vmlinux-RELEASE" is tried first and, if that yields no
   descriptor, "vmlinux" in the MODULEDIRFMT directory for RELEASE.
   Each candidate name is stored in *FNAME by asprintf. */
152 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
154 if ((release[0] == '/'
155 ? asprintf (fname, "%s/vmlinux", release)
156 : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
159 int fd = try_kernel_name (dwfl, fname, true);
/* Only a release name (not a directory) gets the second location. */
160 if (fd < 0 && release[0] != '/')
163 if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
165 fd = try_kernel_name (dwfl, fname, true);
/* Make sure a release string is available.  If RELEASE is null or
   points to a null string, fall back to kernel_release (); the string
   obtained that way is stored through RELEASE.  The call sites in this
   file treat a nonzero return value as an error code. */
172 get_release (Dwfl *dwfl, const char **release)
177 const char *release_string = release == NULL ? NULL : *release;
178 if (release_string == NULL)
180 release_string = kernel_release ();
181 if (release_string == NULL)
184 *release = release_string;
/* Report the kernel image itself as module KERNEL_MODNAME.
   RELEASE is resolved with get_release and the file is located with
   find_kernel_elf.  PREDICATE, when non-null, is asked whether the
   kernel should be used. */
191 report_kernel (Dwfl *dwfl, const char **release,
192 int (*predicate) (const char *module, const char *file))
194 int result = get_release (dwfl, release);
195 if (unlikely (result != 0))
/* *RELEASE is read unconditionally here, so RELEASE itself must not
   be a null pointer at this point. */
199 int fd = find_kernel_elf (dwfl, *release, &fname);
/* NOTE(review): presumably the path taken when no file was opened.
   A predicate that rejects the kernel (asked with a null file name)
   turns this into success; otherwise errno, or ENOENT if errno is 0. */
202 result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
203 ? 0 : errno ?: ENOENT);
208 if (predicate != NULL)
210 /* Let the predicate decide whether to use this one. */
211 int want = (*predicate) (KERNEL_MODNAME, fname);
219 Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
224 /* The kernel is ET_EXEC, but always treat it as relocatable. */
225 mod->e_type = ET_DYN;
228 if (!report || result < 0)
237 /* Look for a kernel debug archive. If we find one, report all its modules.
238 If not, return ENOENT. */
/* The archive is "debug.a" inside *RELEASE when that starts with '/',
   else inside the MODULEDIRFMT directory for that release. */
240 report_kernel_archive (Dwfl *dwfl, const char **release,
241 int (*predicate) (const char *module, const char *file))
243 int result = get_release (dwfl, release);
244 if (unlikely (result != 0))
248 if (unlikely ((*release)[0] == '/'
249 ? asprintf (&archive, "%s/debug.a", *release)
250 : asprintf (&archive, MODULEDIRFMT "/debug.a", *release)) < 0)
/* TRY_DEBUG is false, so try_kernel_name does not make its second
   (".debug") lookup for the archive. */
253 int fd = try_kernel_name (dwfl, &archive, false);
255 result = errno ?: ENOENT;
258 /* We have the archive file open! */
259 Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
261 if (unlikely (last == NULL))
265 /* Find the kernel and move it to the head of the list. */
/* PREVP always points at the link that refers to M, so the matching
   module can be unlinked without a separate "previous" node. */
266 Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
267 for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
/* The kernel is the entry that is not flagged gc, is not ET_REL,
   and is named "kernel". */
268 if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
/* Test whether the file name of F ends in a recognized module suffix.
   Each candidate suffix SFX is compared against the tail of the name,
   terminating NUL included.  With a nonzero NAMELEN the name must be
   exactly NAMELEN characters followed by the suffix; with NAMELEN zero
   any name longer than the suffix qualifies.  On a match the length of
   the suffix (without the NUL) is returned; callers in this file treat
   a zero result as "no match". */
283 check_suffix (const FTSENT *f, size_t namelen)
286 if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1 \
287 : f->fts_namelen >= sizeof sfx) \
288 && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1), \
290 return sizeof sfx - 1
305 /* Report a kernel and all its modules found on disk, for offline use.
306 If RELEASE starts with '/', it names a directory to look in;
307 if not, it names a directory to find under /lib/modules/;
308 if null, /lib/modules/`uname -r` is used.
309 Returns zero on success, -1 if dwfl_report_module failed,
310 or an errno code if finding the files on disk failed. */
/* PREDICATE, when non-null, is called with each candidate module name
   and file path to decide whether that module is reported. */
312 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
313 int (*predicate) (const char *module,
/* First try a kernel debug archive; ENOENT from report_kernel_archive
   means none was found. */
316 int result = report_kernel_archive (dwfl, &release, predicate);
317 if (result != ENOENT)
320 /* First report the kernel. */
321 result = report_kernel (dwfl, &release, predicate);
324 /* Do "find /lib/modules/RELEASE -name *.ko". */
/* When RELEASE is itself a directory it is used as the search root
   directly; otherwise the root is built from MODULEDIRFMT.  The
   borrowed RELEASE pointer is cleared from modulesdir[0] right after
   fts_open so that the free calls below only ever see asprintf
   memory (or NULL). */
326 char *modulesdir[] = { NULL, NULL };
327 if (release[0] == '/')
328 modulesdir[0] = (char *) release;
331 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
335 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
336 if (modulesdir[0] == (char *) release)
337 modulesdir[0] = NULL;
340 free (modulesdir[0]);
345 while ((f = fts_read (fts)) != NULL)
347 /* Skip a "source" subtree, which tends to be large.
348 This insane hard-coding of names is what depmod does too. */
349 if (f->fts_namelen == sizeof "source" - 1
350 && !strcmp (f->fts_name, "source"))
352 fts_set (fts, f, FTS_SKIP);
361 /* See if this file name matches "*.ko". */
/* SUFFIX is the length of whichever suffix check_suffix matched. */
362 const size_t suffix = check_suffix (f, 0);
365 /* We have a .ko file to report. Following the algorithm
366 by which the kernel makefiles set KBUILD_MODNAME, we
367 replace all ',' or '-' with '_' in the file name and
368 call that the module name. Modules could well be
369 built using different embedded names than their file
370 names. To handle that, we would have to look at the
371 __this_module.name contents in the module's text. */
/* NAME holds the file name without its suffix plus a terminator.
   The copy loop must stop at the same place: the matched suffix is
   not necessarily three characters long, and a fixed "- 3U" bound
   would write past the end of NAME for any longer suffix. */
373 char name[f->fts_namelen - suffix + 1];
374 for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
375 if (f->fts_name[i] == '-' || f->fts_name[i] == ',')
378 name[i] = f->fts_name[i];
379 name[f->fts_namelen - suffix] = '\0';
381 if (predicate != NULL)
383 /* Let the predicate decide whether to use this one. */
384 int want = (*predicate) (name, f->fts_path);
/* The file is not opened here: -1 is passed as the descriptor along
   with the path. */
394 if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
405 result = f->fts_errno;
413 /* We only get here in error cases. */
417 free (modulesdir[0]);
422 INTDEF (dwfl_linux_kernel_report_offline)
425 /* Grovel around to guess the bounds of the runtime kernel image. */
/* The bounds come from the addresses listed in KSYMSFILE and are stored
   in *START and *END, rounded to page boundaries.  *NOTES is related to
   the "__start_notes" symbol. */
427 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
429 FILE *f = fopen (KSYMSFILE, "r");
/* The stream is only used from this function, so stdio locking is
   switched to caller-managed. */
433 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* Read one line and parse its leading hexadecimal address into *ADDR.
   TYPE is set to the field after the address and P is left at what
   follows it.  Reading stops at end of input or at a line whose last
   character before the newline is ']'.
   NOTE(review): for a one-character line (N == 1) the test
   line[n - 2] reads one byte before the start of the buffer. */
443 inline bool read_address (Dwarf_Addr *addr)
445 if ((n = getline (&line, &linesz, f)) < 1 || line[n - 2] == ']')
447 *addr = strtoull (line, &p, 16);
448 p += strspn (p, " \t");
449 type = strsep (&p, " \t\n");
452 return p != NULL && p != line;
/* Keep reading until the first symbol whose type letter is one of
   T, t, R or r; its address is left in *START. */
457 result = read_address (start) ? 0 : -1;
458 while (result == 0 && strchr ("TtRr", *type) == NULL);
/* Every further address read overwrites *END, so *END ends up as the
   last address read_address accepted.  "__start_notes" is only
   looked for while *NOTES is still zero. */
463 while (read_address (end))
464 if (*notes == 0 && !strcmp (p, "__start_notes\n"))
/* Round *START down and *END up to a multiple of the page size. */
467 Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
468 *start &= -(Dwarf_Addr) round_kernel;
469 *end += round_kernel - 1;
470 *end &= -(Dwarf_Addr) round_kernel;
/* Bounds that span less than one page are not accepted. */
471 if (*start >= *end || *end - *start < round_kernel)
477 result = ferror_unlocked (f) ? errno : ENOEXEC;
485 /* Look for a build ID note in NOTESFILE and associate the ID with MOD. */
/* VADDR is the address that corresponds to the start of NOTESFILE.  For
   a module, SECNAME is used to look the address up once a build ID note
   has been found.  The result of dwfl_module_report_build_id is
   returned when a note is found; callers in this file treat a positive
   result as "nothing found" and a negative one as failure. */
487 check_notes (Dwfl_Module *mod, const char *notesfile,
488 Dwarf_Addr vaddr, const char *secname)
490 int fd = open64 (notesfile, O_RDONLY);
/* The note header is read through GElf_Nhdr regardless of ELF class,
   which relies on both class-specific headers having that size. */
494 assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
495 assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
499 unsigned char data[8192];
/* A single read: notes beyond sizeof buf bytes are not examined. */
502 ssize_t n = read (fd, buf.data, sizeof buf);
508 unsigned char *p = buf.data;
509 while (p < &buf.data[n])
511 /* No translation required since we are reading the native kernel. */
512 GElf_Nhdr *nhdr = (void *) p;
/* NAME and BITS (the descriptor) each occupy their size rounded up
   to a multiple of four bytes. */
514 unsigned char *name = p;
515 p += (nhdr->n_namesz + 3) & -4U;
516 unsigned char *bits = p;
517 p += (nhdr->n_descsz + 3) & -4U;
/* Accept the note only if it lies entirely within the bytes read and
   is a build ID note whose name is "GNU" including the NUL. */
519 if (p <= &buf.data[n]
520 && nhdr->n_type == NT_GNU_BUILD_ID
521 && nhdr->n_namesz == sizeof "GNU"
522 && !memcmp (name, "GNU", sizeof "GNU"))
524 /* Found it. For a module we must figure out its VADDR now. */
527 && (INTUSE(dwfl_linux_kernel_module_section_address)
528 (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
529 || vaddr == (GElf_Addr) -1l))
/* Advance VADDR by the descriptor's offset within the data read, so
   that it refers to the build ID bytes themselves. */
533 vaddr += bits - buf.data;
534 return INTUSE(dwfl_module_report_build_id) (mod, bits,
535 nhdr->n_descsz, vaddr);
542 /* Look for a build ID for the kernel. */
/* Reads KNOTESFILE with VADDR as its starting address and no section
   name.  Only a negative result from check_notes is passed on, as -1;
   every other result, including a positive one, becomes 0. */
544 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
546 return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
549 /* Look for a build ID for a loaded kernel module. */
/* Walks the tree named by MODNOTESFMT for MOD's name and hands entries
   to check_notes, using the entry's file name as the section name. */
551 check_module_notes (Dwfl_Module *mod)
553 char *dirs[2] = { NULL, NULL };
554 if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
557 FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
566 while ((f = fts_read (fts)) != NULL)
/* The address is passed as 0; check_notes looks the section address
   up by name once it has found a build ID note. */
573 result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
574 if (result > 0) /* Nothing found. */
583 result = f->fts_errno;
592 /* We only get here when finished or in error cases. */
/* Report the running kernel to DWFL as module KERNEL_MODNAME.
   The address bounds are first guessed with intuit_kernel_bounds; if
   that reports ENOENT, the kernel's ELF file is looked up on disk and
   reported with report_kernel instead. */
602 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
/* Report the kernel using the START and END of the enclosing function. */
606 inline Dwfl_Module *report (void)
608 return INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end);
611 /* This is a bit of a kludge. If we already reported the kernel,
612 don't bother figuring it out again--it never changes. */
613 for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
614 if (!strcmp (m->name, KERNEL_MODNAME))
618 return report () == NULL ? -1 : 0;
621 /* Try to figure out the bounds of the kernel image without
622 looking for any vmlinux file. */
624 /* The compiler cannot deduce that if intuit_kernel_bounds returns
625 zero NOTES will be initialized. Fake the initialization. */
626 asm ("" : "=m" (notes));
/* NOTES is passed by address so that intuit_kernel_bounds can store
   the notes address into it. */
627 int result = intuit_kernel_bounds (&start, &end, &notes);
630 Dwfl_Module *mod = report ();
631 return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
633 if (result != ENOENT)
636 /* Find the ELF file for the running kernel and dwfl_report_elf it. */
/* report_kernel reads *RELEASE after get_release has resolved it, so
   it needs a real slot to fill in; a null RELEASE pointer would be
   dereferenced there.  A slot holding NULL selects the running
   kernel's release. */
const char *release = NULL;
637 return report_kernel (dwfl, &release, NULL);
639 INTDEF (dwfl_linux_kernel_report_kernel)
642 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules. */
/* MODULE_NAME selects what to find: KERNEL_MODNAME means the kernel
   image itself, anything else is searched for as a module file under
   the modules directory of the running kernel's release.  On a match
   *FILE_NAME receives a strdup'd copy of the path.  USERDATA and BASE
   are unused. */
645 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
646 void **userdata __attribute__ ((unused)),
647 const char *module_name,
648 Dwarf_Addr base __attribute__ ((unused)),
649 char **file_name, Elf **elfp)
/* When a build ID is already known for MOD, look the file up by build
   ID first. */
651 if (mod->build_id_len > 0)
653 int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
655 if (fd >= 0 || mod->main.elf != NULL || errno != 0)
659 const char *release = kernel_release ();
663 if (!strcmp (module_name, KERNEL_MODNAME))
664 return find_kernel_elf (mod->dwfl, release, file_name);
666 /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko". */
668 char *modulesdir[] = { NULL, NULL };
669 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
672 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
675 free (modulesdir[0]);
679 size_t namelen = strlen (module_name);
681 /* This is a kludge. There is no actual necessary relationship between
682 the name of the .ko file installed and the module name the kernel
683 knows it by when it's loaded. The kernel's only idea of the module
684 name comes from the name embedded in the object's magic
685 .gnu.linkonce.this_module section.
687 In practice, these module names match the .ko file names except for
688 some using '_' and some using '-'. So our cheap kludge is to look for
689 two files when either a '_' or '-' appears in a module name, one using
690 only '_' and one only using '-'. */
/* ALTERNATE_NAME has room for MODULE_NAME with its terminator; the
   substitution never changes the length. */
692 char alternate_name[namelen + 1];
/* Build ALTERNATE_NAME from MODULE_NAME, working from each occurrence
   of FROM: the text between occurrences is copied unchanged and the
   tail is copied together with the terminating NUL. */
693 inline bool subst_name (char from, char to)
695 const char *n = memchr (module_name, from, namelen);
698 char *a = mempcpy (alternate_name, module_name, n - module_name);
702 while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
704 a = mempcpy (a, n, p - n);
708 memcpy (a, n, namelen - (n - module_name) + 1);
/* '-' to '_' is tried first; '_' to '-' only if that did nothing.
   If neither applies there is no alternate name. */
711 if (!subst_name ('-', '_') && !subst_name ('_', '-'))
712 alternate_name[0] = '\0';
716 while ((f = fts_read (fts)) != NULL)
718 /* Skip a "source" subtree, which tends to be large.
719 This insane hard-coding of names is what depmod does too. */
720 if (f->fts_namelen == sizeof "source" - 1
721 && !strcmp (f->fts_name, "source"))
723 fts_set (fts, f, FTS_SKIP);
733 /* See if this file name is "MODULE_NAME.ko". */
/* check_suffix with a nonzero NAMELEN fixes the total length, so it
   is enough to compare the first NAMELEN bytes against either
   spelling of the name. */
734 if (check_suffix (f, namelen)
735 && (!memcmp (f->fts_name, module_name, namelen)
736 || !memcmp (f->fts_name, alternate_name, namelen)))
738 int fd = open64 (f->fts_accpath, O_RDONLY);
739 *file_name = strdup (f->fts_path);
741 free (modulesdir[0]);
744 else if (*file_name == NULL)
756 error = f->fts_errno;
766 free (modulesdir[0]);
770 INTDEF (dwfl_linux_kernel_find_elf)
773 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
774 We read the information from /sys/module directly. */
/* The address of section SECNAME of module MODNAME is read as a
   hexadecimal number from the file SECNAME in the module's
   SECADDRDIRFMT directory and stored in *ADDR.  MOD, USERDATA, BASE,
   SHNDX and SHDR are unused. */
777 dwfl_linux_kernel_module_section_address
778 (Dwfl_Module *mod __attribute__ ((unused)),
779 void **userdata __attribute__ ((unused)),
780 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
781 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
782 const GElf_Shdr *shdr __attribute__ ((unused)),
786 if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
787 return DWARF_CB_ABORT;
789 FILE *f = fopen (sysfile, "r");
796 /* The .modinfo and .data.percpu sections are never kept
797 loaded in the kernel. If the kernel was compiled without
798 CONFIG_MODULE_UNLOAD, the .exit.* sections are not
799 actually loaded at all.
801 Setting *ADDR to -1 tells the caller this section is
802 actually absent from memory. */
804 if (!strcmp (secname, ".modinfo")
805 || !strcmp (secname, ".data.percpu")
806 || !strncmp (secname, ".exit", 5))
808 *addr = (Dwarf_Addr) -1l;
812 /* The goofy PPC64 module_frob_arch_sections function tweaks
813 the section names as a way to control other kernel code's
814 behavior, and this cruft leaks out into the /sys information.
815 The file name for ".init*" may actually look like "_init*". */
817 const bool is_init = !strncmp (secname, ".init", 5);
/* Same path with the leading '.' of SECNAME replaced by '_'. */
820 if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
821 modname, &secname[1]) < 0)
823 f = fopen (sysfile, "r");
829 /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
830 In case that size increases in the future, look for longer
831 truncated names first. */
832 size_t namelen = strlen (secname);
833 if (namelen >= MODULE_SECT_NAME_LEN)
835 int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
838 return DWARF_CB_ABORT;
/* END marks the current end of the name in SYSFILE, and
   sysfile[len - namelen] is the first character of the section name
   within the path. */
839 char *end = sysfile + len;
843 f = fopen (sysfile, "r");
844 if (is_init && f == NULL && errno == ENOENT)
/* Retry with '_' in place of the leading '.', then restore it. */
846 sysfile[len - namelen] = '_';
847 f = fopen (sysfile, "r");
848 sysfile[len - namelen] = '.';
/* Keep trying while the file does not exist and the remaining name is
   still at least MODULE_SECT_NAME_LEN characters long. */
851 while (f == NULL && errno == ENOENT
852 && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
860 return DWARF_CB_ABORT;
864 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* Exactly one hexadecimal value is expected.  A failed parse yields
   errno if the stream reports an error, else ENOEXEC. */
866 int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
867 : ferror_unlocked (f) ? errno : ENOEXEC);
874 return DWARF_CB_ABORT;
876 INTDEF (dwfl_linux_kernel_module_section_address)
/* Report every module listed in MODULELIST to DWFL, using the name,
   size and load address parsed from each line, and look for each
   module's build ID with check_module_notes. */
879 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
881 FILE *f = fopen (MODULELIST, "r");
885 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
889 unsigned long int modsz;
893 /* We can't just use fscanf here because it's not easy to distinguish \n
894 from other whitespace so as to take the optional word following the
895 address but always stop at the end of the line. */
/* Fields per line: name, size in decimal, three skipped fields, then
   the address in hexadecimal.
   NOTE(review): "%128s" stores up to 128 characters plus a terminating
   NUL, so MODNAME needs room for 129 bytes -- confirm against its
   declaration. */
896 while (getline (&line, &linesz, f) > 0
897 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
898 modname, &modsz, &modaddr) == 3)
/* The module spans MODSZ bytes starting at MODADDR. */
900 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
901 modaddr, modaddr + modsz);
908 result = check_module_notes (mod);
/* A stream error yields errno; stopping at end of file is success;
   stopping anywhere else (a line that did not parse) is ENOEXEC. */
913 result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
919 INTDEF (dwfl_linux_kernel_report_modules)