1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2 Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
29 /* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30 Everything we need here is fine if its declarations just come first. */
40 #include <stdio_ext.h>
43 #include <sys/utsname.h>
47 /* Since fts.h is included before config.h, its indirect inclusions may not
48 give us the right LFS aliases of these functions, so map them manually. */
49 #ifdef _FILE_OFFSET_BITS
/* Name under which the kernel image itself is reported as a module.  */
55 #define KERNEL_MODNAME "kernel"
/* printf-style format of the per-release module directory; %s is the
   release string.  */
57 #define MODULEDIRFMT "/lib/modules/%s"
/* Paths read from the running system.  In MODNOTESFMT and SECADDRDIRFMT
   the %s is a module name.  */
59 #define KNOTESFILE "/sys/kernel/notes"
60 #define MODNOTESFMT "/sys/module/%s/notes"
61 #define KSYMSFILE "/proc/kallsyms"
62 #define MODULELIST "/proc/modules"
63 #define SECADDRDIRFMT "/sys/module/%s/sections/"
/* Threshold used when probing for truncated section file names.  */
64 #define MODULE_SECT_NAME_LEN 32 /* Minimum any linux/module.h has had. */
/* Table of file name suffixes; try_kernel_name appends each entry to the
   candidate file name and tries to open the result.  */
67 static const char *vmlinux_suffixes[] =
78 /* Try to open the given file as it is or under the debuginfo directory. */
/* DWFL supplies the debuginfo-path callbacks.  *FNAME is the candidate
   path; it is replaced when the debuginfo lookup reports the name it
   found.  TRY_DEBUG additionally enables the default ".debug" lookup.
   Callers treat a negative result as failure and any other value as an
   open file descriptor.  */
80 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
85 /* Don't bother trying *FNAME itself here if the path will cause it to be
86 tried because we give its own basename as DEBUGLINK_FILE. */
/* FD is -1 when the effective debuginfo path (the callback's value, or
   DEFAULT_DEBUGINFO_PATH when that is null) begins with ':'; otherwise
   *FNAME is opened directly.  */
87 int fd = ((((dwfl->callbacks->debuginfo_path
88 ? *dwfl->callbacks->debuginfo_path : NULL)
89 ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
90 : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
/* A stand-in module carrying only the Dwfl pointer, so the standard
   debuginfo lookup can be used without a real module.  */
94 Dwfl_Module fakemod = { .dwfl = dwfl };
95 /* First try the file's unadorned basename as DEBUGLINK_FILE,
96 to look for "vmlinux" files. */
97 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
98 *fname, basename (*fname), 0,
100 if (fd < 0 && try_debug)
101 /* Next, let the call use the default of basename + ".debug",
102 to look for "vmlinux.debug" files. */
103 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
105 &fakemod.debug.name);
/* When the lookup produced a file name, hand it back through *FNAME.  */
106 if (fakemod.debug.name != NULL)
109 *fname = fakemod.debug.name;
/* Walk the vmlinux_suffixes table, opening *FNAME with each suffix
   appended in turn.  */
115 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
119 if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
121 fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
/* Return the release string of the running kernel, as filled in by uname.
   The buffer is static, so the returned pointer stays valid after the
   call.  */
141 static inline const char *
142 kernel_release (void)
144 /* Cache the `uname -r` string we'll use. */
145 static struct utsname utsname;
/* uname is only called while the cached release string is still empty.  */
146 if (utsname.release[0] == '\0' && uname (&utsname) != 0)
148 return utsname.release;
/* Compose a vmlinux path for RELEASE in *FNAME (allocated by asprintf) and
   try to open it with try_kernel_name.  A RELEASE beginning with '/' names
   a directory holding "vmlinux"; any other RELEASE is looked up as
   "/boot/vmlinux-RELEASE" first and then as "vmlinux" under MODULEDIRFMT.
   RELEASE must not be null: its first character is read immediately.  */
152 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
154 if ((release[0] == '/'
155 ? asprintf (fname, "%s/vmlinux", release)
156 : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
159 int fd = try_kernel_name (dwfl, fname, true);
/* Second attempt, only for a non-path RELEASE whose first candidate
   could not be opened.  */
160 if (fd < 0 && release[0] != '/')
163 if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
165 fd = try_kernel_name (dwfl, fname, true);
/* Settle on the release string to use.  When RELEASE is null or points to
   a null string, kernel_release () supplies the value, which is then
   stored through RELEASE.  Callers treat a nonzero return as failure.
   NOTE(review): confirm the store below is guarded against a null
   RELEASE pointer.  */
172 get_release (Dwfl *dwfl, const char **release)
177 const char *release_string = release == NULL ? NULL : *release;
178 if (release_string == NULL)
180 release_string = kernel_release ();
181 if (release_string == NULL)
184 *release = release_string;
/* Locate the kernel image for *RELEASE on disk and report it to DWFL as
   module KERNEL_MODNAME.  PREDICATE, when non-null, is asked about the
   kernel with its module name and the file name found.  */
191 report_kernel (Dwfl *dwfl, const char **release,
192 int (*predicate) (const char *module, const char *file))
194 int result = get_release (dwfl, release);
195 if (unlikely (result != 0))
/* NOTE(review): *RELEASE is dereferenced here, so RELEASE itself must be
   non-null at this point; confirm every caller passes a valid pointer.  */
199 int fd = find_kernel_elf (dwfl, *release, &fname);
/* NOTE(review): presumably the could-not-open path.  A predicate that
   declines the kernel (asked with a null file name) yields 0; otherwise
   the result is errno, or ENOENT when errno is zero.  */
202 result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
203 ? 0 : errno ?: ENOENT);
208 if (predicate != NULL)
210 /* Let the predicate decide whether to use this one. */
211 int want = (*predicate) (KERNEL_MODNAME, fname);
219 /* Note that on some architectures (e.g. x86_64) the vmlinux
220 is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
221 In both cases the phdr p_vaddr load address will be non-zero.
222 We want the image to be placed as if it was ET_DYN, so
223 pass true for add_p_vaddr which will do the right thing
224 (in combination with a zero base) in either case. */
225 Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
230 /* The kernel is ET_EXEC, but always treat it as relocatable. */
231 mod->e_type = ET_DYN;
236 if (!report || result < 0)
243 /* Look for a kernel debug archive. If we find one, report all its modules.
244 If not, return ENOENT. */
/* The archive is "debug.a", looked up directly under *RELEASE when that
   begins with '/', and under MODULEDIRFMT otherwise.  PREDICATE is the
   same module filter the other report functions take.  */
246 report_kernel_archive (Dwfl *dwfl, const char **release,
247 int (*predicate) (const char *module, const char *file))
249 int result = get_release (dwfl, release);
250 if (unlikely (result != 0))
254 int res = (((*release)[0] == '/')
255 ? asprintf (&archive, "%s/debug.a", *release)
256 : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
257 if (unlikely (res < 0))
/* TRY_DEBUG is false: only the exact name and the debuginfo path are
   searched, not a ".debug" variant.  */
260 int fd = try_kernel_name (dwfl, &archive, false);
262 result = errno ?: ENOENT;
265 /* We have the archive file open! */
266 Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
268 if (unlikely (last == NULL))
272 /* Find the kernel and move it to the head of the list. */
273 Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
274 for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
/* Match on KERNEL_MODNAME, the same name the other functions in this file
   use for the kernel module, rather than a repeated string literal.  */
275 if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, KERNEL_MODNAME))
/* Test whether the file name of F ends in a recognized suffix.  With a
   nonzero NAMELEN the name must be exactly NAMELEN characters followed by
   the suffix; with NAMELEN zero any name at least one character longer
   than the suffix qualifies.  A match yields the suffix length; callers
   treat zero as "no match".
   NOTE(review): the backslash-continued lines below belong to a helper
   macro taking the suffix literal as `sfx'; do not insert lines between
   them.  */
290 check_suffix (const FTSENT *f, size_t namelen)
293 if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1 \
294 : f->fts_namelen >= sizeof sfx) \
295 && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1), \
297 return sizeof sfx - 1
313 /* Report a kernel and all its modules found on disk, for offline use.
314 If RELEASE starts with '/', it names a directory to look in;
315 if not, it names a directory to find under /lib/modules/;
316 if null, /lib/modules/`uname -r` is used.
317 Returns zero on success, -1 if dwfl_report_module failed,
318 or an errno code if finding the files on disk failed. */
/* PREDICATE, when non-null, is called with each module name and file
   path before the module is reported.  */
320 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
321 int (*predicate) (const char *module,
/* A debug archive is tried first; only an ENOENT result from it lets the
   search continue with the individual files.  */
324 int result = report_kernel_archive (dwfl, &release, predicate);
325 if (result != ENOENT)
328 /* First report the kernel. */
329 result = report_kernel (dwfl, &release, predicate);
332 /* Do "find /lib/modules/RELEASE -name *.ko". */
/* NULL-terminated path vector for fts_open.  Entry 0 either aliases
   RELEASE (path given by the caller) or owns an asprintf allocation.  */
334 char *modulesdir[] = { NULL, NULL };
335 if (release[0] == '/')
336 modulesdir[0] = (char *) release;
339 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
343 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
/* Drop the alias so that the free (modulesdir[0]) calls below never free
   the caller's RELEASE string.  */
344 if (modulesdir[0] == (char *) release)
345 modulesdir[0] = NULL;
348 free (modulesdir[0]);
353 while ((f = fts_read (fts)) != NULL)
355 /* Skip a "source" subtree, which tends to be large.
356 This insane hard-coding of names is what depmod does too. */
357 if (f->fts_namelen == sizeof "source" - 1
358 && !strcmp (f->fts_name, "source"))
360 fts_set (fts, f, FTS_SKIP);
369 /* See if this file name matches "*.ko". */
/* NAMELEN 0 asks check_suffix to accept any base name; SUFFIX is the
   length of the matched suffix.  */
370 const size_t suffix = check_suffix (f, 0);
373 /* We have a .ko file to report. Following the algorithm
374 by which the kernel makefiles set KBUILD_MODNAME, we
375 replace all ',' or '-' with '_' in the file name and
376 call that the module name. Modules could well be
377 built using different embedded names than their file
378 names. To handle that, we would have to look at the
379 __this_module.name contents in the module's text. */
/* NAME is the file name with its suffix cut off, in a fresh allocation.  */
381 char *name = strndup (f->fts_name, f->fts_namelen - suffix);
382 if (unlikely (name == NULL))
384 __libdwfl_seterrno (DWFL_E_NOMEM);
388 for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
389 if (name[i] == '-' || name[i] == ',')
392 if (predicate != NULL)
394 /* Let the predicate decide whether to use this one. */
395 int want = (*predicate) (name, f->fts_path);
/* The file descriptor argument is -1, so only the path is handed to
   dwfl_report_offline.  */
409 if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
422 result = f->fts_errno;
430 /* We only get here in error cases. */
434 free (modulesdir[0]);
439 INTDEF (dwfl_linux_kernel_report_offline)
442 /* State of read_address used by intuit_kernel_bounds. */
/* read_address uses these members: f (input stream), line and linesz
   (getline buffer and its capacity), n (getline's return value), p (parse
   position within line) and type (token following the address).  */
443 struct read_address_state {
/* Read the next line from STATE->f and parse its leading hexadecimal
   number into *ADDR.  STATE->type is set to the token after the address
   and STATE->p is left just past that token.  The final result is true
   when STATE->p is non-null and has moved off the start of the line.  */
453 read_address (struct read_address_state *state, Dwarf_Addr *addr)
/* A line whose next-to-last character is ']' ends the scan.  getline can
   return 1 (a lone character), in which case there is no line[n - 2];
   only index it when the line has at least two characters.  */
455 if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
456 (state->n >= 2 && state->line[state->n - 2] == ']'))
458 *addr = strtoull (state->line, &state->p, 16);
/* Skip the blanks after the address, then split off the next token;
   strsep terminates it in place and advances STATE->p past it.  */
459 state->p += strspn (state->p, " \t");
460 state->type = strsep (&state->p, " \t\n");
461 if (state->type == NULL)
463 return state->p != NULL && state->p != state->line;
467 /* Grovel around to guess the bounds of the runtime kernel image. */
/* The bounds are derived from the symbol list in KSYMSFILE.  *START and
   *END receive page-aligned addresses; *NOTES is consulted when a
   "__start_notes" line is seen.  */
469 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
471 struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
473 state.f = fopen (KSYMSFILE, "r");
/* Put the stream in caller-managed locking mode; ferror_unlocked is used
   on it below.  */
477 (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
/* Keep reading until a symbol whose type character is T, t, R or r (or
   until a read fails); *START holds the last address read.  */
483 result = read_address (&state, start) ? 0 : -1;
484 while (result == 0 && strchr ("TtRr", *state.type) == NULL);
/* Consume the remaining lines.  *END is overwritten by every successful
   read, so it ends up as the last address read.  */
489 while (read_address (&state, end))
490 if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
/* Round *START down and *END up to a multiple of the page size.  */
493 Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
494 *start &= -(Dwarf_Addr) round_kernel;
495 *end += round_kernel - 1;
496 *end &= -(Dwarf_Addr) round_kernel;
/* An empty range, or one smaller than a page, is not a usable result.  */
497 if (*start >= *end || *end - *start < round_kernel)
/* A stream error reports errno; any other failure is ENOEXEC.  */
503 result = ferror_unlocked (state.f) ? errno : ENOEXEC;
511 /* Look for a build ID note in NOTESFILE and associate the ID with MOD. */
/* VADDR is the address the notes are loaded at; for a module it is
   instead looked up from SECNAME once a build ID note has been found.
   Callers distinguish negative, zero and positive results.  */
513 check_notes (Dwfl_Module *mod, const char *notesfile,
514 Dwarf_Addr vaddr, const char *secname)
516 int fd = open (notesfile, O_RDONLY);
/* The note header is accessed through GElf_Nhdr below, so both ELF class
   layouts must have that same size.  */
520 assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
521 assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
525 unsigned char data[8192];
528 ssize_t n = read (fd, buf.data, sizeof buf);
/* Walk the notes in the N bytes just read.  */
534 unsigned char *p = buf.data;
535 while (p < &buf.data[n])
537 /* No translation required since we are reading the native kernel. */
538 GElf_Nhdr *nhdr = (void *) p;
/* The name and the descriptor each occupy their size rounded up to a
   multiple of four bytes.  */
540 unsigned char *name = p;
541 p += (nhdr->n_namesz + 3) & -4U;
542 unsigned char *bits = p;
543 p += (nhdr->n_descsz + 3) & -4U;
/* Accept only a note that lies entirely within the data read, has the
   build ID type and is named exactly "GNU" including its terminator.  */
545 if (p <= &buf.data[n]
546 && nhdr->n_type == NT_GNU_BUILD_ID
547 && nhdr->n_namesz == sizeof "GNU"
548 && !memcmp (name, "GNU", sizeof "GNU"))
550 /* Found it. For a module we must figure out its VADDR now. */
553 && (INTUSE(dwfl_linux_kernel_module_section_address)
554 (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
555 || vaddr == (GElf_Addr) -1l))
/* Advance VADDR by the descriptor's offset within the buffer so that it
   addresses the build ID bytes themselves.  */
559 vaddr += bits - buf.data;
560 return INTUSE(dwfl_module_report_build_id) (mod, bits,
561 nhdr->n_descsz, vaddr);
568 /* Look for a build ID for the kernel. */
/* Reads KNOTESFILE with no section name.  Only a negative check_notes
   result is passed on (as -1); every other outcome is reported as 0.  */
570 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
572 return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
575 /* Look for a build ID for a loaded kernel module. */
/* Walks the module's notes directory (MODNOTESFMT with MOD->name) and
   runs check_notes on the entries found there.  */
577 check_module_notes (Dwfl_Module *mod)
/* NULL-terminated path vector for fts_open; entry 0 is allocated by
   asprintf.  */
579 char *dirs[2] = { NULL, NULL };
580 if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
583 FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
592 while ((f = fts_read (fts)) != NULL)
/* The entry's file name doubles as the section name handed to
   check_notes; the address starts out as 0.  */
599 result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
600 if (result > 0) /* Nothing found. */
609 result = f->fts_errno;
618 /* We only get here when finished or in error cases. */
628 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
630 Dwarf_Addr start = 0;
634 (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
636 /* This is a bit of a kludge. If we already reported the kernel,
637 don't bother figuring it out again--it never changes. */
638 for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
639 if (!strcmp (m->name, KERNEL_MODNAME))
643 return report () == NULL ? -1 : 0;
646 /* Try to figure out the bounds of the kernel image without
647 looking for any vmlinux file. */
649 /* The compiler cannot deduce that if intuit_kernel_bounds returns
650 zero NOTES will be initialized. Fake the initialization. */
651 asm ("" : "=m" (notes));
652 int result = intuit_kernel_bounds (&start, &end, ¬es);
655 Dwfl_Module *mod = report ();
656 return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
658 if (result != ENOENT)
661 /* Find the ELF file for the running kernel and dwfl_report_elf it. */
662 return report_kernel (dwfl, NULL, NULL);
664 INTDEF (dwfl_linux_kernel_report_kernel)
/* Build in ALTERNATE_NAME a variant of MODULE_NAME (NAMELEN characters)
   derived from the occurrences of FROM in it.  ALTERNATE_NAME must have
   room for NAMELEN + 1 bytes.  Callers use the result as a flag telling
   whether a variant was produced.
   NOTE(review): presumably each FROM is written out as TO; confirm
   against the statements that store through `a'.  */
668 subst_name (char from, char to,
669 const char * const module_name,
670 char * const alternate_name,
671 const size_t namelen)
/* N is the first occurrence of FROM, if any.  */
673 const char *n = memchr (module_name, from, namelen);
/* Copy the part before the first FROM; A is the write position.  */
676 char *a = mempcpy (alternate_name, module_name, n - module_name);
/* Copy each stretch up to the next FROM in what remains of the name.  */
680 while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
682 a = mempcpy (a, n, p - n);
/* Copy the tail; the + 1 takes the byte after the NAMELEN characters
   along with it.  */
686 memcpy (a, n, namelen - (n - module_name) + 1);
690 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules. */
/* MODULE_NAME selects what to look for: KERNEL_MODNAME means the kernel
   image itself, anything else a "MODULE_NAME.ko" file under the module
   directory of the running kernel's release.  *FILE_NAME receives a copy
   of the path found.  USERDATA and BASE are unused.  */
693 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
694 void **userdata __attribute__ ((unused)),
695 const char *module_name,
696 Dwarf_Addr base __attribute__ ((unused)),
697 char **file_name, Elf **elfp)
/* A module with a known build ID is looked up by that ID first.  */
699 if (mod->build_id_len > 0)
701 int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
703 if (fd >= 0 || mod->main.elf != NULL || errno != 0)
707 const char *release = kernel_release ();
711 if (!strcmp (module_name, KERNEL_MODNAME))
712 return find_kernel_elf (mod->dwfl, release, file_name);
714 /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko". */
/* NULL-terminated path vector for fts_open; entry 0 is allocated by
   asprintf and released on each exit path below.  */
716 char *modulesdir[] = { NULL, NULL };
717 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
720 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
723 free (modulesdir[0]);
727 size_t namelen = strlen (module_name);
729 /* This is a kludge. There is no actual necessary relationship between
730 the name of the .ko file installed and the module name the kernel
731 knows it by when it's loaded. The kernel's only idea of the module
732 name comes from the name embedded in the object's magic
733 .gnu.linkonce.this_module section.
735 In practice, these module names match the .ko file names except for
736 some using '_' and some using '-'. So our cheap kludge is to look for
737 two files when either a '_' or '-' appears in a module name, one using
738 only '_' and one only using '-'. */
740 char *alternate_name = malloc (namelen + 1);
741 if (unlikely (alternate_name == NULL))
743 free (modulesdir[0]);
/* Try '-' to '_' first and '_' to '-' second.  When neither produces a
   variant, ALTERNATE_NAME is made the empty string.  */
746 if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
747 !subst_name ('_', '-', module_name, alternate_name, namelen))
748 alternate_name[0] = '\0';
752 while ((f = fts_read (fts)) != NULL)
754 /* Skip a "source" subtree, which tends to be large.
755 This insane hard-coding of names is what depmod does too. */
756 if (f->fts_namelen == sizeof "source" - 1
757 && !strcmp (f->fts_name, "source"))
759 fts_set (fts, f, FTS_SKIP);
769 /* See if this file name is "MODULE_NAME.ko". */
/* check_suffix has already pinned the base name to NAMELEN characters,
   so comparing that many bytes against either spelling is enough.  */
770 if (check_suffix (f, namelen)
771 && (!memcmp (f->fts_name, module_name, namelen)
772 || !memcmp (f->fts_name, alternate_name, namelen)))
/* Open through fts_accpath, but give the caller a copy of fts_path.  */
774 int fd = open (f->fts_accpath, O_RDONLY);
775 *file_name = strdup (f->fts_path);
777 free (modulesdir[0]);
778 free (alternate_name);
781 else if (*file_name == NULL)
793 error = f->fts_errno;
803 free (modulesdir[0]);
804 free (alternate_name);
808 INTDEF (dwfl_linux_kernel_find_elf)
811 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
812 We read the information from /sys/module directly. */
/* MODNAME and SECNAME select the file to read under SECADDRDIRFMT; the
   address parsed from it is stored through ADDR.  MOD, USERDATA, BASE,
   SHNDX and SHDR are unused.  DWARF_CB_ABORT is returned on the failure
   paths visible below.  */
815 dwfl_linux_kernel_module_section_address
816 (Dwfl_Module *mod __attribute__ ((unused)),
817 void **userdata __attribute__ ((unused)),
818 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
819 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
820 const GElf_Shdr *shdr __attribute__ ((unused)),
824 if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
825 return DWARF_CB_ABORT;
827 FILE *f = fopen (sysfile, "r");
834 /* The .modinfo and .data.percpu sections are never kept
835 loaded in the kernel. If the kernel was compiled without
836 CONFIG_MODULE_UNLOAD, the .exit.* sections are not
837 actually loaded at all.
839 Setting *ADDR to -1 tells the caller this section is
840 actually absent from memory. */
842 if (!strcmp (secname, ".modinfo")
843 || !strcmp (secname, ".data.percpu")
844 || !strncmp (secname, ".exit", 5))
846 *addr = (Dwarf_Addr) -1l;
850 /* The goofy PPC64 module_frob_arch_sections function tweaks
851 the section names as a way to control other kernel code's
852 behavior, and this cruft leaks out into the /sys information.
853 The file name for ".init*" may actually look like "_init*". */
855 const bool is_init = !strncmp (secname, ".init", 5);
/* Retry with the leading '.' of the section name replaced by '_'.  */
858 if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
859 modname, &secname[1]) < 0)
861 f = fopen (sysfile, "r");
867 /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
868 In case that size increases in the future, look for longer
869 truncated names first. */
870 size_t namelen = strlen (secname);
871 if (namelen >= MODULE_SECT_NAME_LEN)
873 int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
876 return DWARF_CB_ABORT;
/* END marks the end of the candidate file name; sysfile[len - namelen]
   is the first character of the section-name part.  */
877 char *end = sysfile + len;
881 f = fopen (sysfile, "r");
882 if (is_init && f == NULL && errno == ENOENT)
/* Try the "_init" spelling, then restore the '.' for the next round.  */
884 sysfile[len - namelen] = '_';
885 f = fopen (sysfile, "r");
886 sysfile[len - namelen] = '.';
889 while (f == NULL && errno == ENOENT
890 && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
898 return DWARF_CB_ABORT;
902 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* SCNx64 is the <inttypes.h> conversion macro meant for the scanf
   family; the PRI* macros are only specified for printf.  A stream error
   reports errno, any other parse failure ENOEXEC.  */
904 int result = (fscanf (f, "%" SCNx64 "\n", addr) == 1 ? 0
905 : ferror_unlocked (f) ? errno : ENOEXEC);
912 return DWARF_CB_ABORT;
914 INTDEF (dwfl_linux_kernel_module_section_address)
/* Report every module listed in MODULELIST to DWFL, using the name, size
   and load address parsed from each line, and look for each module's
   build ID with check_module_notes.  */
917 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
919 FILE *f = fopen (MODULELIST, "r");
/* Put the stream in caller-managed locking mode; the _unlocked status
   calls are used on it below.  */
923 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
927 unsigned long int modsz;
931 /* We can't just use fscanf here because it's not easy to distinguish \n
932 from other whitespace so as to take the optional word following the
933 address but always stop at the end of the line. */
/* The address field uses SCNx64, the <inttypes.h> conversion macro meant
   for the scanf family (the PRI* macros are only specified for printf).
   NOTE(review): %128s stores up to 128 characters plus a terminator;
   confirm that modname has room for 129 bytes.  */
934 while (getline (&line, &linesz, f) > 0
935 && sscanf (line, "%128s %lu %*s %*s %*s %" SCNx64 " %*s\n",
936 modname, &modsz, &modaddr) == 3)
/* The module occupies MODSZ bytes starting at MODADDR.  */
938 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
939 modaddr, modaddr + modsz);
946 result = check_module_notes (mod);
/* A stream error reports errno; stopping at end of file is success;
   stopping anywhere else means a line did not parse (ENOEXEC).  */
951 result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
957 INTDEF (dwfl_linux_kernel_report_modules)