1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2 Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3 This file is part of elfutils.
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
18 or both in parallel, as here.
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
29 /* In case we have a bad fts we include this before config.h because it
30 can't handle _FILE_OFFSET_BITS.
31 Everything we need here is fine if its declarations just come first. */
43 #include <stdio_ext.h>
46 #include <sys/utsname.h>
50 /* If fts.h is included before config.h, its indirect inclusions may not
51 give us the right LFS aliases of these functions, so map them manually. */
53 #ifdef _FILE_OFFSET_BITS
/* Module name under which the kernel image itself is reported and
   looked up in the module list. */
62 #define KERNEL_MODNAME "kernel"
/* printf-style template for a release's module directory; the %s is
   filled with the release string. */
64 #define MODULEDIRFMT "/lib/modules/%s"
/* Files and file-name templates read by the functions below.  The
   templates taking %s are filled with a module name. */
66 #define KNOTESFILE "/sys/kernel/notes"
67 #define MODNOTESFMT "/sys/module/%s/notes"
68 #define KSYMSFILE "/proc/kallsyms"
69 #define MODULELIST "/proc/modules"
70 #define SECADDRDIRFMT "/sys/module/%s/sections/"
71 #define MODULE_SECT_NAME_LEN 32 /* Minimum any linux/module.h has had. */
/* Suffixes that try_kernel_name appends to a candidate file name before
   trying to open it. */
74 static const char *vmlinux_suffixes[] =
85 /* Try to open the given file as it is or under the debuginfo directory. */
/* *FNAME is the candidate path.  When the debuginfo search produces a
   name, *FNAME is replaced by it; *FNAME with each vmlinux_suffixes entry
   appended is probed as well.  Callers treat a negative result as failure.
   NOTE(review): the test of TRY_DEBUG, the return statement and the
   freeing of the previous *FNAME are on lines not visible here. */
87 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
92 /* Don't bother trying *FNAME itself here if the path will cause it to be
93 tried because we give its own basename as DEBUGLINK_FILE. */
/* The direct open is skipped (FD starts as -1) when the effective
   debuginfo path, falling back to DEFAULT_DEBUGINFO_PATH when the
   callback supplies none, begins with ':'. */
94 int fd = ((((dwfl->callbacks->debuginfo_path
95 ? *dwfl->callbacks->debuginfo_path : NULL)
96 ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
97 : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
/* Scratch module with only its Dwfl back-pointer set, handed to the
   standard debuginfo lookup below. */
101 Dwfl_Module fakemod = { .dwfl = dwfl };
104 /* Passing NULL for DEBUGLINK_FILE searches for both the basename
105 "vmlinux" and the default of basename + ".debug", to look for
106 "vmlinux.debug" files. */
107 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
109 &fakemod.debug.name);
111 /* Try the file's unadorned basename as DEBUGLINK_FILE,
112 to look only for "vmlinux" files. */
113 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
114 *fname, basename (*fname),
115 0, &fakemod.debug.name);
/* Hand the name found by the debuginfo search back to the caller. */
117 if (fakemod.debug.name != NULL)
120 *fname = fakemod.debug.name;
/* Probe *FNAME with each known suffix appended; the open is attempted
   only when asprintf produced a non-empty string. */
126 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
130 if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
132 fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
/* Return the release string of the running kernel as filled in by
   uname.  The utsname buffer has static storage, and uname is called
   only while the cached release string is still empty.
   NOTE(review): the value returned when uname fails is on a line not
   visible here; get_release tests the result against NULL. */
152 static inline const char *
153 kernel_release (void)
155 /* Cache the `uname -r` string we'll use. */
156 static struct utsname utsname;
157 if (utsname.release[0] == '\0' && uname (&utsname) != 0)
159 return utsname.release;
/* Build a candidate vmlinux path for RELEASE in *FNAME (allocated by
   asprintf) and hand it to try_kernel_name with TRY_DEBUG set.
   RELEASE must be non-null: its first character selects the layout.
   NOTE(review): the return statements are not visible here; callers use
   the result as a file descriptor, negative on failure. */
163 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
/* A RELEASE beginning with '/' names a directory holding "vmlinux";
   anything else is looked up as /boot/vmlinux-RELEASE first. */
165 if ((release[0] == '/'
166 ? asprintf (fname, "%s/vmlinux", release)
167 : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
170 int fd = try_kernel_name (dwfl, fname, true);
/* Second chance for a non-directory RELEASE: "vmlinux" inside the
   release's module directory. */
171 if (fd < 0 && release[0] != '/')
174 if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
176 fd = try_kernel_name (dwfl, fname, true);
/* Make sure a release string is available.  RELEASE may itself be null,
   in which case it is treated like a pointer to a null string.  When no
   string was supplied, the running kernel's release is fetched with
   kernel_release and stored through RELEASE.
   NOTE(review): the return statements, and any guard around the store
   to *RELEASE, are on lines not visible here; callers treat a nonzero
   result as failure. */
183 get_release (Dwfl *dwfl, const char **release)
188 const char *release_string = release == NULL ? NULL : *release;
189 if (release_string == NULL)
191 release_string = kernel_release ();
192 if (release_string == NULL)
195 *release = release_string;
/* Locate the kernel image for *RELEASE and report it under the name
   KERNEL_MODNAME.  PREDICATE, when non-null, is called with the module
   name and the file name found.
   NOTE(review): *RELEASE is read right after get_release succeeds, so
   RELEASE itself must be non-null when this function is called. */
202 report_kernel (Dwfl *dwfl, const char **release,
203 int (*predicate) (const char *module, const char *file))
205 int result = get_release (dwfl, release);
206 if (unlikely (result != 0))
210 int fd = find_kernel_elf (dwfl, *release, &fname);
/* Result is 0 when a predicate exists and returns zero for
   (KERNEL_MODNAME, NULL); otherwise it is errno, or ENOENT when errno
   is zero.  Presumably this is the no-file-found path -- the guarding
   condition is not visible here. */
213 result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
214 ? 0 : errno ?: ENOENT);
219 if (predicate != NULL)
221 /* Let the predicate decide whether to use this one. */
222 int want = (*predicate) (KERNEL_MODNAME, fname);
230 /* Note that on some architectures (e.g. x86_64) the vmlinux
231 is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
232 In both cases the phdr p_vaddr load address will be non-zero.
233 We want the image to be placed as if it was ET_DYN, so
234 pass true for add_p_vaddr which will do the right thing
235 (in combination with a zero base) in either case. */
236 Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
241 /* The kernel is ET_EXEC, but always treat it as relocatable. */
242 mod->e_type = ET_DYN;
247 if (!report || result < 0)
254 /* Look for a kernel debug archive. If we find one, report all its modules.
255 If not, return ENOENT. */
257 report_kernel_archive (Dwfl *dwfl, const char **release,
258 int (*predicate) (const char *module, const char *file))
259 /* no-op marker removed */
/* Test whether the file name of F ends in a recognized suffix.  With a
   nonzero NAMELEN the name must be exactly NAMELEN characters followed
   by the suffix; with NAMELEN zero any name at least one character
   longer than the suffix qualifies.  On a match the suffix length is
   returned.
   NOTE(review): `sfx' looks like the string argument of a helper macro
   (the lines end in continuation backslashes); its definition and the
   no-match return value are not visible here. */
301 check_suffix (const FTSENT *f, size_t namelen)
304 if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1 \
305 : f->fts_namelen >= sizeof sfx) \
306 && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1), \
308 return sizeof sfx - 1
324 /* Report a kernel and all its modules found on disk, for offline use.
325 If RELEASE starts with '/', it names a directory to look in;
326 if not, it names a directory to find under /lib/modules/;
327 if null, /lib/modules/`uname -r` is used.
328 Returns zero on success, -1 if dwfl_report_module failed,
329 or an errno code if finding the files on disk failed. */
331 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
332 int (*predicate) (const char *module,
/* A debug archive is tried first; only an ENOENT result from that
   attempt leads on to the per-file search below.  The address of the
   local RELEASE is passed so a defaulted release string is visible to
   the later steps. */
335 int result = report_kernel_archive (dwfl, &release, predicate);
336 if (result != ENOENT)
339 /* First report the kernel. */
340 result = report_kernel (dwfl, &release, predicate);
343 /* Do "find /lib/modules/RELEASE -name *.ko". */
/* modulesdir is the NULL-terminated path list given to fts_open.  An
   absolute RELEASE is used in place (the cast only drops const);
   otherwise the directory name is allocated by asprintf. */
345 char *modulesdir[] = { NULL, NULL };
346 if (release[0] == '/')
347 modulesdir[0] = (char *) release;
350 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
354 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
/* Once fts_open has been called, forget a borrowed RELEASE pointer so
   the free calls on modulesdir[0] below never touch the caller's
   string. */
355 if (modulesdir[0] == (char *) release)
356 modulesdir[0] = NULL;
359 free (modulesdir[0]);
364 while ((f = fts_read (fts)) != NULL)
366 /* Skip a "source" subtree, which tends to be large.
367 This insane hard-coding of names is what depmod does too. */
368 if (f->fts_namelen == sizeof "source" - 1
369 && !strcmp (f->fts_name, "source"))
371 fts_set (fts, f, FTS_SKIP);
380 /* See if this file name matches "*.ko". */
381 const size_t suffix = check_suffix (f, 0);
384 /* We have a .ko file to report. Following the algorithm
385 by which the kernel makefiles set KBUILD_MODNAME, we
386 replace all ',' or '-' with '_' in the file name and
387 call that the module name. Modules could well be
388 built using different embedded names than their file
389 names. To handle that, we would have to look at the
390 __this_module.name contents in the module's text. */
/* NAME is a heap copy of the file name with the suffix cut off. */
392 char *name = strndup (f->fts_name, f->fts_namelen - suffix);
393 if (unlikely (name == NULL))
395 __libdwfl_seterrno (DWFL_E_NOMEM);
399 for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
400 if (name[i] == '-' || name[i] == ',')
403 if (predicate != NULL)
405 /* Let the predicate decide whether to use this one. */
406 int want = (*predicate) (name, f->fts_path);
/* The module is reported by path, with -1 in the descriptor argument. */
420 if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
433 result = f->fts_errno;
441 /* We only get here in error cases. */
445 free (modulesdir[0]);
450 INTDEF (dwfl_linux_kernel_report_offline)
453 /* State of read_address used by intuit_kernel_bounds. */
454 struct read_address_state {
464 read_address (struct read_address_state *state, Dwarf_Addr *addr)
/* Read the next line of STATE->f into STATE->line and parse it as
   "HEXADDR TYPE REST": the address is stored in *ADDR, STATE->type
   points at the NUL-terminated type field and STATE->p at whatever
   follows it.  The result is true only when a line was read, some
   leading characters were consumed as the address and text remains
   after the type field.
   A line whose last character before the final one is ']' is rejected
   (presumably the "[module]" tag of a module symbol -- confirm against
   the kallsyms format).  That character is examined only when the line
   has at least two characters, so a one-character line never causes a
   read in front of the buffer. */
466 if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
467 (state->n > 1 && state->line[state->n - 2] == ']'))
469 *addr = strtoull (state->line, &state->p, 16);
/* Skip the blanks after the address, then cut out the type field;
   strsep overwrites the delimiter with NUL and advances STATE->p. */
470 state->p += strspn (state->p, " \t");
471 state->type = strsep (&state->p, " \t\n");
472 if (state->type == NULL)
474 return state->p != NULL && state->p != state->line;
478 /* Grovel around to guess the bounds of the runtime kernel image. */
/* The bounds are derived from the symbol lines of KSYMSFILE and stored
   in *START and *END; *NOTES is for the "__start_notes" symbol.
   NOTE(review): the return statement and the cleanup of the stream and
   line buffer are on lines not visible here. */
480 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
482 struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
484 state.f = fopen (KSYMSFILE, "r");
/* The stream is used from this function only, so stdio's internal
   locking is switched off; the *_unlocked calls below match that. */
488 (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
/* Advance until a symbol whose type letter is one of T, t, R or r;
   its address is left in *START. */
494 result = read_address (&state, start) ? 0 : -1;
495 while (result == 0 && strchr ("TtRr", *state.type) == NULL);
/* Keep reading: every further address overwrites *END, so *END ends up
   holding the last address read_address accepted.  The text after the
   type field is compared against the notes symbol while *NOTES is
   still zero. */
500 while (read_address (&state, end))
501 if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
/* Round *START down and *END up to a multiple of the page size. */
504 Dwarf_Addr round_kernel = sysconf (_SC_PAGESIZE);
505 *start &= -(Dwarf_Addr) round_kernel;
506 *end += round_kernel - 1;
507 *end &= -(Dwarf_Addr) round_kernel;
/* An empty range, or one shorter than a page, is checked for here. */
508 if (*start >= *end || *end - *start < round_kernel)
/* A stream error is reported through errno; otherwise the contents
   were not usable and ENOEXEC is used. */
514 result = ferror_unlocked (state.f) ? errno : ENOEXEC;
522 /* Look for a build ID note in NOTESFILE and associate the ID with MOD. */
/* VADDR is the address the notes data is loaded at; SECNAME is passed
   on to the section-address callback when VADDR has to be determined
   here.
   NOTE(review): the return values for "not found" and for errors are on
   lines not visible here; check_kernel_notes maps a negative result to
   -1 and check_module_notes treats a positive one as "nothing found". */
524 check_notes (Dwfl_Module *mod, const char *notesfile,
525 Dwarf_Addr vaddr, const char *secname)
527 int fd = open (notesfile, O_RDONLY);
/* The 32-bit and 64-bit note headers have the same size as GElf_Nhdr,
   so one header type is used for parsing below. */
531 assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
532 assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
536 unsigned char data[8192];
/* One read of at most sizeof buf bytes. */
539 ssize_t n = read (fd, buf.data, sizeof buf);
545 unsigned char *p = buf.data;
546 while (p < &buf.data[n])
548 /* No translation required since we are reading the native kernel. */
549 GElf_Nhdr *nhdr = (void *) p;
/* NAME and BITS mark the start of the note's name and descriptor; each
   size is rounded up to a multiple of four when stepping over it. */
551 unsigned char *name = p;
552 p += (nhdr->n_namesz + 3) & -4U;
553 unsigned char *bits = p;
554 p += (nhdr->n_descsz + 3) & -4U;
/* Accept the note only if it ends within the bytes read, has the build
   ID type, and its name is exactly "GNU" including the terminating
   NUL. */
556 if (p <= &buf.data[n]
557 && nhdr->n_type == NT_GNU_BUILD_ID
558 && nhdr->n_namesz == sizeof "GNU"
559 && !memcmp (name, "GNU", sizeof "GNU"))
561 /* Found it. For a module we must figure out its VADDR now. */
564 && (INTUSE(dwfl_linux_kernel_module_section_address)
565 (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
566 || vaddr == (GElf_Addr) -1l))
/* BITS - buf.data is the descriptor's offset from the start of the
   notes data; adding it yields the address of the ID bytes. */
570 vaddr += bits - buf.data;
571 return INTUSE(dwfl_module_report_build_id) (mod, bits,
572 nhdr->n_descsz, vaddr);
579 /* Look for a build ID for the kernel. */
/* Reads KNOTESFILE through check_notes with VADDR as the load address
   and no section name.  A negative result becomes -1; every other
   result becomes 0. */
581 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
583 return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
586 /* Look for a build ID for a loaded kernel module. */
/* Walks the directory named by MODNOTESFMT for MOD->name with fts and
   hands entries to check_notes.
   NOTE(review): the filtering of fts entry types, the cleanup and the
   return statement are on lines not visible here. */
588 check_module_notes (Dwfl_Module *mod)
/* NULL-terminated path list for fts_open; slot 0 is allocated by
   asprintf. */
590 char *dirs[2] = { NULL, NULL };
591 if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
594 FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
603 while ((f = fts_read (fts)) != NULL)
/* The entry's own name doubles as the section name, and the load
   address is passed as zero. */
610 result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
611 if (result > 0) /* Nothing found. */
620 result = f->fts_errno;
629 /* We only get here when finished or in error cases. */
639 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
641 Dwarf_Addr start = 0;
645 (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
647 /* This is a bit of a kludge. If we already reported the kernel,
648 don't bother figuring it out again--it never changes. */
649 for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
650 if (!strcmp (m->name, KERNEL_MODNAME))
654 return report () == NULL ? -1 : 0;
657 /* Try to figure out the bounds of the kernel image without
658 looking for any vmlinux file. */
660 /* The compiler cannot deduce that if intuit_kernel_bounds returns
661 zero NOTES will be initialized. Fake the initialization. */
662 asm ("" : "=m" (notes));
663 int result = intuit_kernel_bounds (&start, &end, ¬es);
666 Dwfl_Module *mod = report ();
667 return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
669 if (result != ENOENT)
672 /* Find the ELF file for the running kernel and dwfl_report_elf it. */
673 return report_kernel (dwfl, NULL, NULL);
675 INTDEF (dwfl_linux_kernel_report_kernel)
/* Write into ALTERNATE_NAME a copy of MODULE_NAME in which occurrences
   of FROM are replaced (the store of TO itself is on lines not visible
   here).  NAMELEN is the length of MODULE_NAME without its terminator;
   the final copy includes one extra byte for it, so ALTERNATE_NAME
   needs room for NAMELEN + 1 bytes.
   NOTE(review): the return statements are not visible; the caller
   negates the result, reading false as "FROM does not occur". */
679 subst_name (char from, char to,
680 const char * const module_name,
681 char * const alternate_name,
682 const size_t namelen)
/* N is the first occurrence of FROM; everything before it is copied
   unchanged, and A tracks the write position in ALTERNATE_NAME. */
684 const char *n = memchr (module_name, from, namelen);
687 char *a = mempcpy (alternate_name, module_name, n - module_name);
/* Copy each stretch between successive occurrences of FROM. */
691 while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
693 a = mempcpy (a, n, p - n);
/* Copy what is left, including the terminating NUL. */
697 memcpy (a, n, namelen - (n - module_name) + 1);
701 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules. */
/* On a match *FILE_NAME receives a heap copy of the path found.
   NOTE(review): the return statements and the use of ELFP are on lines
   not visible here. */
704 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
705 void **userdata __attribute__ ((unused)),
706 const char *module_name,
707 Dwarf_Addr base __attribute__ ((unused)),
708 char **file_name, Elf **elfp)
/* When the module has a build ID, the build-ID lookup is tried first.
   The branch after it is taken when that produced a descriptor, the
   module already has a main ELF, or errno is nonzero. */
710 if (mod->build_id_len > 0)
712 int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
714 if (fd >= 0 || mod->main.elf != NULL || errno != 0)
718 const char *release = kernel_release ();
/* The kernel itself is located by find_kernel_elf; everything below
   deals with modules. */
722 if (!strcmp (module_name, KERNEL_MODNAME))
723 return find_kernel_elf (mod->dwfl, release, file_name);
725 /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko". */
727 char *modulesdir[] = { NULL, NULL };
728 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
731 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
734 free (modulesdir[0]);
738 size_t namelen = strlen (module_name);
740 /* This is a kludge. There is no actual necessary relationship between
741 the name of the .ko file installed and the module name the kernel
742 knows it by when it's loaded. The kernel's only idea of the module
743 name comes from the name embedded in the object's magic
744 .gnu.linkonce.this_module section.
746 In practice, these module names match the .ko file names except for
747 some using '_' and some using '-'. So our cheap kludge is to look for
748 two files when either a '_' or '-' appears in a module name, one using
749 only '_' and one only using '-'. */
/* Room for a same-length variant of MODULE_NAME plus its terminator. */
751 char *alternate_name = malloc (namelen + 1);
752 if (unlikely (alternate_name == NULL))
754 free (modulesdir[0]);
/* When neither substitution applies, the alternate name is made the
   empty string. */
757 if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
758 !subst_name ('_', '-', module_name, alternate_name, namelen))
759 alternate_name[0] = '\0';
763 while ((f = fts_read (fts)) != NULL)
765 /* Skip a "source" subtree, which tends to be large.
766 This insane hard-coding of names is what depmod does too. */
767 if (f->fts_namelen == sizeof "source" - 1
768 && !strcmp (f->fts_name, "source"))
770 fts_set (fts, f, FTS_SKIP);
780 /* See if this file name is "MODULE_NAME.ko". */
/* check_suffix with NAMELEN fixes the total length, so comparing the
   first NAMELEN bytes against either spelling decides the match. */
781 if (check_suffix (f, namelen)
782 && (!memcmp (f->fts_name, module_name, namelen)
783 || !memcmp (f->fts_name, alternate_name, namelen)))
785 int fd = open (f->fts_accpath, O_RDONLY);
786 *file_name = strdup (f->fts_path);
788 free (modulesdir[0]);
789 free (alternate_name);
792 else if (*file_name == NULL)
804 error = f->fts_errno;
814 free (modulesdir[0]);
815 free (alternate_name);
819 INTDEF (dwfl_linux_kernel_find_elf)
822 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
823 We read the information from /sys/module directly. */
/* Only MODNAME and SECNAME are used to build the file name; the address
   read is stored through ADDR.  DWARF_CB_ABORT is returned when a file
   name cannot be built.
   NOTE(review): the success return and several branch conditions are on
   lines not visible here. */
826 dwfl_linux_kernel_module_section_address
827 (Dwfl_Module *mod __attribute__ ((unused)),
828 void **userdata __attribute__ ((unused)),
829 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
830 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
831 const GElf_Shdr *shdr __attribute__ ((unused)),
/* First attempt: the file named exactly SECNAME in the module's
   sections directory. */
835 if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
836 return DWARF_CB_ABORT;
838 FILE *f = fopen (sysfile, "r");
845 /* The .modinfo and .data.percpu sections are never kept
846 loaded in the kernel. If the kernel was compiled without
847 CONFIG_MODULE_UNLOAD, the .exit.* sections are not
848 actually loaded at all.
850 Setting *ADDR to -1 tells the caller this section is
851 actually absent from memory. */
853 if (!strcmp (secname, ".modinfo")
854 || !strcmp (secname, ".data.percpu")
855 || !strncmp (secname, ".exit", 5))
857 *addr = (Dwarf_Addr) -1l;
861 /* The goofy PPC64 module_frob_arch_sections function tweaks
862 the section names as a way to control other kernel code's
863 behavior, and this cruft leaks out into the /sys information.
864 The file name for ".init*" may actually look like "_init*". */
866 const bool is_init = !strncmp (secname, ".init", 5);
/* Same file name with the leading character of SECNAME replaced by
   '_'. */
869 if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
870 modname, &secname[1]) < 0)
872 f = fopen (sysfile, "r");
878 /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
879 In case that size increases in the future, look for longer
880 truncated names first. */
881 size_t namelen = strlen (secname);
882 if (namelen >= MODULE_SECT_NAME_LEN)
884 int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
887 return DWARF_CB_ABORT;
/* END is one past the last character of the path. */
888 char *end = sysfile + len;
892 f = fopen (sysfile, "r");
/* sysfile[len - namelen] is the first character of the section name
   inside the path; it is switched to '_' for one retry and then put
   back to '.'. */
893 if (is_init && f == NULL && errno == ENOENT)
895 sysfile[len - namelen] = '_';
896 f = fopen (sysfile, "r");
897 sysfile[len - namelen] = '.';
/* Keep going while the file is missing and the name between its first
   character and END is still at least MODULE_SECT_NAME_LEN long. */
900 while (f == NULL && errno == ENOENT
901 && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
909 return DWARF_CB_ABORT;
913 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* Exactly one hexadecimal value is expected.  Otherwise the result is
   errno after a stream error and ENOEXEC for unparsable contents.
   NOTE(review): PRIx64 is the printf-family macro; SCNx64 is the
   scanf-family counterpart -- confirm they agree on all targets. */
915 int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
916 : ferror_unlocked (f) ? errno : ENOEXEC);
923 return DWARF_CB_ABORT;
925 INTDEF (dwfl_linux_kernel_module_section_address)
/* Report every module listed in MODULELIST to DWFL, using the name,
   size and load address parsed from each line, and look for each
   module's build ID notes.
   NOTE(review): the cleanup and the return statement are on lines not
   visible here. */
928 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
930 FILE *f = fopen (MODULELIST, "r");
934 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
938 unsigned long int modsz;
942 /* We can't just use fscanf here because it's not easy to distinguish \n
943 from other whitespace so as to take the optional word following the
944 address but always stop at the end of the line. */
/* Fields taken from each line: name, size, three skipped words, then
   the hexadecimal load address.  The loop stops at the first line that
   does not yield all three values.
   NOTE(review): "%128s" can store 128 characters plus a terminating
   NUL, so MODNAME needs at least 129 bytes; its declaration is not
   visible here -- confirm. */
945 while (getline (&line, &linesz, f) > 0
946 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
947 modname, &modsz, &modaddr) == 3)
/* The module occupies [MODADDR, MODADDR + MODSZ). */
949 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
950 modaddr, modaddr + modsz);
957 result = check_module_notes (mod);
/* A stream error yields errno; clean end of file yields 0; stopping
   before end of file means a line could not be parsed, hence ENOEXEC. */
962 result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
968 INTDEF (dwfl_linux_kernel_report_modules)