1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2 Copyright (C) 2005-2011 Red Hat, Inc.
3 This file is part of Red Hat elfutils.
5 Red Hat elfutils is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by the
7 Free Software Foundation; version 2 of the License.
9 Red Hat elfutils is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with Red Hat elfutils; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
18 In addition, as a special exception, Red Hat, Inc. gives You the
19 additional right to link the code of Red Hat elfutils with code licensed
20 under any Open Source Initiative certified open source license
21 (http://www.opensource.org/licenses/index.php) which requires the
22 distribution of source code with any binary distribution and to
23 distribute linked combinations of the two. Non-GPL Code permitted under
24 this exception must only link to the code of Red Hat elfutils through
25 those well defined interfaces identified in the file named EXCEPTION
26 found in the source code files (the "Approved Interfaces"). The files
27 of Non-GPL Code may instantiate templates or use macros or inline
28 functions from the Approved Interfaces without causing the resulting
29 work to be covered by the GNU General Public License. Only Red Hat,
30 Inc. may make changes or additions to the list of Approved Interfaces.
31 Red Hat's grant of this exception is conditioned upon your not adding
32 any new exceptions. If you wish to add a new Approved Interface or
33 exception, please contact Red Hat. You must obey the GNU General Public
34 License in all respects for all of the Red Hat elfutils code and other
35 code used in conjunction with Red Hat elfutils except the Non-GPL Code
36 covered by this exception. If you modify this file, you may extend this
37 exception to your version of the file, but you are not obligated to do
38 so. If you do not wish to provide this exception without modification,
39 you must delete this exception statement from your version and license
40 this file solely under the GPL without exception.
42 Red Hat elfutils is an included package of the Open Invention Network.
43 An included package of the Open Invention Network is a package for which
44 Open Invention Network licensees cross-license their patents. No patent
45 license is granted, either expressly or impliedly, by designation as an
46 included package. Should you wish to participate in the Open Invention
47 Network licensing program, please visit www.openinventionnetwork.com
48 <http://www.openinventionnetwork.com>. */
50 /* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
51 Everything we need here is fine if its declarations just come first. */
61 #include <stdio_ext.h>
64 #include <sys/utsname.h>
69 #define KERNEL_MODNAME "kernel"
71 #define MODULEDIRFMT "/lib/modules/%s"
73 #define KNOTESFILE "/sys/kernel/notes"
74 #define MODNOTESFMT "/sys/module/%s/notes"
75 #define KSYMSFILE "/proc/kallsyms"
76 #define MODULELIST "/proc/modules"
77 #define SECADDRDIRFMT "/sys/module/%s/sections/"
78 #define MODULE_SECT_NAME_LEN 32 /* Minimum any linux/module.h has had. */
/* Table of file-name suffixes.  The visible code in this file appends each
   entry to a kernel file name with asprintf ("%s%s") and tries to open the
   result.  Only compiled when USE_ZLIB, USE_BZLIB or USE_LZMA is defined.
   NOTE(review): the initializer is not visible in this excerpt.  */
81 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
82 static const char *vmlinux_suffixes[] =
/* try_kernel_name (DWFL, FNAME, TRY_DEBUG)
   DWFL supplies the callbacks whose debuginfo_path is consulted (falling
   back to DEFAULT_DEBUGINFO_PATH when it is unset).  *FNAME is the
   candidate file name.  TRY_DEBUG additionally enables the second
   dwfl_standard_find_debuginfo lookup, which uses the default
   basename + ".debug" link name.
   The visible code tracks an open descriptor in FD, where a negative
   value means "nothing opened yet".  When compression support is compiled
   in, *FNAME with each vmlinux_suffixes entry appended is also tried.
   NOTE(review): several lines of this function (return type, braces,
   the final return) are not visible in this excerpt.  */
96 /* Try to open the given file as it is or under the debuginfo directory. */
98 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
103 /* Don't bother trying *FNAME itself here if the path will cause it to be
104 tried because we give its own basename as DEBUGLINK_FILE. */
105 int fd = ((((dwfl->callbacks->debuginfo_path
106 ? *dwfl->callbacks->debuginfo_path : NULL)
107 ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
108 : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
112 char *debugfname = NULL;
113 Dwfl_Module fakemod = { .dwfl = dwfl };
114 /* First try the file's unadorned basename as DEBUGLINK_FILE,
115 to look for "vmlinux" files. */
116 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
117 *fname, basename (*fname), 0,
119 if (fd < 0 && try_debug)
120 /* Next, let the call use the default of basename + ".debug",
121 to look for "vmlinux.debug" files. */
122 fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
125 if (debugfname != NULL)
132 #if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
135 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
139 if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
141 fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
/* Return the `release' field filled in by uname.  The struct utsname is a
   function-local static, so uname is only called while the cached release
   string is still empty.
   NOTE(review): the statement executed when uname fails is on a line not
   visible in this excerpt.  */
162 static inline const char *
163 kernel_release (void)
165 /* Cache the `uname -r` string we'll use. */
166 static struct utsname utsname;
167 if (utsname.release[0] == '\0' && uname (&utsname) != 0)
169 return utsname.release;
/* Build a candidate vmlinux path for RELEASE in *FNAME (allocated by
   asprintf) and try to open it with try_kernel_name.
   When RELEASE starts with '/', "RELEASE/vmlinux" is tried.  Otherwise
   "/boot/vmlinux-RELEASE" is tried first and, if that yields a negative
   descriptor, MODULEDIRFMT "/vmlinux" is tried next.
   NOTE(review): the handling of asprintf failure, the freeing of the first
   candidate name and the return statement are on lines not visible in
   this excerpt.  */
173 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
175 if ((release[0] == '/'
176 ? asprintf (fname, "%s/vmlinux", release)
177 : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
180 int fd = try_kernel_name (dwfl, fname, true);
181 if (fd < 0 && release[0] != '/')
184 if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
186 fd = try_kernel_name (dwfl, fname, true);
/* Decide which kernel release string to use.  RELEASE may itself be a null
   pointer; in that case, or when *RELEASE is null, the string comes from
   kernel_release ().  The visible code then stores the chosen string
   through RELEASE.
   NOTE(review): whether that store is guarded against RELEASE == NULL,
   and what is returned when kernel_release () yields null, are on lines
   not visible in this excerpt.  */
193 get_release (Dwfl *dwfl, const char **release)
198 const char *release_string = release == NULL ? NULL : *release;
199 if (release_string == NULL)
201 release_string = kernel_release ();
202 if (release_string == NULL)
205 *release = release_string;
/* Report the on-disk kernel image as module KERNEL_MODNAME.
   get_release resolves the release string, find_kernel_elf locates and
   opens the image, and a non-null PREDICATE is consulted with
   (KERNEL_MODNAME, file name) before the file is reported through
   dwfl_report_elf.
   The error value computed below is 0 when PREDICATE is non-null and
   rejects (KERNEL_MODNAME, NULL); otherwise it is errno, or ENOENT when
   errno is zero.
   NOTE(review): *RELEASE is dereferenced in the find_kernel_elf call, so
   RELEASE itself must not be a null pointer unless a guard exists on one
   of the lines not visible in this excerpt.  */
212 report_kernel (Dwfl *dwfl, const char **release,
213 int (*predicate) (const char *module, const char *file))
215 int result = get_release (dwfl, release);
216 if (unlikely (result != 0))
220 int fd = find_kernel_elf (dwfl, *release, &fname);
223 result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
224 ? 0 : errno ?: ENOENT);
229 if (predicate != NULL)
231 /* Let the predicate decide whether to use this one. */
232 int want = (*predicate) (KERNEL_MODNAME, fname);
240 Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
245 /* The kernel is ET_EXEC, but always treat it as relocatable. */
246 mod->e_type = ET_DYN;
249 if (!report || result < 0)
/* report_kernel_archive (DWFL, RELEASE, PREDICATE)
   The archive path is "*RELEASE/debug.a" when *RELEASE starts with '/',
   otherwise MODULEDIRFMT "/debug.a".  It is opened with try_kernel_name
   (with TRY_DEBUG false) and handed to __libdwfl_report_offline.  When
   the open fails the result is errno, or ENOENT when errno is zero.
   Afterwards DWFL's module list is scanned for a module that is not
   marked gc, is not ET_REL, and is named "kernel".
   NOTE(review): the remaining arguments of __libdwfl_report_offline and
   the list-splicing statements are on lines not visible in this
   excerpt.  */
258 /* Look for a kernel debug archive. If we find one, report all its modules.
259 If not, return ENOENT. */
261 report_kernel_archive (Dwfl *dwfl, const char **release,
262 int (*predicate) (const char *module, const char *file))
264 int result = get_release (dwfl, release);
265 if (unlikely (result != 0))
269 if (unlikely ((*release)[0] == '/'
270 ? asprintf (&archive, "%s/debug.a", *release)
271 : asprintf (&archive, MODULEDIRFMT "/debug.a", *release)) < 0)
274 int fd = try_kernel_name (dwfl, &archive, false);
276 result = errno ?: ENOENT;
279 /* We have the archive file open! */
280 Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
282 if (unlikely (last == NULL))
286 /* Find the kernel and move it to the head of the list. */
287 Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
288 for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
289 if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
/* Test whether the name of FTS entry F ends in a recognized suffix SFX.
   With NAMELEN nonzero the name must be exactly NAMELEN characters plus
   the suffix; with NAMELEN zero it only has to be at least sizeof SFX
   characters long.  In both cases the tail of the name is compared against
   the suffix, and on a match the suffix length (sizeof SFX - 1) is
   returned.
   NOTE(review): the lines below are the body of a helper macro whose
   #define line, uses, and the function's final return are not visible in
   this excerpt.  */
304 check_suffix (const FTSENT *f, size_t namelen)
307 if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1 \
308 : f->fts_namelen >= sizeof sfx) \
309 && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1), \
311 return sizeof sfx - 1
326 /* Report a kernel and all its modules found on disk, for offline use.
327 If RELEASE starts with '/', it names a directory to look in;
328 if not, it names a directory to find under /lib/modules/;
329 if null, /lib/modules/`uname -r` is used.
330 Returns zero on success, -1 if dwfl_report_module failed,
331 or an errno code if finding the files on disk failed. */
/* PREDICATE, when non-null, is called with (module name, file path) for
   each candidate module so the caller can decide whether to use it.
   A kernel debug archive is tried first via report_kernel_archive; only
   when that yields ENOENT are the kernel and the individual module files
   reported.  */
333 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
334 int (*predicate) (const char *module,
337 int result = report_kernel_archive (dwfl, &release, predicate);
338 if (result != ENOENT)
341 /* First report the kernel. */
342 result = report_kernel (dwfl, &release, predicate);
345 /* Do "find /lib/modules/RELEASE -name *.ko". */
347 char *modulesdir[] = { NULL, NULL };
348 if (release[0] == '/')
349 modulesdir[0] = (char *) release;
352 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
356 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
/* RELEASE is borrowed, not allocated here; forget it so the free calls
   below only ever release memory obtained from asprintf.  */
357 if (modulesdir[0] == (char *) release)
358 modulesdir[0] = NULL;
361 free (modulesdir[0]);
366 while ((f = fts_read (fts)) != NULL)
368 /* Skip a "source" subtree, which tends to be large.
369 This insane hard-coding of names is what depmod does too. */
370 if (f->fts_namelen == sizeof "source" - 1
371 && !strcmp (f->fts_name, "source"))
373 fts_set (fts, f, FTS_SKIP);
382 /* See if this file name matches "*.ko". */
383 const size_t suffix = check_suffix (f, 0);
386 /* We have a .ko file to report. Following the algorithm
387 by which the kernel makefiles set KBUILD_MODNAME, we
388 replace all ',' or '-' with '_' in the file name and
389 call that the module name. Modules could well be
390 built using different embedded names than their file
391 names. To handle that, we would have to look at the
392 __this_module.name contents in the module's text. */
394 char name[f->fts_namelen - suffix + 1];
/* Copy exactly the part of the file name that precedes the matched
   suffix.  The bound must be SUFFIX rather than a hard-coded 3: NAME
   holds fts_namelen - suffix + 1 bytes, so with any suffix longer than
   three characters a bound of fts_namelen - 3 writes past its end.  */
395 for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
396 if (f->fts_name[i] == '-' || f->fts_name[i] == ',')
399 name[i] = f->fts_name[i];
400 name[f->fts_namelen - suffix] = '\0';
402 if (predicate != NULL)
404 /* Let the predicate decide whether to use this one. */
405 int want = (*predicate) (name, f->fts_path);
415 if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
426 result = f->fts_errno;
434 /* We only get here in error cases. */
438 free (modulesdir[0]);
443 INTDEF (dwfl_linux_kernel_report_offline)
446 /* Grovel around to guess the bounds of the runtime kernel image. */
/* Reads KSYMSFILE line by line.  *START receives the first address whose
   type character is one of "TtRr" and *END the last address read; both
   are then rounded to the page size (START down, END up).  While scanning
   for END, a line naming "__start_notes" is detected as long as *NOTES is
   still zero.  When the resulting range is empty or smaller than a page
   the result is errno if the stream is in error, else ENOEXEC.
   NOTE(review): the local declarations, the statement that records
   *NOTES and the return paths are on lines not visible in this
   excerpt.  */
448 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
450 FILE *f = fopen (KSYMSFILE, "r");
454 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* Parse one line: a hexadecimal address, blanks, then a type field.
   Stops (returns false) at end of input or at a line whose last
   character before the newline is ']'.  */
464 inline bool read_address (Dwarf_Addr *addr)
/* Only inspect line[n - 2] when the line has at least two characters;
   for a one-character line that index would be before the buffer.  */
466 if ((n = getline (&line, &linesz, f)) < 1 || (n > 1 && line[n - 2] == ']'))
468 *addr = strtoull (line, &p, 16);
469 p += strspn (p, " \t");
470 type = strsep (&p, " \t\n");
473 return p != NULL && p != line;
478 result = read_address (start) ? 0 : -1;
479 while (result == 0 && strchr ("TtRr", *type) == NULL);
484 while (read_address (end))
485 if (*notes == 0 && !strcmp (p, "__start_notes\n"))
488 Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
489 *start &= -(Dwarf_Addr) round_kernel;
490 *end += round_kernel - 1;
491 *end &= -(Dwarf_Addr) round_kernel;
492 if (*start >= *end || *end - *start < round_kernel)
498 result = ferror_unlocked (f) ? errno : ENOEXEC;
/* check_notes (MOD, NOTESFILE, VADDR, SECNAME)
   Reads NOTESFILE into a fixed-size buffer with a single read call and
   walks the ELF note records in it; name and descriptor sizes are each
   rounded up to a multiple of four when advancing.  A record matches
   when it lies within the bytes read, has type NT_GNU_BUILD_ID and its
   name is "GNU" (including the terminating NUL).
   For a match, dwfl_linux_kernel_module_section_address is asked for the
   address of section SECNAME in MOD; VADDR is then advanced by the
   descriptor's offset inside the buffer and passed, with the descriptor
   bytes, to dwfl_module_report_build_id, whose result is returned.
   The callers in this file treat a negative return as an error and a
   positive return as "nothing found".
   NOTE(review): the buffer's enclosing declaration, the step over the
   note header, the condition introducing the section-address lookup and
   the non-matching return paths are on lines not visible in this
   excerpt.  */
506 /* Look for a build ID note in NOTESFILE and associate the ID with MOD. */
508 check_notes (Dwfl_Module *mod, const char *notesfile,
509 Dwarf_Addr vaddr, const char *secname)
511 int fd = open64 (notesfile, O_RDONLY);
515 assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
516 assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
520 unsigned char data[8192];
523 ssize_t n = read (fd, buf.data, sizeof buf);
529 unsigned char *p = buf.data;
530 while (p < &buf.data[n])
532 /* No translation required since we are reading the native kernel. */
533 GElf_Nhdr *nhdr = (void *) p;
535 unsigned char *name = p;
536 p += (nhdr->n_namesz + 3) & -4U;
537 unsigned char *bits = p;
538 p += (nhdr->n_descsz + 3) & -4U;
540 if (p <= &buf.data[n]
541 && nhdr->n_type == NT_GNU_BUILD_ID
542 && nhdr->n_namesz == sizeof "GNU"
543 && !memcmp (name, "GNU", sizeof "GNU"))
545 /* Found it. For a module we must figure out its VADDR now. */
548 && (INTUSE(dwfl_linux_kernel_module_section_address)
549 (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
550 || vaddr == (GElf_Addr) -1l))
554 vaddr += bits - buf.data;
555 return INTUSE(dwfl_module_report_build_id) (mod, bits,
556 nhdr->n_descsz, vaddr);
/* Runs check_notes on KNOTESFILE for KERNELMOD with VADDR as the notes
   address and no section name.  Any negative result is reported as -1;
   every other result (including "nothing found") is reported as 0.  */
563 /* Look for a build ID for the kernel. */
565 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
567 return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
/* Walks the directory named by MODNOTESFMT for MOD->name with fts and
   calls check_notes on entries found there, passing the entry's access
   path as the notes file, 0 as the address and the entry's file name as
   the section name.  A positive check_notes result means nothing was
   found in that file; fts errors are taken from fts_errno.
   NOTE(review): the asprintf/fts_open failure handling, the switch on the
   fts entry type and the cleanup/return code are on lines not visible in
   this excerpt.  */
570 /* Look for a build ID for a loaded kernel module. */
572 check_module_notes (Dwfl_Module *mod)
574 char *dirs[2] = { NULL, NULL };
575 if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
578 FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
587 while ((f = fts_read (fts)) != NULL)
594 result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
595 if (result > 0) /* Nothing found. */
604 result = f->fts_errno;
613 /* We only get here when finished or in error cases. */
/* Report the running kernel to DWFL as module KERNEL_MODNAME.
   If a module of that name is already on DWFL's list it is simply
   reported again.  Otherwise intuit_kernel_bounds is asked for the
   address range; on success the module is reported and
   check_kernel_notes looks for its build ID.  Only when
   intuit_kernel_bounds yields ENOENT does the code fall back to
   report_kernel, which looks for the kernel's ELF file on disk.
   Returns 0 on success, -1 when reporting the module fails, or the
   result of the helper that failed.
   NOTE(review): the declarations of START, END and NOTES and a few
   control-flow lines are not visible in this excerpt.  */
623 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
627 inline Dwfl_Module *report (void)
629 return INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end);
632 /* This is a bit of a kludge. If we already reported the kernel,
633 don't bother figuring it out again--it never changes. */
634 for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
635 if (!strcmp (m->name, KERNEL_MODNAME))
639 return report () == NULL ? -1 : 0;
642 /* Try to figure out the bounds of the kernel image without
643 looking for any vmlinux file. */
645 /* The compiler cannot deduce that if intuit_kernel_bounds returns
646 zero NOTES will be initialized. Fake the initialization. */
647 asm ("" : "=m" (notes));
648 int result = intuit_kernel_bounds (&start, &end, &notes);
651 Dwfl_Module *mod = report ();
652 return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
654 if (result != ENOENT)
/* report_kernel dereferences its RELEASE argument, so hand it the
   address of a local null pointer instead of a null pointer-to-pointer;
   get_release then fills in the running kernel's release string.  */
const char *release = NULL;
657 /* Find the ELF file for the running kernel and dwfl_report_elf it. */
658 return report_kernel (dwfl, &release, NULL);
660 INTDEF (dwfl_linux_kernel_report_kernel)
/* dwfl_linux_kernel_find_elf (MOD, USERDATA, MODULE_NAME, BASE,
			       FILE_NAME, ELFP)
   USERDATA and BASE are unused.  When MOD already has a build ID, a
   lookup through dwfl_build_id_find_elf is attempted first.  Otherwise
   the release string from kernel_release () selects where to look: for
   MODULE_NAME equal to KERNEL_MODNAME the work is delegated to
   find_kernel_elf; for anything else the MODULEDIRFMT directory is walked
   with fts looking for a file whose name is MODULE_NAME (or its
   '-'/'_'-swapped alternate) followed by a suffix check_suffix accepts.
   A matching file is opened with open64 and its path is duplicated into
   *FILE_NAME.
   NOTE(review): the error handling, the switch on the fts entry type and
   the return statements are on lines not visible in this excerpt.  */
663 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules. */
666 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
667 void **userdata __attribute__ ((unused)),
668 const char *module_name,
669 Dwarf_Addr base __attribute__ ((unused)),
670 char **file_name, Elf **elfp)
672 if (mod->build_id_len > 0)
674 int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
676 if (fd >= 0 || mod->main.elf != NULL || errno != 0)
680 const char *release = kernel_release ();
684 if (!strcmp (module_name, KERNEL_MODNAME))
685 return find_kernel_elf (mod->dwfl, release, file_name);
687 /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko". */
689 char *modulesdir[] = { NULL, NULL };
690 if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
693 FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
696 free (modulesdir[0]);
700 size_t namelen = strlen (module_name);
702 /* This is a kludge. There is no actual necessary relationship between
703 the name of the .ko file installed and the module name the kernel
704 knows it by when it's loaded. The kernel's only idea of the module
705 name comes from the name embedded in the object's magic
706 .gnu.linkonce.this_module section.
708 In practice, these module names match the .ko file names except for
709 some using '_' and some using '-'. So our cheap kludge is to look for
710 two files when either a '_' or '-' appears in a module name, one using
711 only '_' and one only using '-'. */
713 char alternate_name[namelen + 1];
/* Build ALTERNATE_NAME as a copy of MODULE_NAME with occurrences of FROM
   handled piecewise via memchr/mempcpy.  NOTE(review): the statements
   that store TO and the return values are on lines not visible here.  */
714 inline bool subst_name (char from, char to)
716 const char *n = memchr (module_name, from, namelen);
719 char *a = mempcpy (alternate_name, module_name, n - module_name);
723 while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
725 a = mempcpy (a, n, p - n);
729 memcpy (a, n, namelen - (n - module_name) + 1);
/* No '-' or '_' to swap: leave the alternate name empty.  */
732 if (!subst_name ('-', '_') && !subst_name ('_', '-'))
733 alternate_name[0] = '\0';
737 while ((f = fts_read (fts)) != NULL)
739 /* Skip a "source" subtree, which tends to be large.
740 This insane hard-coding of names is what depmod does too. */
741 if (f->fts_namelen == sizeof "source" - 1
742 && !strcmp (f->fts_name, "source"))
744 fts_set (fts, f, FTS_SKIP);
754 /* See if this file name is "MODULE_NAME.ko". */
755 if (check_suffix (f, namelen)
756 && (!memcmp (f->fts_name, module_name, namelen)
757 || !memcmp (f->fts_name, alternate_name, namelen)))
759 int fd = open64 (f->fts_accpath, O_RDONLY);
760 *file_name = strdup (f->fts_path);
762 free (modulesdir[0]);
765 else if (*file_name == NULL)
777 error = f->fts_errno;
787 free (modulesdir[0]);
791 INTDEF (dwfl_linux_kernel_find_elf)
794 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
795 We read the information from /sys/module directly. */
/* MODNAME and SECNAME select the file SECADDRDIRFMT "SECNAME" under
   /sys/module; MOD, USERDATA, BASE, SHNDX and SHDR are unused.  The
   section address is parsed from that file as a hexadecimal number and
   stored through ADDR.  When the file cannot be opened, fallbacks are
   tried: sections known never to be loaded get *ADDR set to -1, ".init*"
   names are retried with a leading '_', and names of at least
   MODULE_SECT_NAME_LEN characters are retried in truncated form.
   Returns DWARF_CB_ABORT on failure.
   NOTE(review): the final parameter, the errno checks between the
   fallbacks and the success return are on lines not visible in this
   excerpt.  */
798 dwfl_linux_kernel_module_section_address
799 (Dwfl_Module *mod __attribute__ ((unused)),
800 void **userdata __attribute__ ((unused)),
801 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
802 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
803 const GElf_Shdr *shdr __attribute__ ((unused)),
807 if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
808 return DWARF_CB_ABORT;
810 FILE *f = fopen (sysfile, "r");
817 /* The .modinfo and .data.percpu sections are never kept
818 loaded in the kernel. If the kernel was compiled without
819 CONFIG_MODULE_UNLOAD, the .exit.* sections are not
820 actually loaded at all.
822 Setting *ADDR to -1 tells the caller this section is
823 actually absent from memory. */
825 if (!strcmp (secname, ".modinfo")
826 || !strcmp (secname, ".data.percpu")
827 || !strncmp (secname, ".exit", 5))
829 *addr = (Dwarf_Addr) -1l;
833 /* The goofy PPC64 module_frob_arch_sections function tweaks
834 the section names as a way to control other kernel code's
835 behavior, and this cruft leaks out into the /sys information.
836 The file name for ".init*" may actually look like "_init*". */
838 const bool is_init = !strncmp (secname, ".init", 5);
841 if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
842 modname, &secname[1]) < 0)
844 f = fopen (sysfile, "r");
850 /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
851 In case that size increases in the future, look for longer
852 truncated names first. */
853 size_t namelen = strlen (secname);
854 if (namelen >= MODULE_SECT_NAME_LEN)
856 int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
859 return DWARF_CB_ABORT;
860 char *end = sysfile + len;
864 f = fopen (sysfile, "r");
865 if (is_init && f == NULL && errno == ENOENT)
/* Temporarily turn the leading '.' of the section name into '_' for
   the retry, then restore it.  */
867 sysfile[len - namelen] = '_';
868 f = fopen (sysfile, "r");
869 sysfile[len - namelen] = '.';
872 while (f == NULL && errno == ENOENT
873 && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
881 return DWARF_CB_ABORT;
885 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
/* Use the fscanf conversion macro (SCNx64); PRIx64 is specified for the
   fprintf family only.  */
887 int result = (fscanf (f, "%" SCNx64 "\n", addr) == 1 ? 0
888 : ferror_unlocked (f) ? errno : ENOEXEC);
895 return DWARF_CB_ABORT;
897 INTDEF (dwfl_linux_kernel_module_section_address)
/* Report every module listed in MODULELIST to DWFL.
   Each line is parsed for the module name, its size (decimal) and its
   load address (hexadecimal); the module is reported with
   dwfl_report_module as the range [address, address + size) and
   check_module_notes then looks for its build ID.  After the loop the
   result is errno if the stream is in error, 0 at end of file, and
   ENOEXEC when a line failed to parse.
   NOTE(review): the declarations of LINE, LINESZ, MODNAME and MODADDR, the
   error handling inside the loop and the cleanup/return are on lines not
   visible in this excerpt.  */
900 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
902 FILE *f = fopen (MODULELIST, "r");
906 (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
910 unsigned long int modsz;
914 /* We can't just use fscanf here because it's not easy to distinguish \n
915 from other whitespace so as to take the optional word following the
916 address but always stop at the end of the line. */
/* NOTE(review): "%128s" can store up to 129 bytes including the
   terminating NUL; confirm MODNAME is at least that large or reduce the
   width.  The address uses the sscanf conversion macro SCNx64; PRIx64 is
   specified for the fprintf family only.  */
917 while (getline (&line, &linesz, f) > 0
918 && sscanf (line, "%128s %lu %*s %*s %*s %" SCNx64 " %*s\n",
919 modname, &modsz, &modaddr) == 3)
921 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
922 modaddr, modaddr + modsz);
929 result = check_module_notes (mod);
934 result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
940 INTDEF (dwfl_linux_kernel_report_modules)