1 /* Print the strings of printable characters in files.
2 Copyright (C) 2005-2010, 2012, 2014 Red Hat, Inc.
3 This file is part of elfutils.
4 Written by Ulrich Drepper <drepper@redhat.com>, 2005.
6 This file is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 elfutils is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 #include <stdio_ext.h>
44 #include <printversion.h>
47 # define MAP_POPULATE 0
51 /* Prototypes of local functions. */
52 static int read_fd (int fd, const char *fname, off_t fdlen);
53 static int read_elf (Elf *elf, int fd, const char *fname, off_t fdlen);
56 /* Name and version of program. */
57 ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
59 /* Bug report address. */
60 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
62 /* Definitions of arguments for argp functions.
   Each entry lists, in order: long option name, short option key,
   argument placeholder (NULL when the option takes no argument),
   flags, help text, and group.  Entries with neither a name nor a
   key only carry a heading text for the help output; the entry whose
   fields are all NULL/0 ends the table. */
63 static const struct argp_option options[] =
65 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
66 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
67 { "bytes", 'n', "MIN-LEN", 0,
68 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
69 { "encoding", 'e', "SELECTOR", 0, N_("\
70 Select character size and endianess: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
72 { "print-file-name", 'f', NULL, 0,
73 N_("Print name of the file before each string."), 0 },
74 { "radix", 't', "{o,d,x}", 0,
75 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
76 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
78 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
79 { NULL, 0, NULL, 0, NULL, 0 }
82 /* Short description of program. */
83 static const char doc[] = N_("\
84 Print the strings of printable characters in files.");
86 /* Strings for arguments in help texts. */
87 static const char args_doc[] = N_("[FILE...]");
89 /* Prototype for option handler. */
90 static error_t parse_opt (int key, char *arg, struct argp_state *state);
92 /* Data structure to communicate with argp functions.  It ties the
   option table, the option handler and the two help strings together;
   main passes it to argp_parse and parse_opt passes it to argp_help. */
93 static struct argp argp =
95 options, parse_opt, args_doc, doc, NULL, NULL, NULL
99 /* Global variables. */
101 /* True if whole file and not only loaded sections are looked at. */
102 static bool entire_file;
104 /* Minimum length of any sequence reported, counted in characters. */
105 static size_t min_len = 4;
107 /* Number of bytes per character.  Starts out as 1; parse_opt sets it
   to 2 or 4 for the multi-byte encodings. */
108 static size_t bytes_per_char = 1;
110 /* Minimum length of any sequence reported in bytes.  Computed in
   parse_opt as min_len * bytes_per_char. */
111 static size_t min_len_bytes;
113 /* True if multibyte characters are in big-endian order. */
114 static bool big_endian;
116 /* True if only 7-bit ASCII characters are expected (encoding
   selector 's'); process_chunk then rejects bytes above 127. */
117 static bool char_7bit;
119 /* True if file names should be printed before strings. */
120 static bool print_file_name;
122 /* Radix for printed numbers. */
129 } radix = radix_none;
132 /* Page size in use. */
136 /* Mapped parts of the ELF file.
   elfmap is the address returned by map_file (it can be MAP_FAILED),
   elfmap_size the size of that mapping.  elfmap_base is the address
   inside the mapping at which file data currently starts (read_block
   moves it past a keep area), and elfmap_off is the file offset of
   the byte at elfmap_base. */
137 static unsigned char *elfmap;
138 static unsigned char *elfmap_base;
139 static size_t elfmap_size;
140 static off_t elfmap_off;
/* Program entry point.  After option parsing, standard input is
   scanned when no file argument is left; otherwise every remaining
   argument is processed in turn, with "-" standing for standard
   input.  Regular files that libelf recognizes as ELF go through
   read_elf, everything else through read_fd; the per-file results
   are OR-ed together into RESULT. */
144 main (int argc, char *argv[])
146 /* We use no threads. */
147 __fsetlocking (stdin, FSETLOCKING_BYCALLER);
148 __fsetlocking (stdout, FSETLOCKING_BYCALLER);
/* Pick up the locale settings from the environment. */
151 (void) setlocale (LC_ALL, "");
153 /* Make sure the message catalog can be found. */
154 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
156 /* Initialize the message catalog. */
157 (void) textdomain (PACKAGE_TARNAME);
159 /* Parse and process arguments. */
161 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
163 /* Tell the library which version we are expecting. */
164 elf_version (EV_CURRENT);
166 /* Determine the page size. We will likely need it a couple of times. */
167 ps = sysconf (_SC_PAGESIZE);
171 if (remaining == argc)
172 /* We read from standard input. This we cannot do for a
174 result = read_fd (STDIN_FILENO,
175 print_file_name ? "{standard input}" : NULL,
176 (fstat (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
177 ? st.st_size : INT64_C (0x7fffffffffffffff));
181 int fd = (strcmp (argv[remaining], "-") == 0
182 ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
183 if (unlikely (fd == -1))
185 error (0, errno, gettext ("cannot open '%s'"), argv[remaining]);
190 const char *fname = print_file_name ? argv[remaining] : NULL;
191 int fstat_fail = fstat (fd, &st);
192 off_t fdlen = (fstat_fail
193 ? INT64_C (0x7fffffffffffffff) : st.st_size);
194 if (fdlen > (off_t) min_len_bytes)
199 || !S_ISREG (st.st_mode)
200 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
201 || elf_kind (elf) != ELF_K_ELF)
202 result |= read_fd (fd, fname, fdlen);
204 result |= read_elf (elf, fd, fname, fdlen);
206 /* This call will succeed even if ELF is NULL. */
210 if (strcmp (argv[remaining], "-") != 0)
/* Drop a mapping that is still in place after processing this file. */
214 if (elfmap != NULL && elfmap != MAP_FAILED)
215 munmap (elfmap, elfmap_size);
218 while (++remaining < argc);
224 /* Handle program arguments.
   Option callback referenced from the argp structure.  KEY is the
   option key and ARG its argument, if any; STATE is unused.  The
   function records the selection in the file-scope option variables
   and returns ARGP_ERR_UNKNOWN after reporting an invalid option
   value. */
226 parse_opt (int key, char *arg,
227 struct argp_state *state __attribute__ ((unused)))
236 /* We expect a string of one character.  A longer argument is
   mapped to '\0' before the selector is examined. */
237 switch (arg[1] != '\0' ? '\0' : arg[0])
/* Only the selector 's' restricts matching to 7-bit characters. */
241 char_7bit = arg[0] == 's';
/* Upper-case selectors mean 4 bytes per character, lower-case ones 2. */
252 bytes_per_char = isupper (arg[0]) ? 4 : 2;
256 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
258 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
259 return ARGP_ERR_UNKNOWN;
264 print_file_name = true;
/* NOTE(review): atoi reports no conversion errors, so trailing
   garbage after the digits is silently ignored.  A negative result
   wraps to a large size_t and is only rejected by the upper-bound
   check below; consider strtoul with explicit validation. */
268 min_len = atoi (arg);
278 radix = radix_decimal;
291 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
293 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
294 return ARGP_ERR_UNKNOWN;
299 /* Compute the length in bytes of any match.  min_len is unsigned,
   so the "<= 0" part of the test can only catch the value zero; the
   INT_MAX bound also keeps the multiplication from overflowing. */
300 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
301 error (EXIT_FAILURE, 0,
302 gettext ("invalid minimum length of matched string size"));
303 min_len_bytes = min_len * bytes_per_char;
307 return ARGP_ERR_UNKNOWN;
/* Scan a chunk for printable strings when characters are 2 or 4
   bytes wide (bytes_per_char != 1).
   FNAME is printed in front of every match unless it is NULL.  BUF
   points to LEN bytes of file data and TO is the file offset just
   past those bytes (see the callers); both are used to compute the
   location printed when a radix is selected.  *UNPRINTED is in/out:
   on entry it may hold the text of a run started in an earlier
   chunk, which is printed in front of the continuation, and the run
   that is pending at the end of this chunk is stored back into it. */
314 process_chunk_mb (const char *fname, const unsigned char *buf, off_t to,
315 size_t len, char **unprinted)
317 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
318 const unsigned char *start = buf;
319 while (len >= bytes_per_char)
323 if (bytes_per_char == 2)
/* Most significant byte first. */
326 ch = buf[0] << 8 | buf[1];
/* Least significant byte first. */
328 ch = buf[1] << 8 | buf[0];
/* NOTE(review): buf[N] is promoted to int before the shift, so
   "<< 24" overflows a 32-bit int when the byte is 0x80 or larger;
   casting the operand to uint32_t first would avoid that.  */
333 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
335 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
/* Values above 255 are never treated as printable. */
338 if (ch <= 255 && (isprint (ch) || ch == '\t'))
345 if (curlen >= min_len)
347 /* We found a match. */
348 if (unlikely (fname != NULL))
350 fputs_unlocked (fname, stdout);
351 fputs_unlocked (": ", stdout);
354 if (unlikely (radix != radix_none))
355 printf ((radix == radix_octal ? "%7" PRIo64 " "
356 : (radix == radix_decimal ? "%7" PRId64 " "
358 (int64_t) to - len - (buf - start));
/* Text carried over from the previous chunk comes first. */
360 if (unlikely (*unprinted != NULL))
362 fputs_unlocked (*unprinted, stdout);
367 /* There is no sane way of printing the string. If we
368 assume the file data is encoded in UCS-2/UTF-16 or
369 UCS-4/UTF-32 respectively we could convert the string.
370 But there is no such guarantee. */
371 fwrite_unlocked (start, 1, buf - start, stdout);
372 putc_unlocked ('\n', stdout);
/* Hand the pending run back to the caller. */
386 *unprinted = xstrndup ((const char *) start, curlen);
/* Scan a chunk of file data for printable strings and print every
   run of at least min_len characters.
   FNAME is printed in front of every match unless it is NULL.  BUF
   points to LEN bytes of file data and TO is the file offset just
   past those bytes (see the callers); both are used to compute the
   location printed when a radix is selected.  *UNPRINTED is in/out:
   on entry it may hold the text of a run started in an earlier
   chunk, which is printed in front of the continuation, and the run
   that is pending at the end of this chunk is stored back into it. */
391 process_chunk (const char *fname, const unsigned char *buf, off_t to,
392 size_t len, char **unprinted)
394 /* We are not going to slow the check down for the 2- and 4-byte
395 encodings. Handle them specially. */
396 if (unlikely (bytes_per_char != 1))
398 process_chunk_mb (fname, buf, to, len, unprinted);
402 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
403 const unsigned char *start = buf;
/* A byte extends the current run if it is printable or a TAB and,
   in 7-bit mode, not above 127. */
406 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
413 if (curlen >= min_len)
415 /* We found a match. */
/* NOTE(review): process_chunk_mb marks the same two conditions as
   unlikely, and print_file_name and radix default to off; the
   likely hints here look inverted -- confirm. */
416 if (likely (fname != NULL))
418 fputs_unlocked (fname, stdout);
419 fputs_unlocked (": ", stdout);
422 if (likely (radix != radix_none))
423 printf ((radix == radix_octal ? "%7" PRIo64 " "
424 : (radix == radix_decimal ? "%7" PRId64 " "
426 (int64_t) to - len - (buf - start));
/* Text carried over from the previous chunk comes first. */
428 if (unlikely (*unprinted != NULL))
430 fputs_unlocked (*unprinted, stdout);
434 fwrite_unlocked (start, 1, buf - start, stdout);
435 putc_unlocked ('\n', stdout);
/* Hand the pending run back to the caller. */
449 *unprinted = xstrndup ((const char *) start, curlen);
453 /* Map a file in as large chunks as possible.
   FD is the file to map and FDLEN its length, which caps the size of
   the first attempt.  START_OFF is presumably the file offset handed
   to mmap (read_block passes a page-aligned offset).  The size of
   the last attempt is stored in *MAP_SIZEP; read_block compares the
   returned address against MAP_FAILED. */
455 map_file (int fd, off_t start_off, off_t fdlen, size_t *map_sizep)
457 /* Maximum size we mmap. We use an #if to avoid overflows on
458 32-bit machines. 64-bit machines these days do not have usable
459 address spaces larger than about 43 bits. Not that any file
460 should be that large. */
461 # if SIZE_MAX > 0xffffffff
462 const size_t mmap_max = 0x4000000000lu;
464 const size_t mmap_max = 0x40000000lu;
467 /* Try to mmap the file. */
468 size_t map_size = MIN ((off_t) mmap_max, fdlen);
/* NOTE(review): where SIZE_MAX is 64 bits wide, SIZE_MAX / 16 is
   larger than mmap_max, so "map_size < map_size_min" below is true
   for every size that can be tried -- confirm that MAX rather than
   MIN is intended for the inner term. */
469 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
470 roundup (2 * min_len_bytes + 1, ps));
474 /* We map the memory for reading only here. Since we will
475 always look at every byte of the file it makes sense to
477 mem = mmap (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
479 if (mem != MAP_FAILED)
481 /* We will go through the mapping sequentially. */
482 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
485 if (errno != EINVAL && errno != ENOMEM)
486 /* This is an error other than the lack of address space. */
489 /* Maybe the size of the mapping is too big. Try again. */
491 if (map_size < map_size_min)
492 /* That size should have fit. */
496 *map_sizep = map_size;
501 /* Read the file without mapping.
   Data is read from FD at its current position in pieces of at most
   CHUNKSIZE bytes and handed to process_chunk.  FNAME is passed
   through for printing.  FROM is the file offset that corresponds to
   the data being read and is used to compute reported locations.
   Despite its name, FDLEN is the number of bytes to process:
   read_block passes "to - from". */
503 read_block_no_mmap (int fd, const char *fname, off_t from, off_t fdlen)
505 char *unprinted = NULL;
506 #define CHUNKSIZE 65536
/* Besides one full chunk the buffer has room for min_len_bytes bytes
   kept from the previous round and for up to bytes_per_char - 1
   bytes of an incomplete character. */
507 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
508 + bytes_per_char - 1);
/* New data is placed behind the bytes kept from the previous round. */
513 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
514 MIN (fdlen, CHUNKSIZE)));
517 /* There are fewer than MIN_LEN+1 bytes left so there cannot be
519 assert (unprinted == NULL || ntrailer == 0);
522 if (unlikely (n < 0))
524 /* Something went wrong. */
529 /* Account for the number of bytes read in this round. */
532 /* Do not use the signed N value. Note that the addition cannot
534 size_t nb = (size_t) n + ntrailer;
535 if (nb >= min_len_bytes)
537 /* We only use complete characters.  The mask relies on
   bytes_per_char being a power of two (it is 1, 2 or 4). */
538 nb &= ~(bytes_per_char - 1);
540 process_chunk (fname, buf, from + nb, nb, &unprinted);
542 /* If the last bytes of the buffer (modulo the character
543 size) have been printed we are not copying them.  A pending
   run is already saved in UNPRINTED, so nothing has to be kept in
   the buffer in that case either. */
544 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
546 memmove (buf, buf + nb - to_keep, to_keep);
556 /* Don't print anything we collected so far. There is no
557 terminating NUL byte. */
/* Process the file range from offset FROM up to, but not including,
   offset TO of FD.  FNAME is passed through for printing and FDLEN,
   the length of the file, sizes the initial mapping.  The function
   works on the file-scope mapping (elfmap, elfmap_base, elfmap_size,
   elfmap_off), creating it through map_file and moving it forward
   with MAP_FIXED re-mappings as the range is consumed.  If mapping
   failed, the data is read with read_block_no_mmap instead.
   NOTE(review): when FROM equals elfmap_off + elfmap_size neither
   the "cannot fit" test nor the "use the existing mapping" test
   matches, yet the assertion in the continuation part requires
   FROM < elfmap_off + elfmap_size -- looks like the first test
   should use ">="; confirm. */
565 read_block (int fd, const char *fname, off_t fdlen, off_t from, off_t to)
569 /* We need a completely new mapping.  The mapping starts at FROM
   rounded down to a page boundary (this assumes the page size PS is
   a power of two). */
570 elfmap_off = from & ~(ps - 1);
571 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
573 if (unlikely (elfmap == MAP_FAILED))
574 /* Let the kernel know we are going to read everything in sequence. */
575 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
578 if (unlikely (elfmap == MAP_FAILED))
580 /* Read from the file descriptor. For this we must position the
582 // XXX Eventually add flag which avoids this if the position
583 // XXX is known to match.
584 if (from != 0 && lseek (fd, from, SEEK_SET) != from)
585 error (EXIT_FAILURE, errno, gettext ("lseek failed"));
587 return read_block_no_mmap (fd, fname, from, to - from);
590 assert ((off_t) min_len_bytes < fdlen);
592 if (to < (off_t) elfmap_off || from > (off_t) (elfmap_off + elfmap_size))
594 /* The existing mapping cannot fit at all. Map the new area.
595 We always map the full range of ELFMAP_SIZE bytes even if
596 this extends beyond the end of the file. The Linux kernel
597 handles this OK if the access pages are not touched. */
598 elfmap_off = from & ~(ps - 1);
/* NOTE(review): the offset given to mmap is FROM, not the
   page-aligned elfmap_off computed just above.  mmap generally
   requires a page-aligned file offset, so an unaligned FROM makes
   this call fail -- looks like elfmap_off was intended; confirm. */
599 if (mmap (elfmap, elfmap_size, PROT_READ,
600 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
602 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
603 elfmap_base = elfmap;
606 char *unprinted = NULL;
608 /* Use the existing mapping as much as possible. If necessary, map
610 if (from >= (off_t) elfmap_off
611 && from < (off_t) (elfmap_off + elfmap_size))
612 /* There are at least a few bytes in this mapping which we can
614 process_chunk (fname, elfmap_base + (from - elfmap_off),
615 MIN (to, (off_t) (elfmap_off + elfmap_size)),
616 MIN (to, (off_t) (elfmap_off + elfmap_size)) - from,
619 if (to > (off_t) (elfmap_off + elfmap_size))
621 unsigned char *remap_base = elfmap_base;
622 size_t read_now = elfmap_size - (elfmap_base - elfmap);
624 assert (from >= (off_t) elfmap_off
625 && from < (off_t) (elfmap_off + elfmap_size));
626 off_t handled_to = elfmap_off + elfmap_size;
627 assert (elfmap == elfmap_base
628 || (elfmap_base - elfmap
629 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
630 if (elfmap == elfmap_base)
632 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
633 assert (elfmap_size >= keep_area + ps);
634 /* The keep area is used for the content of the previous
635 buffer we have to keep. This means copying those bytes
636 and for this we have to make the data writable. */
637 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
639 error (EXIT_FAILURE, errno, gettext ("mprotect failed"));
/* From now on file data starts behind the keep area. */
641 elfmap_base = elfmap + keep_area;
646 /* Map the rest of the file, eventually again in pieces.
647 We speed things up with a nice Linux feature. Note
648 that we have at least two pages mapped. */
/* A pending run is already saved in UNPRINTED; otherwise the last
   min_len_bytes bytes are copied in front of the new data. */
649 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
651 assert (read_now >= to_keep);
652 memmove (elfmap_base - to_keep,
653 remap_base + read_now - to_keep, to_keep);
654 remap_base = elfmap_base;
656 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
658 read_now = MIN (to - handled_to,
659 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
661 assert (handled_to % ps == 0);
662 assert (handled_to % bytes_per_char == 0);
/* Replace the data part of the mapping with the next piece of the
   file, starting at the page-aligned offset HANDLED_TO. */
663 if (mmap (remap_base, read_now, PROT_READ,
664 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
666 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
667 elfmap_off = handled_to;
/* Scan the kept bytes together with the complete characters of the
   newly mapped piece. */
669 process_chunk (fname, remap_base - to_keep,
670 elfmap_off + (read_now & ~(bytes_per_char - 1)),
671 to_keep + (read_now & ~(bytes_per_char - 1)),
673 handled_to += read_now;
674 if (handled_to >= to)
679 /* Don't print anything we collected so far. There is no
680 terminating NUL byte. */
/* Scan the whole file: process the byte range from offset 0 up to
   FDLEN of FD and return what read_block returns.  FNAME is passed
   through unchanged. */
688 read_fd (int fd, const char *fname, off_t fdlen)
690 return read_block (fd, fname, fdlen, 0, fdlen);
/* Scan an ELF file section by section.  ELF is the libelf handle for
   FD, FNAME is passed through for printing and FDLEN is the length
   of the file.  Only sections that have the SHF_ALLOC flag set and
   are not of type SHT_NOBITS are scanned, each through read_block;
   the results are OR-ed together into RESULT. */
695 read_elf (Elf *elf, int fd, const char *fname, off_t fdlen)
699 /* We will look at each section separately. The ELF file is not
700 mmapped. The libelf implementation will load the needed parts on
701 demand. Since we only iterate over the section header table the
702 memory consumption at this stage is kept minimal. */
703 Elf_Scn *scn = elf_nextscn (elf, NULL);
/* Fall back to scanning the file as a whole. */
705 return read_fd (fd, fname, fdlen);
711 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
713 /* Only look in sections which are loaded at runtime and
714 actually have content. */
715 if (shdr != NULL && shdr->sh_type != SHT_NOBITS
716 && (shdr->sh_flags & SHF_ALLOC) != 0)
/* Reject sections whose data would start or end beyond the end of
   the file.  The second test subtracts instead of adding sh_offset
   and sh_size, so it cannot overflow. */
718 if (shdr->sh_offset > (Elf64_Off) fdlen
719 || fdlen - shdr->sh_offset < shdr->sh_size)
723 if (unlikely (elf_getshdrstrndx (elf, &strndx) < 0))
726 sname = elf_strptr (elf, strndx, shdr->sh_name) ?: "<unknown>";
/* NOTE(review): elf_ndxscn returns a size_t, for which the matching
   conversion is %zu rather than %zd -- confirm before changing the
   translatable string. */
728 gettext ("Skipping section %zd '%s' data outside file"),
729 elf_ndxscn (scn), sname);
733 result |= read_block (fd, fname, fdlen, shdr->sh_offset,
734 shdr->sh_offset + shdr->sh_size);
737 while ((scn = elf_nextscn (elf, scn)) != NULL);
/* Release the mapping that read_block may have left in place. */
739 if (elfmap != NULL && elfmap != MAP_FAILED)
740 munmap (elfmap, elfmap_size);
747 #include "debugpred.h"