1 /* Print the strings of printable characters in files.
2 Copyright (C) 2005-2010, 2012 Red Hat, Inc.
3 This file is part of elfutils.
4 Written by Ulrich Drepper <drepper@redhat.com>, 2005.
6 This file is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 elfutils is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
36 #include <stdio_ext.h>
41 #include <sys/param.h>
47 /* Prototypes of local functions. read_fd scans a whole file
   descriptor; read_elf looks at the sections of an ELF file. */
48 static int read_fd (int fd, const char *fname, off64_t fdlen);
49 static int read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen);
52 /* Name and version of program. print_version is assigned to the
   argp version hook definition below. */
53 static void print_version (FILE *stream, struct argp_state *state);
54 ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
56 /* Bug report address. */
57 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
59 /* Definitions of arguments for argp functions. Each entry lists, in
   order: long option name, short key, argument name (NULL if the
   option takes none), flags, help text, and help group. Entries with
   neither name nor key are group headers; the all-NULL entry ends the
   table. */
60 static const struct argp_option options[] =
62 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
63 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
64 { "bytes", 'n', "MIN-LEN", 0,
65 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
66 { "encoding", 'e', "SELECTOR", 0, N_("\
67 Select character size and endianess: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
69 { "print-file-name", 'f', NULL, 0,
70 N_("Print name of the file before each string."), 0 },
71 { "radix", 't', "{o,d,x}", 0,
72 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
73 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
75 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
76 { NULL, 0, NULL, 0, NULL, 0 }
79 /* Short description of program, shown in the help output. */
80 static const char doc[] = N_("\
81 Print the strings of printable characters in files.");
83 /* Strings for arguments in help texts. */
84 static const char args_doc[] = N_("[FILE...]");
86 /* Prototype for option handler. */
87 static error_t parse_opt (int key, char *arg, struct argp_state *state);
89 /* Data structure to communicate with argp functions. It bundles the
   option table, the option handler, and the two help strings defined
   above; the remaining members are left NULL. */
90 static struct argp argp =
92 options, parse_opt, args_doc, doc, NULL, NULL, NULL
96 /* Global variables. */
98 /* True if whole file and not only loaded sections are looked at. */
99 static bool entire_file;
101 /* Minimum length of any sequence reported, in characters. */
102 static size_t min_len = 4;
104 /* Number of bytes per character (1 by default; the encoding option
   can select 2 or 4). */
105 static size_t bytes_per_char = 1;
107 /* Minimum length of any sequence reported in bytes, i.e. min_len
   multiplied by bytes_per_char. */
108 static size_t min_len_bytes;
110 /* True if multibyte characters are in big-endian order. */
111 static bool big_endian;
113 /* True if only 7-bit ASCII characters are expected (encoding
   selector 's'); bytes above 127 then do not count as part of a
   string. */
114 static bool char_7bit;
116 /* True if file names should be printed before strings. */
117 static bool print_file_name;
119 /* Location print format string; NULL when no location is printed. */
120 static const char *locfmt;
122 /* Page size in use. */
126 /* Mapped parts of the ELF file. */
127 static unsigned char *elfmap;
128 static unsigned char *elfmap_base;
129 static size_t elfmap_size;
130 static off64_t elfmap_off;
/* Program entry point. Sets up locale and message catalog, parses the
   command line with argp, and then scans the input: standard input
   when no file arguments remain, otherwise every remaining argument in
   turn, where "-" stands for standard input. A file that cannot be
   opened is reported and does not stop the loop. Descriptors that
   are not regular files or not ELF objects go through read_fd, the
   rest through read_elf; the results are OR-ed together.
   NOTE(review): several lines of this function (declarations, braces,
   the first part of the read_fd/read_elf condition) are not visible
   in this extract. */
134 main (int argc, char *argv[])
136 /* We use no threads. */
137 __fsetlocking (stdin, FSETLOCKING_BYCALLER);
138 __fsetlocking (stdout, FSETLOCKING_BYCALLER);
141 (void) setlocale (LC_ALL, "");
143 /* Make sure the message catalog can be found. */
144 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
146 /* Initialize the message catalog. */
147 (void) textdomain (PACKAGE_TARNAME);
149 /* Parse and process arguments. */
151 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
153 /* Tell the library which version we are expecting. */
154 elf_version (EV_CURRENT);
156 /* Determine the page size. We will likely need it a couple of times. */
157 ps = sysconf (_SC_PAGESIZE);
161 if (remaining == argc)
162 /* We read from standard input. This we cannot do for a
164 result = read_fd (STDIN_FILENO,
165 print_file_name ? "{standard input}" : NULL,
166 (fstat64 (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
167 ? st.st_size : INT64_C (0x7fffffffffffffff));
171 int fd = (strcmp (argv[remaining], "-") == 0
172 ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
173 if (unlikely (fd == -1))
175 error (0, errno, gettext ("cannot open '%s'"), argv[remaining]);
180 const char *fname = print_file_name ? argv[remaining] : NULL;
181 int fstat_fail = fstat64 (fd, &st);
182 off64_t fdlen = (fstat_fail
183 ? INT64_C (0x7fffffffffffffff) : st.st_size);
184 if (fdlen > (off64_t) min_len_bytes)
189 || !S_ISREG (st.st_mode)
190 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
191 || elf_kind (elf) != ELF_K_ELF)
192 result |= read_fd (fd, fname, fdlen);
194 result |= read_elf (elf, fd, fname, fdlen);
196 /* This call will succeed even if ELF is NULL. */
200 if (strcmp (argv[remaining], "-") != 0)
204 if (elfmap != NULL && elfmap != MAP_FAILED)
205 munmap (elfmap, elfmap_size);
208 while (++remaining < argc);
214 /* Print the version information to STREAM: the program name with
   package name and version, a copyright and warranty notice, and the
   author. STATE is unused. */
216 print_version (FILE *stream, struct argp_state *state __attribute__ ((unused)))
218 fprintf (stream, "strings (%s) %s\n", PACKAGE_NAME, PACKAGE_VERSION);
219 fprintf (stream, gettext ("\
220 Copyright (C) %s Red Hat, Inc.\n\
221 This is free software; see the source for copying conditions. There is NO\n\
222 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
224 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
228 /* Handle program arguments. KEY identifies the option and ARG is
   its argument, if any; STATE is unused. The handler records the
   selection in the global variables (character size and 7-bit mode,
   file name printing, minimum length, location format). An invalid
   option value is reported together with a usage hint and answered
   with ARGP_ERR_UNKNOWN. Once min_len and bytes_per_char are known
   the minimum length in bytes is derived from them.
   NOTE(review): the case labels of the switch are not visible in
   this extract. */
230 parse_opt (int key, char *arg,
231 struct argp_state *state __attribute__ ((unused)))
240 /* We expect a string of one character. */
241 switch (arg[1] != '\0' ? '\0' : arg[0])
245 char_7bit = arg[0] == 's';
256 bytes_per_char = isupper (arg[0]) ? 4 : 2;
260 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
262 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
263 return ARGP_ERR_UNKNOWN;
268 print_file_name = true;
/* NOTE(review): atoi reports no conversion errors. A result of zero
   is only caught by the range check on min_len further down. */
272 min_len = atoi (arg);
282 locfmt = "%7" PRId64 " ";
287 locfmt = "%7" PRIo64 " ";
291 locfmt = "%7" PRIx64 " ";
295 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
297 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
298 return ARGP_ERR_UNKNOWN;
303 /* Compute the length in bytes of any match. The range check keeps
   the multiplication below from exceeding INT_MAX. */
304 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
305 error (EXIT_FAILURE, 0,
306 gettext ("invalid minimum length of matched string size"));
307 min_len_bytes = min_len * bytes_per_char;
311 return ARGP_ERR_UNKNOWN;
/* Look for strings of 2- or 4-byte characters in the LEN bytes at BUF.
   Each character is assembled from bytes_per_char bytes in big- or
   little-endian order and counts as part of a string if its value is
   at most 255 and it is printable or a tab. A run of at least
   min_len characters is a match and is written to standard output,
   preceded by "FNAME: " if FNAME is not NULL and by the location
   (computed from TO, LEN and the position of the run in BUF) if a
   location format is set. *UNPRINTED carries text over between
   calls: if it is not NULL on entry its length seeds the current run
   and it is printed ahead of the match; a new copy may be stored in it
   before returning.
   NOTE(review): the loop bookkeeping is not visible in this extract;
   TO presumably is the file offset of the end of BUF -- confirm. */
318 process_chunk_mb (const char *fname, const unsigned char *buf, off64_t to,
319 size_t len, char **unprinted)
321 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
322 const unsigned char *start = buf;
323 while (len >= bytes_per_char)
327 if (bytes_per_char == 2)
330 ch = buf[0] << 8 | buf[1];
332 ch = buf[1] << 8 | buf[0];
337 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
339 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
342 if (ch <= 255 && (isprint (ch) || ch == '\t'))
349 if (curlen >= min_len)
351 /* We found a match. */
352 if (unlikely (fname != NULL))
354 fputs_unlocked (fname, stdout);
355 fputs_unlocked (": ", stdout);
358 if (unlikely (locfmt != NULL))
359 printf (locfmt, (int64_t) to - len - (buf - start));
361 if (unlikely (*unprinted != NULL))
363 fputs_unlocked (*unprinted, stdout);
368 /* There is no sane way of printing the string. If we
369 assume the file data is encoded in UCS-2/UTF-16 or
370 UCS-4/UTF-32 respectively we could convert the string.
371 But there is no such guarantee. */
372 fwrite_unlocked (start, 1, buf - start, stdout);
373 putc_unlocked ('\n', stdout);
387 *unprinted = xstrndup ((const char *) start, curlen);
/* Look for strings in the LEN bytes at BUF. For 2- and 4-byte
   encodings the work is handed to process_chunk_mb. Otherwise a byte
   counts as part of a string if it is printable or a tab and, when
   char_7bit is set, not above 127. A run of at least min_len bytes
   is a match and is written to standard output, preceded by "FNAME: "
   if FNAME is not NULL and by the location if a location format is
   set. *UNPRINTED carries text over between calls in the same way as
   in process_chunk_mb.
   NOTE(review): the loop bookkeeping is not visible in this
   extract. */
392 process_chunk (const char *fname, const unsigned char *buf, off64_t to,
393 size_t len, char **unprinted)
395 /* We are not going to slow the check down for the 2- and 4-byte
396 encodings. Handle them special. */
397 if (unlikely (bytes_per_char != 1))
399 process_chunk_mb (fname, buf, to, len, unprinted);
403 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
404 const unsigned char *start = buf;
407 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
414 if (curlen >= min_len)
416 /* We found a match. */
417 if (likely (fname != NULL))
419 fputs_unlocked (fname, stdout);
420 fputs_unlocked (": ", stdout);
423 if (likely (locfmt != NULL))
424 printf (locfmt, (int64_t) to - len - (buf - start));
426 if (unlikely (*unprinted != NULL))
428 fputs_unlocked (*unprinted, stdout);
432 fwrite_unlocked (start, 1, buf - start, stdout);
433 putc_unlocked ('\n', stdout);
447 *unprinted = xstrndup ((const char *) start, curlen);
451 /* Map a file in as large chunks as possible. FD is the descriptor
   to map and FDLEN its length; the first attempt covers the smaller
   of FDLEN and a fixed upper bound. If mmap fails with EINVAL or
   ENOMEM a smaller mapping is tried, down to a lower bound; any other
   error is not retried. On success the size of the mapping is
   stored in *MAP_SIZEP.
   NOTE(review): the return statements and the use of START_OFF are
   not visible in this extract; the caller compares the result with
   MAP_FAILED. */
453 map_file (int fd, off64_t start_off, off64_t fdlen, size_t *map_sizep)
462 /* Maximum size we mmap. We use an #ifdef to avoid overflows on
463 32-bit machines. 64-bit machines these days do not have usable
464 address spaces larger than about 43 bits. Not that any file
465 should be that large. */
466 # if SIZE_MAX > 0xffffffff
467 const size_t mmap_max = 0x4000000000lu;
469 const size_t mmap_max = 0x40000000lu;
472 /* Try to mmap the file. */
473 size_t map_size = MIN ((off64_t) mmap_max, fdlen);
474 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
475 roundup (2 * min_len_bytes + 1, ps));
479 /* We map the memory for reading only here. Since we will
480 always look at every byte of the file it makes sense to
482 mem = mmap64 (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
484 if (mem != MAP_FAILED)
486 /* We will go through the mapping sequentially. */
487 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
490 if (errno != EINVAL && errno != ENOMEM)
491 /* This is an error other than the lack of address space. */
494 /* Maybe the size of the mapping is too big. Try again. */
496 if (map_size < map_size_min)
497 /* That size should have fit. */
501 *map_sizep = map_size;
507 /* Read the file without mapping. Data is read from FD in pieces of
   at most CHUNKSIZE bytes into a heap buffer and handed to
   process_chunk; FROM is the file offset used for the printed
   locations and FDLEN bounds the amount read. Bytes at the end of a
   piece that might belong to a string continuing in the next piece
   are moved to the front of the buffer before the next read.
   NOTE(review): the loop structure and the return value are not
   visible in this extract. */
509 read_block_no_mmap (int fd, const char *fname, off64_t from, off64_t fdlen)
511 char *unprinted = NULL;
512 #define CHUNKSIZE 65536
513 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
514 + bytes_per_char - 1);
519 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
520 MIN (fdlen, CHUNKSIZE)));
523 /* There are fewer than MIN_LEN+1 bytes left so there cannot be
525 assert (unprinted == NULL || ntrailer == 0);
528 if (unlikely (n < 0))
530 /* Something went wrong. */
535 /* Account for the number of bytes read in this round. */
538 /* Do not use the signed N value. Note that the addition cannot
540 size_t nb = (size_t) n + ntrailer;
541 if (nb >= min_len_bytes)
543 /* We only use complete characters. */
544 nb &= ~(bytes_per_char - 1);
546 process_chunk (fname, buf, from + nb, nb, &unprinted);
548 /* If the last bytes of the buffer (modulo the character
549 size) have been printed we are not copying them. */
550 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
552 memmove (buf, buf + nb - to_keep, to_keep);
562 /* Don't print anything we collected so far. There is no
563 terminating NUL byte. */
/* Scan the range FROM..TO of the file open on FD, whose total length
   is FDLEN, and print the strings found; FNAME is passed on to the
   chunk processing for output. The file is mapped through map_file
   and the mapping is remembered in the elfmap* globals. If mapping
   fails the range is read through read_block_no_mmap instead, after
   seeking to FROM. With a mapping in place, the part of the range it
   already covers is processed directly, and the rest is brought in by
   re-mapping further pieces of the file over the same address range.
   The start of the mapping is made writable to serve as a keep area
   into which the tail of the previous piece is copied, so that a
   string crossing a piece boundary is still found.
   NOTE(review): several conditions, braces and the return statements
   of this function are not visible in this extract. */
571 read_block (int fd, const char *fname, off64_t fdlen, off64_t from, off64_t to)
575 /* We need a completely new mapping. */
576 elfmap_off = from & ~(ps - 1);
577 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
579 if (unlikely (elfmap == MAP_FAILED))
580 /* Let the kernel know we are going to read everything in sequence. */
581 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
584 if (unlikely (elfmap == MAP_FAILED))
586 /* Read from the file descriptor. For this we must position the
588 // XXX Eventually add flag which avoids this if the position
589 // XXX is known to match.
590 if (from != 0 && lseek64 (fd, from, SEEK_SET) != from)
591 error (EXIT_FAILURE, errno, gettext ("lseek64 failed"));
593 return read_block_no_mmap (fd, fname, from, to - from);
596 assert ((off64_t) min_len_bytes < fdlen);
598 if (to < (off64_t) elfmap_off || from > (off64_t) (elfmap_off + elfmap_size))
600 /* The existing mapping cannot fit at all. Map the new area.
601 We always map the full range of ELFMAP_SIZE bytes even if
602 this extends beyond the end of the file. The Linux kernel
603 handles this OK if the access pages are not touched. */
604 elfmap_off = from & ~(ps - 1);
/* NOTE(review): the offset passed to mmap64 below is FROM, not the
   page-aligned ELFMAP_OFF computed just above. mmap expects a
   page-aligned file offset -- confirm whether ELFMAP_OFF was meant. */
605 if (mmap64 (elfmap, elfmap_size, PROT_READ,
606 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
608 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
609 elfmap_base = elfmap;
612 char *unprinted = NULL;
614 /* Use the existing mapping as much as possible. If necessary, map
616 if (from >= (off64_t) elfmap_off
617 && from < (off64_t) (elfmap_off + elfmap_size))
618 /* There are at least a few bytes in this mapping which we can
620 process_chunk (fname, elfmap_base + (from - elfmap_off),
621 MIN (to, (off64_t) (elfmap_off + elfmap_size)),
622 MIN (to, (off64_t) (elfmap_off + elfmap_size)) - from,
625 if (to > (off64_t) (elfmap_off + elfmap_size))
627 unsigned char *remap_base = elfmap_base;
628 size_t read_now = elfmap_size - (elfmap_base - elfmap);
630 assert (from >= (off64_t) elfmap_off
631 && from < (off64_t) (elfmap_off + elfmap_size));
632 off64_t handled_to = elfmap_off + elfmap_size;
633 assert (elfmap == elfmap_base
634 || (elfmap_base - elfmap
635 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
636 if (elfmap == elfmap_base)
638 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
639 assert (elfmap_size >= keep_area + ps);
640 /* The keep area is used for the content of the previous
641 buffer we have to keep. This means copying those bytes
642 and for this we have to make the data writable. */
643 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
645 error (EXIT_FAILURE, errno, gettext ("mprotect failed"));
647 elfmap_base = elfmap + keep_area;
652 /* Map the rest of the file, eventually again in pieces.
653 We speed things up with a nice Linux feature. Note
654 that we have at least two pages mapped. */
655 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
657 assert (read_now >= to_keep);
658 memmove (elfmap_base - to_keep,
659 remap_base + read_now - to_keep, to_keep);
660 remap_base = elfmap_base;
662 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
664 read_now = MIN (to - handled_to,
665 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
667 assert (handled_to % ps == 0);
668 assert (handled_to % bytes_per_char == 0);
669 if (mmap64 (remap_base, read_now, PROT_READ,
670 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
672 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
673 elfmap_off = handled_to;
675 process_chunk (fname, remap_base - to_keep,
676 elfmap_off + (read_now & ~(bytes_per_char - 1)),
677 to_keep + (read_now & ~(bytes_per_char - 1)),
679 handled_to += read_now;
680 if (handled_to >= to)
685 /* Don't print anything we collected so far. There is no
686 terminating NUL byte. */
/* Scan the whole file open on FD, of length FDLEN, for strings. This
   is read_block applied to the range from offset 0 to FDLEN; its
   result is returned unchanged. */
694 read_fd (int fd, const char *fname, off64_t fdlen)
696 return read_block (fd, fname, fdlen, 0, fdlen);
/* Scan the ELF file ELF, open on FD with length FDLEN, section by
   section. Only sections that are allocated at runtime (SHF_ALLOC)
   and occupy file space (not SHT_NOBITS) are passed to read_block,
   using the section's file offset and size as the range; the results
   are OR-ed together. The visible fallback hands the whole file to
   read_fd (presumably when no section is found -- the condition is
   not visible in this extract). A mapping left behind in the
   elfmap* globals is released at the end. */
701 read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen)
705 /* We will look at each section separately. The ELF file is not
706 mmapped. The libelf implementation will load the needed parts on
707 demand. Since we only iterate over the section header table the
708 memory consumption at this stage is kept minimal. */
709 Elf_Scn *scn = elf_nextscn (elf, NULL);
711 return read_fd (fd, fname, fdlen);
717 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
719 /* Only look in sections which are loaded at runtime and
720 actually have content. */
721 if (shdr != NULL && shdr->sh_type != SHT_NOBITS
722 && (shdr->sh_flags & SHF_ALLOC) != 0)
723 result |= read_block (fd, fname, fdlen, shdr->sh_offset,
724 shdr->sh_offset + shdr->sh_size);
726 while ((scn = elf_nextscn (elf, scn)) != NULL);
728 if (elfmap != NULL && elfmap != MAP_FAILED)
729 munmap (elfmap, elfmap_size);
736 #include "debugpred.h"