1 /* Print the strings of printable characters in files.
2 Copyright (C) 2005-2010, 2012 Red Hat, Inc.
3 This file is part of Red Hat elfutils.
4 Written by Ulrich Drepper <drepper@redhat.com>, 2005.
6 Red Hat elfutils is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by the
8 Free Software Foundation; version 2 of the License.
10 Red Hat elfutils is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with Red Hat elfutils; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
19 Red Hat elfutils is an included package of the Open Invention Network.
20 An included package of the Open Invention Network is a package for which
21 Open Invention Network licensees cross-license their patents. No patent
22 license is granted, either expressly or impliedly, by designation as an
23 included package. Should you wish to participate in the Open Invention
24 Network licensing program, please visit www.openinventionnetwork.com
25 <http://www.openinventionnetwork.com>. */
44 #include <stdio_ext.h>
49 #include <sys/param.h>
55 /* Prototypes of local functions.  Both scan one open file descriptor
   for strings and are called from main; read_elf additionally takes
   the libelf handle opened for that descriptor.  */
56 static int read_fd (int fd, const char *fname, off64_t fdlen);
57 static int read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen);
60 /* Name and version of program.  print_version is installed as the
   argp version hook.  */
61 static void print_version (FILE *stream, struct argp_state *state);
62 ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
64 /* Bug report address. */
65 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
67 /* Definitions of arguments for argp functions.  Entries with neither
   a long name nor a key only carry a heading for the help output; the
   final entry with every field empty ends the table.  The option keys
   used here (a, n, e, f, t, o) are the ones parse_opt receives.  */
68 static const struct argp_option options[] =
70 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
71 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
72 { "bytes", 'n', "MIN-LEN", 0,
73 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
74 { "encoding", 'e', "SELECTOR", 0, N_("\
75 Select character size and endianess: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
77 { "print-file-name", 'f', NULL, 0,
78 N_("Print name of the file before each string."), 0 },
79 { "radix", 't', "{o,d,x}", 0,
80 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
81 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
83 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
84 { NULL, 0, NULL, 0, NULL, 0 }
87 /* Short description of program, shown in the help output. */
88 static const char doc[] = N_("\
89 Print the strings of printable characters in files.");
91 /* Strings for arguments in help texts.  The brackets mark FILE as
   optional; main reads standard input when no file is given.  */
92 static const char args_doc[] = N_("[FILE...]");
94 /* Prototype for option handler. */
95 static error_t parse_opt (int key, char *arg, struct argp_state *state);
97 /* Data structure to communicate with argp functions.  It ties
   together the option table, the option handler and the two help
   texts defined above; the remaining fields are left empty.  */
98 static struct argp argp =
100 options, parse_opt, args_doc, doc, NULL, NULL, NULL
104 /* Global variables.  The option values are written by parse_opt and
   read by the scanning functions.  */
106 /* True if whole file and not only loaded sections are looked at. */
107 static bool entire_file;
109 /* Minimum length of any sequence reported, in characters. */
110 static size_t min_len = 4;
112 /* Number of bytes per character.  Stays 1 unless a 16-bit or 32-bit
   encoding is selected, in which case parse_opt sets it to 2 or 4.  */
113 static size_t bytes_per_char = 1;
115 /* Minimum length of any sequence reported in bytes.  Computed by
   parse_opt as min_len * bytes_per_char.  */
116 static size_t min_len_bytes;
118 /* True if multibyte characters are in big-endian order. */
119 static bool big_endian;
121 /* True if 7-bit ASCII is expected (encoding selector s).  When set,
   process_chunk rejects bytes above 127.  */
122 static bool char_7bit;
124 /* True if file names should be printed before strings. */
125 static bool print_file_name;
127 /* Location print format string.  NULL when no location is printed;
   otherwise a printf format taking one 64-bit offset.  */
128 static const char *locfmt;
130 /* Page size in use. */
134 /* Mapped parts of the ELF file.  */
/* Address of the mapping as returned by map_file and passed to munmap.  */
135 static unsigned char *elfmap;
/* Start of the file data inside the mapping.  Either equal to elfmap
   or, once read_block has set aside a keep area, that many bytes
   behind it.  */
136 static unsigned char *elfmap_base;
/* Size of the mapping in bytes.  */
137 static size_t elfmap_size;
/* File offset of the data found at elfmap_base.  */
138 static off64_t elfmap_off;
/* Program entry point.  Sets up stream locking, locale and message
   catalog, parses the command line with argp, and then scans either
   standard input (no FILE argument) or every named file in turn; the
   name - also selects standard input.  A named file is only scanned
   when its length exceeds min_len_bytes; if fstat64 fails the length
   is taken to be the largest 64-bit signed value.  Files that are not
   regular files or not ELF go through read_fd, ELF files through
   read_elf, and the individual results are OR-ed into result.  */
142 main (int argc, char *argv[])
144 /* We use no threads. */
145 __fsetlocking (stdin, FSETLOCKING_BYCALLER);
146 __fsetlocking (stdout, FSETLOCKING_BYCALLER);
/* Use the locale selected by the environment.  */
149 (void) setlocale (LC_ALL, "");
151 /* Make sure the message catalog can be found. */
152 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
154 /* Initialize the message catalog. */
155 (void) textdomain (PACKAGE_TARNAME);
157 /* Parse and process arguments.  argp_parse stores the index of the
   first non-option argument in remaining.  */
159 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
161 /* Tell the library which version we are expecting. */
162 elf_version (EV_CURRENT);
164 /* Determine the page size. We will likely need it a couple of times. */
165 ps = sysconf (_SC_PAGESIZE);
/* No FILE argument was given if remaining already equals argc.  */
169 if (remaining == argc)
170 /* We read from standard input. This we cannot do for a
172 result = read_fd (STDIN_FILENO,
173 print_file_name ? "{standard input}" : NULL,
174 (fstat64 (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
175 ? st.st_size : INT64_C (0x7fffffffffffffff));
179 int fd = (strcmp (argv[remaining], "-") == 0
180 ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
181 if (unlikely (fd == -1))
183 error (0, errno, gettext ("cannot open '%s'"), argv[remaining]);
188 const char *fname = print_file_name ? argv[remaining] : NULL;
189 int fstat_fail = fstat64 (fd, &st);
190 off64_t fdlen = (fstat_fail
191 ? INT64_C (0x7fffffffffffffff) : st.st_size);
192 if (fdlen > (off64_t) min_len_bytes)
197 || !S_ISREG (st.st_mode)
198 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
199 || elf_kind (elf) != ELF_K_ELF)
200 result |= read_fd (fd, fname, fdlen);
202 result |= read_elf (elf, fd, fname, fdlen);
204 /* This call will succeed even if ELF is NULL. */
208 if (strcmp (argv[remaining], "-") != 0)
/* Release the mapping, if any, that scanning this file left behind
   before moving on to the next argument.  */
212 if (elfmap != NULL && elfmap != MAP_FAILED)
213 munmap (elfmap, elfmap_size);
216 while (++remaining < argc);
222 /* Print the version information to STREAM: the program name with
   package name and version, the copyright notice, and the author.
   STATE is not used.  */
224 print_version (FILE *stream, struct argp_state *state __attribute__ ((unused)))
226 fprintf (stream, "strings (%s) %s\n", PACKAGE_NAME, PACKAGE_VERSION);
227 fprintf (stream, gettext ("\
228 Copyright (C) %s Red Hat, Inc.\n\
229 This is free software; see the source for copying conditions. There is NO\n\
230 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
232 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
236 /* Handle program arguments.  This is the option handler named in the
   argp structure.  KEY identifies the option and ARG is its argument
   text.  The handler records the chosen encoding, file name printing,
   minimum length and location radix in the file-level variables.  An
   invalid encoding or radix value is reported through error together
   with a usage hint, and ARGP_ERR_UNKNOWN is returned.  STATE is not
   used.  */
238 parse_opt (int key, char *arg,
239 struct argp_state *state __attribute__ ((unused)))
248 /* We expect a string of one character. */
249 switch (arg[1] != '\0' ? '\0' : arg[0])
/* The selector s means 7-bit characters; any other selector reaching
   this statement leaves the flag clear.  */
253 char_7bit = arg[0] == 's';
/* Upper-case selectors stand for 4-byte characters, lower-case
   selectors for 2-byte characters.  */
264 bytes_per_char = isupper (arg[0]) ? 4 : 2;
268 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
270 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
271 return ARGP_ERR_UNKNOWN;
276 print_file_name = true;
/* atoi performs no error checking.  Because min_len is unsigned, a
   negative input turns into a very large value, which the range
   check further down rejects.  */
280 min_len = atoi (arg);
/* The radix choices differ only in the printf conversion applied to
   the 64-bit offset.  */
290 locfmt = "%7" PRId64 " ";
295 locfmt = "%7" PRIo64 " ";
299 locfmt = "%7" PRIx64 " ";
303 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
305 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
306 return ARGP_ERR_UNKNOWN;
311 /* Compute the length in bytes of any match.  The upper bound keeps
   the product below from exceeding INT_MAX.
   NOTE(review): min_len is a size_t, so the <= 0 test can only ever
   catch the value zero.  */
312 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
313 error (EXIT_FAILURE, 0,
314 gettext ("invalid minimum length of matched string size"));
315 min_len_bytes = min_len * bytes_per_char;
319 return ARGP_ERR_UNKNOWN;
/* Scan the LEN bytes at BUF for strings made of 2-byte or 4-byte
   characters, as selected by bytes_per_char.  A character counts as
   printable when its value is at most 255 and it is accepted by
   isprint or is a tab.  For a run of at least min_len characters the
   function prints the optional FNAME prefix, the optional location,
   any text pending in *UNPRINTED, the raw bytes of the run and a
   newline.  TO is the file offset the callers associate with the end
   of the chunk.  *UNPRINTED is both read and replaced here.
   NOTE(review): it presumably carries an unfinished run over to the
   next call; confirm.  */
326 process_chunk_mb (const char *fname, const unsigned char *buf, off64_t to,
327 size_t len, char **unprinted)
/* Text carried over from the previous chunk counts toward the length
   of the current run.  */
329 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
330 const unsigned char *start = buf;
331 while (len >= bytes_per_char)
335 if (bytes_per_char == 2)
/* First byte most significant.  */
338 ch = buf[0] << 8 | buf[1];
/* First byte least significant.  */
340 ch = buf[1] << 8 | buf[0];
/* NOTE(review): the unsigned char operands are promoted to int, so a
   first (or last) byte of 128 or more is shifted into the sign bit
   by the << 24 below; confirm whether a cast to an unsigned 32-bit
   type is wanted.  */
345 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
347 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
350 if (ch <= 255 && (isprint (ch) || ch == '\t'))
357 if (curlen >= min_len)
359 /* We found a match. */
360 if (unlikely (fname != NULL))
362 fputs_unlocked (fname, stdout);
363 fputs_unlocked (": ", stdout);
/* The printed location is derived from TO, the remaining length and
   the distance between BUF and START.  */
366 if (unlikely (locfmt != NULL))
367 printf (locfmt, (int64_t) to - len - (buf - start));
369 if (unlikely (*unprinted != NULL))
371 fputs_unlocked (*unprinted, stdout);
376 /* There is no sane way of printing the string. If we
377 assume the file data is encoded in UCS-2/UTF-16 or
378 UCS-4/UTF-32 respectively we could convert the string.
379 But there is no such guarantee. */
380 fwrite_unlocked (start, 1, buf - start, stdout);
381 putc_unlocked ('\n', stdout);
/* Keep a copy of the CURLEN bytes at START in *UNPRINTED.  */
395 *unprinted = xstrndup ((const char *) start, curlen);
/* Scan the LEN bytes at BUF for strings and print every run of at
   least min_len printable characters.  Multi-byte encodings are
   handed to process_chunk_mb.  In the single-byte case a byte is
   printable when isprint accepts it or it is a tab, and, if char_7bit
   is set, it does not exceed 127.  Each match is printed with the
   optional FNAME prefix, the optional location, any text pending in
   *UNPRINTED, the bytes of the run and a newline.  TO is the file
   offset the callers associate with the end of the chunk.  */
400 process_chunk (const char *fname, const unsigned char *buf, off64_t to,
401 size_t len, char **unprinted)
403 /* We are not going to slow the check down for the 2- and 4-byte
404 encodings. Handle them special. */
405 if (unlikely (bytes_per_char != 1))
407 process_chunk_mb (fname, buf, to, len, unprinted);
/* Text carried over from the previous chunk counts toward the length
   of the current run.  */
411 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
412 const unsigned char *start = buf;
415 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
422 if (curlen >= min_len)
424 /* We found a match. */
/* NOTE(review): the two likely hints below disagree with the
   unlikely hints on the same tests in process_chunk_mb; confirm
   which is intended.  They only affect optimization.  */
425 if (likely (fname != NULL))
427 fputs_unlocked (fname, stdout);
428 fputs_unlocked (": ", stdout);
431 if (likely (locfmt != NULL))
432 printf (locfmt, (int64_t) to - len - (buf - start));
434 if (unlikely (*unprinted != NULL))
436 fputs_unlocked (*unprinted, stdout);
440 fwrite_unlocked (start, 1, buf - start, stdout);
441 putc_unlocked ('\n', stdout);
/* Keep a copy of the CURLEN bytes at START in *UNPRINTED.  */
455 *unprinted = xstrndup ((const char *) start, curlen);
459 /* Map a file in as large chunks as possible.  The first attempt uses
   the smaller of mmap_max and FDLEN.  If mmap64 fails with EINVAL or
   ENOMEM the size is considered too big and another attempt is made,
   until the size falls below map_size_min.  The size in use is stored
   in *MAP_SIZEP.  Callers compare the result against MAP_FAILED.
   START_OFF is presumably the file offset at which the mapping
   begins; confirm.  */
461 map_file (int fd, off64_t start_off, off64_t fdlen, size_t *map_sizep)
470 /* Maximum size we mmap. We use an #ifdef to avoid overflows on
471 32-bit machines. 64-bit machines these days do not have usable
472 address spaces larger than about 43 bits. Not that any file
473 should be that large. */
474 # if SIZE_MAX > 0xffffffff
475 const size_t mmap_max = 0x4000000000lu;
477 const size_t mmap_max = 0x40000000lu;
480 /* Try to mmap the file. */
481 size_t map_size = MIN ((off64_t) mmap_max, fdlen);
/* NOTE(review): SIZE_MAX / 16 is larger than either mmap_max value,
   so map_size is below map_size_min from the start and the retry
   gives up after the first failure.  A small lower bound looks like
   what was intended; confirm.  */
482 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
483 roundup (2 * min_len_bytes + 1, ps));
487 /* We map the memory for reading only here. Since we will
488 always look at every byte of the file it makes sense to
490 mem = mmap64 (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
492 if (mem != MAP_FAILED)
494 /* We will go through the mapping sequentially. */
495 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
498 if (errno != EINVAL && errno != ENOMEM)
499 /* This is an error other than the lack of address space. */
502 /* Maybe the size of the mapping is too big. Try again. */
504 if (map_size < map_size_min)
505 /* That size should have fit. */
509 *map_sizep = map_size;
515 /* Read the file without mapping.  Data is fetched from FD with read
   in pieces of at most CHUNKSIZE bytes and passed to process_chunk.
   FROM is the file offset used for the locations that get printed.
   FDLEN bounds the size of each read; read_block passes the number
   of bytes still to be scanned, not the length of the file.  The
   tail of each piece is kept in the buffer so that a string crossing
   two reads can still be found.  */
517 read_block_no_mmap (int fd, const char *fname, off64_t from, off64_t fdlen)
519 char *unprinted = NULL;
520 #define CHUNKSIZE 65536
/* Room for one full read plus the bytes kept from the previous round
   and an incomplete trailing character.  */
521 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
522 + bytes_per_char - 1);
/* New data is placed behind the NTRAILER bytes already in BUF.  */
527 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
528 MIN (fdlen, CHUNKSIZE)));
531 /* There are less than MIN_LEN+1 bytes left so there cannot be
533 assert (unprinted == NULL || ntrailer == 0);
536 if (unlikely (n < 0))
538 /* Something went wrong. */
543 /* Account for the number of bytes read in this round. */
546 /* Do not use the signed N value. Note that the addition cannot
548 size_t nb = (size_t) n + ntrailer;
549 if (nb >= min_len_bytes)
551 /* We only use complete characters. */
/* bytes_per_char is 1, 2 or 4, so the mask rounds NB down to a whole
   number of characters.  */
552 nb &= ~(bytes_per_char - 1);
554 process_chunk (fname, buf, from + nb, nb, &unprinted);
556 /* If the last bytes of the buffer (modulo the character
557 size) have been printed we are not copying them. */
558 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
/* Move the bytes to keep to the front of the buffer.  */
560 memmove (buf, buf + nb - to_keep, to_keep);
570 /* Don't print anything we collected so far. There is no
571 terminating NUL byte. */
/* Scan the bytes of FD between the file offsets FROM and TO for
   strings.  The data is accessed through the mapping described by
   elfmap, elfmap_base, elfmap_size and elfmap_off, which is created
   with map_file (FDLEN is passed on as the file length) and then
   replaced piece by piece with MAP_FIXED mappings.  If no mapping can
   be made the function positions FD with lseek64 and lets
   read_block_no_mmap do the work.  A failing lseek64, re-mmap or
   mprotect ends the program through error.  */
579 read_block (int fd, const char *fname, off64_t fdlen, off64_t from, off64_t to)
583 /* We need a completely new mapping. */
/* Round FROM down to a page boundary; ps is the page size.  */
584 elfmap_off = from & ~(ps - 1);
585 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
587 if (unlikely (elfmap == MAP_FAILED))
588 /* Let the kernel know we are going to read everything in sequence. */
589 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
592 if (unlikely (elfmap == MAP_FAILED))
594 /* Read from the file descriptor. For this we must position the
596 // XXX Eventually add flag which avoids this if the position
597 // XXX is known to match.
598 if (from != 0 && lseek64 (fd, from, SEEK_SET) != from)
599 error (EXIT_FAILURE, errno, gettext ("lseek64 failed"));
601 return read_block_no_mmap (fd, fname, from, to - from);
604 assert ((off64_t) min_len_bytes < fdlen);
606 if (to < (off64_t) elfmap_off || from > (off64_t) (elfmap_off + elfmap_size))
608 /* The existing mapping cannot fit at all. Map the new area.
609 We always map the full range of ELFMAP_SIZE bytes even if
610 this extends beyond the end of the file. The Linux kernel
611 handles this OK if the access pages are not touched. */
/* NOTE(review): elfmap_off is rounded down to a page boundary, but
   the mmap64 call below is given FROM as the file offset.  mmap
   wants a page-aligned offset, so elfmap_off looks like the intended
   argument; confirm.  */
612 elfmap_off = from & ~(ps - 1);
613 if (mmap64 (elfmap, elfmap_size, PROT_READ,
614 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
616 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
617 elfmap_base = elfmap;
620 char *unprinted = NULL;
622 /* Use the existing mapping as much as possible. If necessary, map
624 if (from >= (off64_t) elfmap_off
625 && from < (off64_t) (elfmap_off + elfmap_size))
626 /* There are at least a few bytes in this mapping which we can
628 process_chunk (fname, elfmap_base + (from - elfmap_off),
629 MIN (to, (off64_t) (elfmap_off + elfmap_size)),
630 MIN (to, (off64_t) (elfmap_off + elfmap_size)) - from,
633 if (to > (off64_t) (elfmap_off + elfmap_size))
635 unsigned char *remap_base = elfmap_base;
636 size_t read_now = elfmap_size - (elfmap_base - elfmap);
638 assert (from >= (off64_t) elfmap_off
639 && from < (off64_t) (elfmap_off + elfmap_size));
640 off64_t handled_to = elfmap_off + elfmap_size;
641 assert (elfmap == elfmap_base
642 || (elfmap_base - elfmap
643 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
644 if (elfmap == elfmap_base)
646 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
647 assert (elfmap_size >= keep_area + ps);
648 /* The keep area is used for the content of the previous
649 buffer we have to keep. This means copying those bytes
650 and for this we have to make the data writable. */
651 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
653 error (EXIT_FAILURE, errno, gettext ("mprotect failed"));
/* File data now starts behind the keep area.  */
655 elfmap_base = elfmap + keep_area;
660 /* Map the rest of the file, eventually again in pieces.
661 We speed things up with a nice Linux feature. Note
662 that we have at least two pages mapped. */
663 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
665 assert (read_now >= to_keep);
/* Copy the last TO_KEEP bytes of the piece just handled to the spot
   directly in front of elfmap_base.  */
666 memmove (elfmap_base - to_keep,
667 remap_base + read_now - to_keep, to_keep);
668 remap_base = elfmap_base;
670 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
672 read_now = MIN (to - handled_to,
673 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
675 assert (handled_to % ps == 0);
676 assert (handled_to % bytes_per_char == 0);
/* Map the next READ_NOW bytes of the file, starting at offset
   HANDLED_TO, in place at REMAP_BASE.  */
677 if (mmap64 (remap_base, read_now, PROT_READ,
678 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
680 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
681 elfmap_off = handled_to;
/* Scan the kept bytes together with the whole characters of the
   newly mapped piece.  */
683 process_chunk (fname, remap_base - to_keep,
684 elfmap_off + (read_now & ~(bytes_per_char - 1)),
685 to_keep + (read_now & ~(bytes_per_char - 1)),
687 handled_to += read_now;
688 if (handled_to >= to)
693 /* Don't print anything we collected so far. There is no
694 terminating NUL byte. */
/* Scan the whole file behind FD by running read_block over the range
   from offset 0 to FDLEN, and return its result.  FNAME is passed
   through unchanged.  */
702 read_fd (int fd, const char *fname, off64_t fdlen)
704 return read_block (fd, fname, fdlen, 0, fdlen);
/* Scan the ELF file ELF, open on FD, section by section.  Every
   section that has a header, is not of type SHT_NOBITS and has the
   SHF_ALLOC flag set is handed to read_block with its file offset
   range; the results are OR-ed into result.  There is also a path
   that scans the file with read_fd instead.  NOTE(review): that path
   is presumably taken when the file has no section at all; confirm.
   A mapping left behind by read_block is released before returning.  */
709 read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen)
713 /* We will look at each section separately. The ELF file is not
714 mmapped. The libelf implementation will load the needed parts on
715 demand. Since we only iterate over the section header table the
716 memory consumption at this stage is kept minimal. */
717 Elf_Scn *scn = elf_nextscn (elf, NULL);
719 return read_fd (fd, fname, fdlen);
725 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
727 /* Only look in sections which are loaded at runtime and
728 actually have content. */
/* NOTE(review): sh_offset and sh_size come straight from the file
   and are not compared against FDLEN before being used as the range
   to scan; confirm whether a bounds check is wanted.  */
729 if (shdr != NULL && shdr->sh_type != SHT_NOBITS
730 && (shdr->sh_flags & SHF_ALLOC) != 0)
731 result |= read_block (fd, fname, fdlen, shdr->sh_offset,
732 shdr->sh_offset + shdr->sh_size);
734 while ((scn = elf_nextscn (elf, scn)) != NULL);
736 if (elfmap != NULL && elfmap != MAP_FAILED)
737 munmap (elfmap, elfmap_size);
744 #include "debugpred.h"