1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Paul Rubin, phr@ocf.berkeley.edu
19 and David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
33 #include "readtokens0.h"
34 #include "safe-read.h"
36 #if !defined iswspace && !HAVE_ISWSPACE
37 # define iswspace(wc) \
38 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
41 /* The official name of this program (e.g., no `g' prefix). */
42 #define PROGRAM_NAME "wc"
44 #define AUTHORS "Paul Rubin", "David MacKenzie"
46 /* Size of atomic reads. */
47 #define BUFFER_SIZE (16 * 1024)
49 /* The name this program was run with. */
52 /* Cumulative number of lines, words, chars and bytes in all files so far.
53 max_line_length is the maximum over all files processed so far. */
54 static uintmax_t total_lines;
55 static uintmax_t total_words;
56 static uintmax_t total_chars;
57 static uintmax_t total_bytes;
58 static uintmax_t max_line_length;
60 /* Which counts to print. */
61 static bool print_lines, print_words, print_chars, print_bytes;
62 static bool print_linelength;
64 /* The print width of each count. */
65 static int number_width;
67 /* True if we have ever read the standard input. */
68 static bool have_read_stdin;
70 /* The result of calling fstat or stat on a file descriptor or file. */
73 /* If positive, fstat or stat has not been called yet. Otherwise,
74 this is the value returned from fstat or stat. */
77 /* If FAILED is zero, this is the file's status. */
81 /* For long options that have no equivalent short option, use a
82 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
85 FILES0_FROM_OPTION = CHAR_MAX + 1
88 static struct option const longopts[] =
90 {"bytes", no_argument, NULL, 'c'},
91 {"chars", no_argument, NULL, 'm'},
92 {"lines", no_argument, NULL, 'l'},
93 {"words", no_argument, NULL, 'w'},
94 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
95 {"max-line-length", no_argument, NULL, 'L'},
96 {GETOPT_HELP_OPTION_DECL},
97 {GETOPT_VERSION_OPTION_DECL},
104 if (status != EXIT_SUCCESS)
105 fprintf (stderr, _("Try `%s --help' for more information.\n"),
110 Usage: %s [OPTION]... [FILE]...\n\
111 or: %s [OPTION]... --files0-from=F\n\
113 program_name, program_name);
115 Print newline, word, and byte counts for each FILE, and a total line if\n\
116 more than one FILE is specified. With no FILE, or when FILE is -,\n\
117 read standard input.\n\
118 -c, --bytes print the byte counts\n\
119 -m, --chars print the character counts\n\
120 -l, --lines print the newline counts\n\
123 --files0-from=F read input from the files specified by\n\
124 NUL-terminated names in file F\n\
125 -L, --max-line-length print the length of the longest line\n\
126 -w, --words print the word counts\n\
128 fputs (HELP_OPTION_DESCRIPTION, stdout);
129 fputs (VERSION_OPTION_DESCRIPTION, stdout);
130 emit_bug_reporting_address ();
135 /* FILE is the name of the file (or NULL for standard input)
136 associated with the specified counters. */
138 write_counts (uintmax_t lines,
142 uintmax_t linelength,
145 static char const format_sp_int[] = " %*s";
146 char const *format_int = format_sp_int + 1;
147 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
151 printf (format_int, number_width, umaxtostr (lines, buf));
152 format_int = format_sp_int;
156 printf (format_int, number_width, umaxtostr (words, buf));
157 format_int = format_sp_int;
161 printf (format_int, number_width, umaxtostr (chars, buf));
162 format_int = format_sp_int;
166 printf (format_int, number_width, umaxtostr (bytes, buf));
167 format_int = format_sp_int;
169 if (print_linelength)
171 printf (format_int, number_width, umaxtostr (linelength, buf));
174 printf (" %s", file);
178 /* Count words. FILE_X is the name of the file (or NULL for standard
179 input) that is open on descriptor FD. *FSTATUS is its status.
180 Return true if successful. */
182 wc (int fd, char const *file_x, struct fstatus *fstatus)
185 char buf[BUFFER_SIZE + 1];
187 uintmax_t lines, words, chars, bytes, linelength;
188 bool count_bytes, count_chars, count_complicated;
189 char const *file = file_x ? file_x : _("standard input");
191 lines = words = chars = bytes = linelength = 0;
193 /* If in the current locale, chars are equivalent to bytes, we prefer
194 counting bytes, because that's easier. */
195 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
198 count_bytes = print_bytes;
199 count_chars = print_chars;
204 count_bytes = print_bytes | print_chars;
207 count_complicated = print_words | print_linelength;
209 /* When counting only bytes, save some line- and word-counting
210 overhead. If FD is a `regular' Unix file, using lseek is enough
211 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
212 bytes at a time until EOF. Note that the `size' (number of bytes)
213 that wc reports is smaller than fstatus->st.st_size when the file is not
214 positioned at its beginning. That's why the lseek calls below are
215 necessary. For example the command
216 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
217 should make wc report `0' bytes. */
219 if (count_bytes & !count_chars & !print_lines & !count_complicated)
221 off_t current_pos, end_pos;
223 if (0 < fstatus->failed)
224 fstatus->failed = fstat (fd, &fstatus->st);
226 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
227 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
228 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
230 /* Be careful here. The current position may actually be
231 beyond the end of the file, as in the example above. */
232 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
236 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
238 if (bytes_read == SAFE_READ_ERROR)
240 error (0, errno, "%s", file);
248 else if (!count_chars & !count_complicated)
250 /* Use a separate loop when counting only lines or lines and bytes --
251 but not chars or words. */
252 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
256 if (bytes_read == SAFE_READ_ERROR)
258 error (0, errno, "%s", file);
263 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
271 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
272 # define SUPPORT_OLD_MBRTOWC 1
273 else if (MB_CUR_MAX > 1)
275 bool in_word = false;
276 uintmax_t linepos = 0;
277 mbstate_t state = { 0, };
278 # if SUPPORT_OLD_MBRTOWC
279 /* Back-up the state before each multibyte character conversion and
280 move the last incomplete character of the buffer to the front
281 of the buffer. This is needed because we don't know whether
282 the `mbrtowc' function updates the state when it returns -2 (the
283 ISO C 99 and glibc-2.2 behaviour) or not (the amended
284 ANSI C, glibc-2.1 and Solaris 5.7 behaviour). We don't have an
285 autoconf test for this, yet. */
286 size_t prev = 0; /* number of bytes carried over from previous round */
288 const size_t prev = 0;
291 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
294 # if SUPPORT_OLD_MBRTOWC
295 mbstate_t backup_state;
297 if (bytes_read == SAFE_READ_ERROR)
299 error (0, errno, "%s", file);
312 # if SUPPORT_OLD_MBRTOWC
313 backup_state = state;
315 n = mbrtowc (&wide_char, p, bytes_read, &state);
316 if (n == (size_t) -2)
318 # if SUPPORT_OLD_MBRTOWC
319 state = backup_state;
323 if (n == (size_t) -1)
325 /* Remember that we read a byte, but don't complain
326 about the error. Because of the decoding error,
327 this is considered to be a byte but not a
328 character (that is, chars is not incremented). */
349 if (linepos > linelength)
350 linelength = linepos;
352 goto mb_word_separator;
354 linepos += 8 - (linepos % 8);
355 goto mb_word_separator;
365 if (iswprint (wide_char))
367 int width = wcwidth (wide_char);
370 if (iswspace (wide_char))
371 goto mb_word_separator;
378 while (bytes_read > 0);
380 # if SUPPORT_OLD_MBRTOWC
383 if (bytes_read == BUFFER_SIZE)
385 /* Encountered a very long redundant shift sequence. */
389 memmove (buf, p, bytes_read);
394 if (linepos > linelength)
395 linelength = linepos;
401 bool in_word = false;
402 uintmax_t linepos = 0;
404 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
407 if (bytes_read == SAFE_READ_ERROR)
409 error (0, errno, "%s", file);
424 if (linepos > linelength)
425 linelength = linepos;
429 linepos += 8 - (linepos % 8);
440 if (isprint (to_uchar (p[-1])))
443 if (isspace (to_uchar (p[-1])))
450 while (--bytes_read);
452 if (linepos > linelength)
453 linelength = linepos;
457 if (count_chars < print_chars)
460 write_counts (lines, words, chars, bytes, linelength, file_x);
461 total_lines += lines;
462 total_words += words;
463 total_chars += chars;
464 total_bytes += bytes;
465 if (linelength > max_line_length)
466 max_line_length = linelength;
472 wc_file (char const *file, struct fstatus *fstatus)
474 if (! file || STREQ (file, "-"))
476 have_read_stdin = true;
477 if (O_BINARY && ! isatty (STDIN_FILENO))
478 freopen (NULL, "rb", stdin);
479 return wc (STDIN_FILENO, file, fstatus);
483 int fd = open (file, O_RDONLY | O_BINARY);
486 error (0, errno, "%s", file);
491 bool ok = wc (fd, file, fstatus);
494 error (0, errno, "%s", file);
502 /* Return the file status for the NFILES files addressed by FILE.
503 Optimize the case where only one number is printed, for just one
504 file; in that case we can use a print width of 1, so we don't need
to stat the file. */
507 static struct fstatus *
508 get_input_fstatus (int nfiles, char * const *file)
510 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
513 && ((print_lines + print_words + print_chars
514 + print_bytes + print_linelength)
516 fstatus[0].failed = 1;
521 for (i = 0; i < nfiles; i++)
522 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
523 ? fstat (STDIN_FILENO, &fstatus[i].st)
524 : stat (file[i], &fstatus[i].st));
530 /* Return a print width suitable for the NFILES files whose status is
531 recorded in FSTATUS. Optimize the same special case that
532 get_input_fstatus optimizes. */
535 compute_number_width (int nfiles, struct fstatus const *fstatus)
539 if (0 < nfiles && fstatus[0].failed <= 0)
541 int minimum_width = 1;
542 uintmax_t regular_total = 0;
545 for (i = 0; i < nfiles; i++)
546 if (! fstatus[i].failed)
548 if (S_ISREG (fstatus[i].st.st_mode))
549 regular_total += fstatus[i].st.st_size;
554 for (; 10 <= regular_total; regular_total /= 10)
556 if (width < minimum_width)
557 width = minimum_width;
565 main (int argc, char **argv)
572 char *files_from = NULL;
573 struct fstatus *fstatus;
576 initialize_main (&argc, &argv);
577 program_name = argv[0];
578 setlocale (LC_ALL, "");
579 bindtextdomain (PACKAGE, LOCALEDIR);
580 textdomain (PACKAGE);
582 atexit (close_stdout);
584 print_lines = print_words = print_chars = print_bytes = false;
585 print_linelength = false;
586 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
588 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
608 print_linelength = true;
611 case FILES0_FROM_OPTION:
615 case_GETOPT_HELP_CHAR;
617 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
620 usage (EXIT_FAILURE);
623 if (! (print_lines | print_words | print_chars | print_bytes
625 print_lines = print_words = print_bytes = true;
631 /* When using --files0-from=F, you may not specify any files
632 on the command-line. */
635 error (0, 0, _("extra operand %s"), quote (argv[optind]));
636 fprintf (stderr, "%s\n",
637 _("File operands cannot be combined with --files0-from."));
638 usage (EXIT_FAILURE);
641 if (STREQ (files_from, "-"))
645 stream = fopen (files_from, "r");
647 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
651 readtokens0_init (&tok);
653 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
654 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
662 static char *stdin_only[2];
663 files = (optind < argc ? argv + optind : stdin_only);
664 nfiles = (optind < argc ? argc - optind : 1);
665 stdin_only[0] = NULL;
668 fstatus = get_input_fstatus (nfiles, files);
669 number_width = compute_number_width (nfiles, fstatus);
672 for (i = 0; i < nfiles; i++)
674 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
678 _("when reading file names from stdin, "
679 "no file name of %s allowed"),
683 ok &= wc_file (files[i], &fstatus[i]);
687 write_counts (total_lines, total_words, total_chars, total_bytes,
688 max_line_length, _("total"));
692 if (have_read_stdin && close (STDIN_FILENO) != 0)
693 error (EXIT_FAILURE, errno, "-");
695 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);