1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
24 #include <sys/types.h>
34 #include "readtokens0.h"
35 #include "safe-read.h"
37 #if !defined iswspace && !HAVE_ISWSPACE
38 # define iswspace(wc) \
39 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
42 /* The official name of this program (e.g., no `g' prefix). */
43 #define PROGRAM_NAME "wc"
46 proper_name ("Paul Rubin"), \
47 proper_name ("David MacKenzie")
49 /* Size of atomic reads. */
50 #define BUFFER_SIZE (16 * 1024)
52 /* The name this program was run with. */
55 /* Running totals of lines, words, chars and bytes, accumulated by wc over
56 every file counted so far. max_line_length is the longest line seen in any of them. */
57 static uintmax_t total_lines;
58 static uintmax_t total_words;
59 static uintmax_t total_chars;
60 static uintmax_t total_bytes;
61 static uintmax_t max_line_length;
63 /* Which counts to print; cleared and then set from the options in main. */
64 static bool print_lines, print_words, print_chars, print_bytes;
65 static bool print_linelength;
67 /* The printf field width of each count; main sets it via compute_number_width. */
68 static int number_width;
70 /* True once standard input has been read; main then closes it before exiting. */
71 static bool have_read_stdin;
73 /* The result of calling fstat or stat on a file descriptor or file. */
76 /* If positive, fstat or stat has not been called yet. Otherwise,
77 this is the value returned from fstat or stat. */
80 /* If FAILED is zero, this is the file's status. */
84 /* For long options that have no equivalent short option, use a
85 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
88 FILES0_FROM_OPTION = CHAR_MAX + 1
91 static struct option const longopts[] =
93 {"bytes", no_argument, NULL, 'c'},
94 {"chars", no_argument, NULL, 'm'},
95 {"lines", no_argument, NULL, 'l'},
96 {"words", no_argument, NULL, 'w'},
97 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
98 {"max-line-length", no_argument, NULL, 'L'},
99 {GETOPT_HELP_OPTION_DECL},
100 {GETOPT_VERSION_OPTION_DECL},
107 if (status != EXIT_SUCCESS)
108 fprintf (stderr, _("Try `%s --help' for more information.\n"),
113 Usage: %s [OPTION]... [FILE]...\n\
114 or: %s [OPTION]... --files0-from=F\n\
116 program_name, program_name);
118 Print newline, word, and byte counts for each FILE, and a total line if\n\
119 more than one FILE is specified. With no FILE, or when FILE is -,\n\
120 read standard input.\n\
121 -c, --bytes print the byte counts\n\
122 -m, --chars print the character counts\n\
123 -l, --lines print the newline counts\n\
126 --files0-from=F read input from the files specified by\n\
127 NUL-terminated names in file F\n\
128 -L, --max-line-length print the length of the longest line\n\
129 -w, --words print the word counts\n\
131 fputs (HELP_OPTION_DESCRIPTION, stdout);
132 fputs (VERSION_OPTION_DESCRIPTION, stdout);
133 emit_bug_reporting_address ();
138 /* FILE is the name of the file (or NULL for standard input)
139 associated with the specified counters. */
141 write_counts (uintmax_t lines,
145 uintmax_t linelength,
148 static char const format_sp_int[] = " %*s";
149 char const *format_int = format_sp_int + 1;
150 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
154 printf (format_int, number_width, umaxtostr (lines, buf));
155 format_int = format_sp_int;
159 printf (format_int, number_width, umaxtostr (words, buf));
160 format_int = format_sp_int;
164 printf (format_int, number_width, umaxtostr (chars, buf));
165 format_int = format_sp_int;
169 printf (format_int, number_width, umaxtostr (bytes, buf));
170 format_int = format_sp_int;
172 if (print_linelength)
174 printf (format_int, number_width, umaxtostr (linelength, buf));
177 printf (" %s", file);
181 /* Count words. FILE_X is the name of the file (or NULL for standard
182 input) that is open on descriptor FD. *FSTATUS is its status.
183 Return true if successful. */
185 wc (int fd, char const *file_x, struct fstatus *fstatus)
188 char buf[BUFFER_SIZE + 1];
190 uintmax_t lines, words, chars, bytes, linelength;
191 bool count_bytes, count_chars, count_complicated;
192 char const *file = file_x ? file_x : _("standard input");
194 lines = words = chars = bytes = linelength = 0;
196 /* If in the current locale, chars are equivalent to bytes, we prefer
197 counting bytes, because that's easier. */
198 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
201 count_bytes = print_bytes;
202 count_chars = print_chars;
207 count_bytes = print_bytes | print_chars;
210 count_complicated = print_words | print_linelength;
212 /* When counting only bytes, save some line- and word-counting
213 overhead. If FD is a `regular' Unix file, using lseek is enough
214 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
215 bytes at a time until EOF. Note that the `size' (number of bytes)
216 that wc reports is smaller than stats.st_size when the file is not
217 positioned at its beginning. That's why the lseek calls below are
218 necessary. For example the command
219 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
220 should make wc report `0' bytes. */
222 if (count_bytes & !count_chars & !print_lines & !count_complicated)
224 off_t current_pos, end_pos;
226 if (0 < fstatus->failed)
227 fstatus->failed = fstat (fd, &fstatus->st);
229 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
230 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
231 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
233 /* Be careful here. The current position may actually be
234 beyond the end of the file, as in the example above. */
235 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
239 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
241 if (bytes_read == SAFE_READ_ERROR)
243 error (0, errno, "%s", file);
251 else if (!count_chars & !count_complicated)
253 /* Use a separate loop when counting only lines or lines and bytes --
254 but not chars or words. */
255 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
259 if (bytes_read == SAFE_READ_ERROR)
261 error (0, errno, "%s", file);
266 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
274 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
275 # define SUPPORT_OLD_MBRTOWC 1
276 else if (MB_CUR_MAX > 1)
278 bool in_word = false;
279 uintmax_t linepos = 0;
280 mbstate_t state = { 0, };
281 bool in_shift = false;
282 # if SUPPORT_OLD_MBRTOWC
283 /* Back-up the state before each multibyte character conversion and
284 move the last incomplete character of the buffer to the front
285 of the buffer. This is needed because we don't know whether
286 the `mbrtowc' function updates the state when it returns -2
287 (the ISO C 99 and glibc-2.2 behaviour) or not (the amended
288 ANSI C, glibc-2.1 and Solaris 5.7 behaviour). We don't have an
289 autoconf test for this, yet. */
290 size_t prev = 0; /* number of bytes carried over from previous round */
292 const size_t prev = 0;
295 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
298 # if SUPPORT_OLD_MBRTOWC
299 mbstate_t backup_state;
301 if (bytes_read == SAFE_READ_ERROR)
303 error (0, errno, "%s", file);
316 if (!in_shift && is_basic (*p))
318 /* Handle most ASCII characters quickly, without calling
326 # if SUPPORT_OLD_MBRTOWC
327 backup_state = state;
329 n = mbrtowc (&wide_char, p, bytes_read, &state);
330 if (n == (size_t) -2)
332 # if SUPPORT_OLD_MBRTOWC
333 state = backup_state;
337 if (n == (size_t) -1)
339 /* Remember that we read a byte, but don't complain
340 about the error. Because of the decoding error,
341 this is considered to be a byte but not a
342 character (that is, chars is not incremented). */
347 if (mbsinit (&state))
365 if (linepos > linelength)
366 linelength = linepos;
368 goto mb_word_separator;
370 linepos += 8 - (linepos % 8);
371 goto mb_word_separator;
381 if (iswprint (wide_char))
383 int width = wcwidth (wide_char);
386 if (iswspace (wide_char))
387 goto mb_word_separator;
393 while (bytes_read > 0);
395 # if SUPPORT_OLD_MBRTOWC
398 if (bytes_read == BUFFER_SIZE)
400 /* Encountered a very long redundant shift sequence. */
404 memmove (buf, p, bytes_read);
409 if (linepos > linelength)
410 linelength = linepos;
416 bool in_word = false;
417 uintmax_t linepos = 0;
419 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
422 if (bytes_read == SAFE_READ_ERROR)
424 error (0, errno, "%s", file);
439 if (linepos > linelength)
440 linelength = linepos;
444 linepos += 8 - (linepos % 8);
455 if (isprint (to_uchar (p[-1])))
458 if (isspace (to_uchar (p[-1])))
465 while (--bytes_read);
467 if (linepos > linelength)
468 linelength = linepos;
472 if (count_chars < print_chars)
475 write_counts (lines, words, chars, bytes, linelength, file_x);
476 total_lines += lines;
477 total_words += words;
478 total_chars += chars;
479 total_bytes += bytes;
480 if (linelength > max_line_length)
481 max_line_length = linelength;
487 wc_file (char const *file, struct fstatus *fstatus)
489 if (! file || STREQ (file, "-"))
491 have_read_stdin = true;
492 if (O_BINARY && ! isatty (STDIN_FILENO))
493 freopen (NULL, "rb", stdin);
494 return wc (STDIN_FILENO, file, fstatus);
498 int fd = open (file, O_RDONLY | O_BINARY);
501 error (0, errno, "%s", file);
506 bool ok = wc (fd, file, fstatus);
509 error (0, errno, "%s", file);
517 /* Return the file status for the NFILES files addressed by FILE.
518 Optimize the case where only one number is printed, for just one
519 file; in that case we can use a print width of 1, so we don't need
522 static struct fstatus *
523 get_input_fstatus (int nfiles, char * const *file)
525 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
528 && ((print_lines + print_words + print_chars
529 + print_bytes + print_linelength)
531 fstatus[0].failed = 1;
536 for (i = 0; i < nfiles; i++)
537 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
538 ? fstat (STDIN_FILENO, &fstatus[i].st)
539 : stat (file[i], &fstatus[i].st));
545 /* Return a print width suitable for the NFILES files whose status is
546 recorded in FSTATUS. Optimize the same special case that
547 get_input_fstatus optimizes. */
550 compute_number_width (int nfiles, struct fstatus const *fstatus)
554 if (0 < nfiles && fstatus[0].failed <= 0)
556 int minimum_width = 1;
557 uintmax_t regular_total = 0;
560 for (i = 0; i < nfiles; i++)
561 if (! fstatus[i].failed)
563 if (S_ISREG (fstatus[i].st.st_mode))
564 regular_total += fstatus[i].st.st_size;
569 for (; 10 <= regular_total; regular_total /= 10)
571 if (width < minimum_width)
572 width = minimum_width;
580 main (int argc, char **argv)
587 char *files_from = NULL;
588 struct fstatus *fstatus;
591 initialize_main (&argc, &argv);
592 program_name = argv[0];
593 setlocale (LC_ALL, "");
594 bindtextdomain (PACKAGE, LOCALEDIR);
595 textdomain (PACKAGE);
597 atexit (close_stdout);
599 print_lines = print_words = print_chars = print_bytes = false;
600 print_linelength = false;
601 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
603 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
623 print_linelength = true;
626 case FILES0_FROM_OPTION:
630 case_GETOPT_HELP_CHAR;
632 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
635 usage (EXIT_FAILURE);
638 if (! (print_lines | print_words | print_chars | print_bytes
640 print_lines = print_words = print_bytes = true;
646 /* When using --files0-from=F, you may not specify any files
647 on the command-line. */
650 error (0, 0, _("extra operand %s"), quote (argv[optind]));
651 fprintf (stderr, "%s\n",
652 _("File operands cannot be combined with --files0-from."));
653 usage (EXIT_FAILURE);
656 if (STREQ (files_from, "-"))
660 stream = fopen (files_from, "r");
662 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
666 readtokens0_init (&tok);
668 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
669 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
677 static char *stdin_only[2];
678 files = (optind < argc ? argv + optind : stdin_only);
679 nfiles = (optind < argc ? argc - optind : 1);
680 stdin_only[0] = NULL;
683 fstatus = get_input_fstatus (nfiles, files);
684 number_width = compute_number_width (nfiles, fstatus);
687 for (i = 0; i < nfiles; i++)
691 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
694 /* Give a better diagnostic in an unusual case:
695 printf - | wc --files0-from=- */
696 error (0, 0, _("when reading file names from stdin, "
697 "no file name of %s allowed"),
702 /* Diagnose a zero-length file name. When it's one
703 among many, knowing the record number may help. */
704 if (files[i][0] == '\0')
709 /* Using the standard `filename:line-number:' prefix here is
710 not totally appropriate, since NUL is the separator, not NL,
711 but it might be better than nothing. */
712 unsigned long int file_number = i + 1;
713 error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
714 file_number, _("invalid zero-length file name"));
717 error (0, 0, "%s", _("invalid zero-length file name"));
722 ok &= wc_file (files[i], &fstatus[i]);
726 write_counts (total_lines, total_words, total_chars, total_bytes,
727 max_line_length, _("total"));
731 if (have_read_stdin && close (STDIN_FILENO) != 0)
732 error (EXIT_FAILURE, errno, "-");
734 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);