1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
24 #include <sys/types.h>
33 #include "readtokens0.h"
34 #include "safe-read.h"
/* Fallback used when iswspace is neither defined as a macro nor
   reported available by HAVE_ISWSPACE: WC counts as whitespace only
   if it is unchanged by to_uchar and isspace accepts that value. */
36 #if !defined iswspace && !HAVE_ISWSPACE
37 # define iswspace(wc) \
38 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
41 /* The official name of this program (e.g., no `g' prefix).
   main hands it, together with AUTHORS, to case_GETOPT_VERSION_CHAR. */
42 #define PROGRAM_NAME "wc"
45 proper_name ("Paul Rubin"), \
46 proper_name ("David MacKenzie")
48 /* Size of atomic reads: the largest number of bytes requested from a
   single safe_read call in wc. */
49 #define BUFFER_SIZE (16 * 1024)
51 /* Cumulative number of lines, words, chars and bytes in all files so far.
52 max_line_length is the maximum over all files processed so far.
   wc updates these after reporting each file; main passes them to
   write_counts for the "total" line. */
53 static uintmax_t total_lines;
54 static uintmax_t total_words;
55 static uintmax_t total_chars;
56 static uintmax_t total_bytes;
57 static uintmax_t max_line_length;
59 /* Which counts to print.  main clears these and then sets them
   according to the command-line options. */
60 static bool print_lines, print_words, print_chars, print_bytes;
61 static bool print_linelength;
63 /* The print width of each count.  main assigns it from
   compute_number_width; write_counts uses it as the printf field
   width. */
64 static int number_width;
66 /* True if we have ever read the standard input.  Set by wc_file;
   main checks it to decide whether STDIN_FILENO must be closed. */
67 static bool have_read_stdin;
69 /* The result of calling fstat or stat on a file descriptor or file. */
72 /* If positive, fstat or stat has not been called yet. Otherwise,
73 this is the value returned from fstat or stat. */
76 /* If FAILED is zero, this is the file's status. */
80 /* For long options that have no equivalent short option, use a
81 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
84 FILES0_FROM_OPTION = CHAR_MAX + 1
/* Long options handed to getopt_long in main.  Each entry maps to the
   short option character that the option loop handles; --files0-from
   has no short form and is reported as FILES0_FROM_OPTION. */
87 static struct option const longopts[] =
89 {"bytes", no_argument, NULL, 'c'},
90 {"chars", no_argument, NULL, 'm'},
91 {"lines", no_argument, NULL, 'l'},
92 {"words", no_argument, NULL, 'w'},
93 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
94 {"max-line-length", no_argument, NULL, 'L'},
95 {GETOPT_HELP_OPTION_DECL},
96 {GETOPT_VERSION_OPTION_DECL},
/* When STATUS is not EXIT_SUCCESS, print a hint on stderr that points
   the user at --help. */
103 if (status != EXIT_SUCCESS)
104 fprintf (stderr, _("Try `%s --help' for more information.\n"),
109 Usage: %s [OPTION]... [FILE]...\n\
110 or: %s [OPTION]... --files0-from=F\n\
112 program_name, program_name);
114 Print newline, word, and byte counts for each FILE, and a total line if\n\
115 more than one FILE is specified. With no FILE, or when FILE is -,\n\
116 read standard input.\n\
117 -c, --bytes print the byte counts\n\
118 -m, --chars print the character counts\n\
119 -l, --lines print the newline counts\n\
122 --files0-from=F read input from the files specified by\n\
123 NUL-terminated names in file F\n\
124 -L, --max-line-length print the length of the longest line\n\
125 -w, --words print the word counts\n\
127 fputs (HELP_OPTION_DESCRIPTION, stdout);
128 fputs (VERSION_OPTION_DESCRIPTION, stdout);
129 emit_bug_reporting_address ();
134 /* FILE is the name of the file (or NULL for standard input)
135 associated with the specified counters.
   Every counter that is printed is converted with umaxtostr and
   written by printf in a field of number_width columns. */
137 write_counts (uintmax_t lines,
141 uintmax_t linelength,
/* format_int starts one character into format_sp_int, which skips the
   leading space, so the first count printed has nothing in front of
   it.  After each count it is reset to the full format, so every
   later count is preceded by one space. */
144 static char const format_sp_int[] = " %*s";
145 char const *format_int = format_sp_int + 1;
/* Scratch buffer that umaxtostr fills for each count in turn. */
146 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
150 printf (format_int, number_width, umaxtostr (lines, buf));
151 format_int = format_sp_int;
155 printf (format_int, number_width, umaxtostr (words, buf));
156 format_int = format_sp_int;
160 printf (format_int, number_width, umaxtostr (chars, buf));
161 format_int = format_sp_int;
165 printf (format_int, number_width, umaxtostr (bytes, buf));
166 format_int = format_sp_int;
168 if (print_linelength)
170 printf (format_int, number_width, umaxtostr (linelength, buf));
/* The file name follows the counts, separated by one space. */
173 printf (" %s", file);
177 /* Count words. FILE_X is the name of the file (or NULL for standard
178 input) that is open on descriptor FD. *FSTATUS is its status.
179 Return true if successful.
   Lines, words, chars, bytes and the longest line length are tallied
   for this file, passed to write_counts, and then folded into the
   file-scope totals. */
181 wc (int fd, char const *file_x, struct fstatus *fstatus)
184 char buf[BUFFER_SIZE + 1];
186 uintmax_t lines, words, chars, bytes, linelength;
187 bool count_bytes, count_chars, count_complicated;
/* Name used in diagnostics; standard input gets a translated
   placeholder because FILE_X is NULL for it. */
188 char const *file = file_x ? file_x : _("standard input");
190 lines = words = chars = bytes = linelength = 0;
192 /* If in the current locale, chars are equivalent to bytes, we prefer
193 counting bytes, because that's easier. */
194 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
197 count_bytes = print_bytes;
198 count_chars = print_chars;
203 count_bytes = print_bytes | print_chars;
206 count_complicated = print_words | print_linelength;
208 /* When counting only bytes, save some line- and word-counting
209 overhead. If FD is a `regular' Unix file, using lseek is enough
210 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
211 bytes at a time until EOF. Note that the `size' (number of bytes)
212 that wc reports is smaller than stats.st_size when the file is not
213 positioned at its beginning. That's why the lseek calls below are
214 necessary. For example the command
215 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
216 should make wc report `0' bytes. */
218 if (count_bytes & !count_chars & !print_lines & !count_complicated)
220 off_t current_pos, end_pos;
/* A positive FAILED means the status has not been fetched yet, so
   fetch it now from the open descriptor. */
222 if (0 < fstatus->failed)
223 fstatus->failed = fstat (fd, &fstatus->st);
225 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
226 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
227 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
229 /* Be careful here. The current position may actually be
230 beyond the end of the file. As in the example above. */
231 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
235 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
237 if (bytes_read == SAFE_READ_ERROR)
239 error (0, errno, "%s", file);
247 else if (!count_chars & !count_complicated)
249 /* Use a separate loop when counting only lines or lines and bytes --
250 but not chars or words. */
251 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
255 if (bytes_read == SAFE_READ_ERROR)
257 error (0, errno, "%s", file);
/* Locate each newline in the bytes that were just read. */
262 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
270 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
271 # define SUPPORT_OLD_MBRTOWC 1
/* The locale has multibyte characters: decode the input with mbrtowc. */
272 else if (MB_CUR_MAX > 1)
274 bool in_word = false;
275 uintmax_t linepos = 0;
276 mbstate_t state = { 0, };
277 bool in_shift = false;
278 # if SUPPORT_OLD_MBRTOWC
279 /* Back-up the state before each multibyte character conversion and
280 move the last incomplete character of the buffer to the front
281 of the buffer. This is needed because we don't know whether
282 the `mbrtowc' function updates the state when it returns -2, -
283 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
284 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
285 autoconf test for this, yet. */
286 size_t prev = 0; /* number of bytes carried over from previous round */
288 const size_t prev = 0;
291 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
294 # if SUPPORT_OLD_MBRTOWC
295 mbstate_t backup_state;
297 if (bytes_read == SAFE_READ_ERROR)
299 error (0, errno, "%s", file);
312 if (!in_shift && is_basic (*p))
314 /* Handle most ASCII characters quickly, without calling
322 # if SUPPORT_OLD_MBRTOWC
323 backup_state = state;
325 n = mbrtowc (&wide_char, p, bytes_read, &state);
326 if (n == (size_t) -2)
328 # if SUPPORT_OLD_MBRTOWC
329 state = backup_state;
333 if (n == (size_t) -1)
335 /* Remember that we read a byte, but don't complain
336 about the error. Because of the decoding error,
337 this is considered to be a byte but not a
338 character (that is, chars is not incremented). */
343 if (mbsinit (&state))
/* Keep linelength at the largest line position seen so far. */
361 if (linepos > linelength)
362 linelength = linepos;
364 goto mb_word_separator;
/* Advance linepos to the next multiple of 8. */
366 linepos += 8 - (linepos % 8);
367 goto mb_word_separator;
377 if (iswprint (wide_char))
379 int width = wcwidth (wide_char);
382 if (iswspace (wide_char))
383 goto mb_word_separator;
389 while (bytes_read > 0);
391 # if SUPPORT_OLD_MBRTOWC
394 if (bytes_read == BUFFER_SIZE)
396 /* Encountered a very long redundant shift sequence. */
/* Move the bytes not yet consumed to the front of the buffer so the
   next read can append to them. */
400 memmove (buf, p, bytes_read);
405 if (linepos > linelength)
406 linelength = linepos;
/* Byte-at-a-time counting, classifying each byte with isprint and
   isspace. */
412 bool in_word = false;
413 uintmax_t linepos = 0;
415 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
418 if (bytes_read == SAFE_READ_ERROR)
420 error (0, errno, "%s", file);
435 if (linepos > linelength)
436 linelength = linepos;
/* Advance linepos to the next multiple of 8. */
440 linepos += 8 - (linepos % 8);
451 if (isprint (to_uchar (p[-1])))
454 if (isspace (to_uchar (p[-1])))
461 while (--bytes_read);
463 if (linepos > linelength)
464 linelength = linepos;
/* True only when chars were requested (print_chars) but were not
   counted separately (count_chars is false). */
468 if (count_chars < print_chars)
/* Report this file, then fold its counts into the running totals. */
471 write_counts (lines, words, chars, bytes, linelength, file_x);
472 total_lines += lines;
473 total_words += words;
474 total_chars += chars;
475 total_bytes += bytes;
476 if (linelength > max_line_length)
477 max_line_length = linelength;
/* Run wc on FILE.  A NULL FILE or "-" means standard input: record
   that it has been read and count from STDIN_FILENO.  Any other FILE
   is opened read-only with open before wc is called on the resulting
   descriptor; problems are reported through error with the file
   name. */
483 wc_file (char const *file, struct fstatus *fstatus)
485 if (! file || STREQ (file, "-"))
487 have_read_stdin = true;
/* Where O_BINARY is nonzero and stdin is not a terminal, reopen stdin
   in binary mode. */
488 if (O_BINARY && ! isatty (STDIN_FILENO))
489 freopen (NULL, "rb", stdin);
490 return wc (STDIN_FILENO, file, fstatus);
494 int fd = open (file, O_RDONLY | O_BINARY);
497 error (0, errno, "%s", file);
502 bool ok = wc (fd, file, fstatus);
505 error (0, errno, "%s", file);
513 /* Return the file status for the NFILES files addressed by FILE.
The returned array comes from xnmalloc and has one entry per file.
In the general case a NULL or "-" entry is examined with fstat on
STDIN_FILENO and any other entry with stat on its name.
514 Optimize the case where only one number is printed, for just one
515 file; in that case we can use a print width of 1, so we don't need
518 static struct fstatus *
519 get_input_fstatus (int nfiles, char * const *file)
521 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
524 && ((print_lines + print_words + print_chars
525 + print_bytes + print_linelength)
527 fstatus[0].failed = 1;
532 for (i = 0; i < nfiles; i++)
533 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
534 ? fstat (STDIN_FILENO, &fstatus[i].st)
535 : stat (file[i], &fstatus[i].st));
541 /* Return a print width suitable for the NFILES files whose status is
542 recorded in FSTATUS. Optimize the same special case that
543 get_input_fstatus optimizes. */
546 compute_number_width (int nfiles, struct fstatus const *fstatus)
/* A positive FAILED in the first entry marks the special case in
   which the status was never fetched; only look at sizes otherwise. */
550 if (0 < nfiles && fstatus[0].failed <= 0)
552 int minimum_width = 1;
553 uintmax_t regular_total = 0;
/* Add up the sizes of the regular files whose status was obtained
   (FAILED is zero). */
556 for (i = 0; i < nfiles; i++)
557 if (! fstatus[i].failed)
559 if (S_ISREG (fstatus[i].st.st_mode))
560 regular_total += fstatus[i].st.st_size;
/* One iteration for every decimal digit of the total beyond the
   first. */
565 for (; 10 <= regular_total; regular_total /= 10)
/* Never return less than the minimum width. */
567 if (width < minimum_width)
568 width = minimum_width;
/* Parse the options, build the list of input files (the operands, the
   names read for --files0-from, or standard input alone), run wc_file
   on each of them, and exit with EXIT_SUCCESS only if OK is still
   true at the end. */
576 main (int argc, char **argv)
583 char *files_from = NULL;
584 struct fstatus *fstatus;
587 initialize_main (&argc, &argv);
588 set_program_name (argv[0]);
589 setlocale (LC_ALL, "");
590 bindtextdomain (PACKAGE, LOCALEDIR);
591 textdomain (PACKAGE);
593 atexit (close_stdout);
/* Start with no count selected and all totals at zero. */
595 print_lines = print_words = print_chars = print_bytes = false;
596 print_linelength = false;
597 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
599 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
619 print_linelength = true;
622 case FILES0_FROM_OPTION:
626 case_GETOPT_HELP_CHAR;
628 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
631 usage (EXIT_FAILURE);
/* When no count option was given, print lines, words and bytes. */
634 if (! (print_lines | print_words | print_chars | print_bytes
636 print_lines = print_words = print_bytes = true;
642 /* When using --files0-from=F, you may not specify any files
643 on the command-line. */
646 error (0, 0, _("extra operand %s"), quote (argv[optind]));
647 fprintf (stderr, "%s\n",
648 _("file operands cannot be combined with --files0-from"));
649 usage (EXIT_FAILURE);
652 if (STREQ (files_from, "-"))
656 stream = fopen (files_from, "r");
658 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
662 readtokens0_init (&tok);
664 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
665 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
/* Use the file operands if there are any; otherwise use a one-element
   list whose NULL entry stands for standard input. */
673 static char *stdin_only[2];
674 files = (optind < argc ? argv + optind : stdin_only);
675 nfiles = (optind < argc ? argc - optind : 1);
676 stdin_only[0] = NULL;
/* Gather the file status first so that the column width is chosen
   before any file is counted. */
679 fstatus = get_input_fstatus (nfiles, files);
680 number_width = compute_number_width (nfiles, fstatus);
683 for (i = 0; i < nfiles; i++)
687 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
690 /* Give a better diagnostic in an unusual case:
691 printf - | wc --files0-from=- */
692 error (0, 0, _("when reading file names from stdin, "
693 "no file name of %s allowed"),
698 /* Diagnose a zero-length file name. When it's one
699 among many, knowing the record number may help. */
700 if (files[i][0] == '\0')
705 /* Using the standard `filename:line-number:' prefix here is
706 not totally appropriate, since NUL is the separator, not NL,
707 but it might be better than nothing. */
708 unsigned long int file_number = i + 1;
709 error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
710 file_number, _("invalid zero-length file name"));
713 error (0, 0, "%s", _("invalid zero-length file name"));
/* A failure on any file clears OK for the final exit status. */
718 ok &= wc_file (files[i], &fstatus[i]);
/* Print the accumulated totals under the translated label "total". */
722 write_counts (total_lines, total_words, total_chars, total_bytes,
723 max_line_length, _("total"));
/* If standard input was read, closing it must succeed as well. */
727 if (have_read_stdin && close (STDIN_FILENO) != 0)
728 error (EXIT_FAILURE, errno, "-");
730 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);