1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Paul Rubin, phr@ocf.berkeley.edu
19 and David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
31 #include "readtokens0.h"
32 #include "safe-read.h"
35 #if !defined iswspace && !HAVE_ISWSPACE
36 # define iswspace(wc) \
37 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
40 /* The official name of this program (e.g., no `g' prefix). */
41 #define PROGRAM_NAME "wc"
43 #define AUTHORS "Paul Rubin", "David MacKenzie"
45 /* Size of atomic reads. */
46 #define BUFFER_SIZE (16 * 1024)
48 /* The name this program was run with. */
51 /* Cumulative number of lines, words, chars and bytes in all files so far.
52 max_line_length is the maximum over all files processed so far.
   wc adds the counts of each input to these after printing them, and
   main hands them to write_counts for the row labelled total. */
53 static uintmax_t total_lines;
54 static uintmax_t total_words;
55 static uintmax_t total_chars;
56 static uintmax_t total_bytes;
57 static uintmax_t max_line_length;
59 /* Which counts to print. main clears all of them before option
   parsing; if none ends up selected it turns on lines, words and bytes. */
60 static bool print_lines, print_words, print_chars, print_bytes;
61 static bool print_linelength;
63 /* The print width of each count. Assigned once in main from
   compute_number_width and used by write_counts as the field width. */
64 static int number_width;
66 /* True if we have ever read the standard input. Set by wc_file;
   main checks it to decide whether STDIN_FILENO must be closed. */
67 static bool have_read_stdin;
69 /* The result of calling fstat or stat on a file descriptor or file. */
72 /* If positive, fstat or stat has not been called yet. Otherwise,
73 this is the value returned from fstat or stat. */
76 /* If FAILED is zero, this is the file's status. */
80 /* For long options that have no equivalent short option, use a
81 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
84 FILES0_FROM_OPTION = CHAR_MAX + 1
/* Long option table handed to getopt_long in main. Each entry gives the
   value getopt_long returns for that long name: the matching short
   option character, or FILES0_FROM_OPTION for --files0-from, which has
   no short form. */
87 static struct option const longopts[] =
89 {"bytes", no_argument, NULL, 'c'},
90 {"chars", no_argument, NULL, 'm'},
91 {"lines", no_argument, NULL, 'l'},
92 {"words", no_argument, NULL, 'w'},
93 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
94 {"max-line-length", no_argument, NULL, 'L'},
95 {GETOPT_HELP_OPTION_DECL},
96 {GETOPT_VERSION_OPTION_DECL},
/* Help output. A STATUS other than EXIT_SUCCESS gets a one-line hint on
   stderr pointing at --help. The text below is the full help message,
   followed by the shared --help and --version descriptions and the
   bug-reporting address.
   NOTE(review): the routine header and the branch separating the hint
   from the full text are not visible in this view; presumably the full
   text is printed only when STATUS is EXIT_SUCCESS. */
103 if (status != EXIT_SUCCESS)
104 fprintf (stderr, _("Try `%s --help' for more information.\n"),
109 Usage: %s [OPTION]... [FILE]...\n\
110 or: %s [OPTION]... --files0-from=F\n\
112 program_name, program_name);
114 Print newline, word, and byte counts for each FILE, and a total line if\n\
115 more than one FILE is specified. With no FILE, or when FILE is -,\n\
116 read standard input.\n\
117 -c, --bytes print the byte counts\n\
118 -m, --chars print the character counts\n\
119 -l, --lines print the newline counts\n\
122 --files0-from=F read input from the files specified by\n\
123 NUL-terminated names in file F\n\
124 -L, --max-line-length print the length of the longest line\n\
125 -w, --words print the word counts\n\
127 fputs (HELP_OPTION_DESCRIPTION, stdout);
128 fputs (VERSION_OPTION_DESCRIPTION, stdout);
129 emit_bug_reporting_address ();
/* Print one row of counts (LINES, WORDS, CHARS, BYTES, LINELENGTH) on
   standard output, each formatted in a field of number_width columns,
   followed by the name in FILE. */
134 /* FILE is the name of the file (or NULL for standard input)
135 associated with the specified counters. */
137 write_counts (uintmax_t lines,
141 uintmax_t linelength,
/* format_sp_int is the column format with a leading separator space.
   format_int starts one character past that space, so the first column
   printed is not preceded by a separator. */
144 static char const format_sp_int[] = " %*s";
145 char const *format_int = format_sp_int + 1;
146 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
/* After any column is printed, format_int is switched to the spaced
   form so that every later column is separated from the previous one.
   NOTE(review): only the print_linelength guard is visible below; the
   guards for the other columns are presumably the matching print_*
   flags. */
150 printf (format_int, number_width, umaxtostr (lines, buf));
151 format_int = format_sp_int;
155 printf (format_int, number_width, umaxtostr (words, buf));
156 format_int = format_sp_int;
160 printf (format_int, number_width, umaxtostr (chars, buf));
161 format_int = format_sp_int;
165 printf (format_int, number_width, umaxtostr (bytes, buf));
166 format_int = format_sp_int;
168 if (print_linelength)
170 printf (format_int, number_width, umaxtostr (linelength, buf));
/* The name follows the counts after a single space.
   NOTE(review): FILE may be NULL for standard input; confirm that this
   call is guarded against a NULL FILE. */
173 printf (" %s", file);
177 /* Count the lines, words, characters and bytes of one input and find
   the length of its longest line, print the selected counts through
   write_counts and add them to the running totals.
   FILE_X is the name of the file (or NULL for standard
178 input) that is open on descriptor FD. *FSTATUS is its status.
179 Return true if successful. */
181 wc (int fd, char const *file_x, struct fstatus *fstatus)
184 char buf[BUFFER_SIZE + 1];
186 uintmax_t lines, words, chars, bytes, linelength;
187 bool count_bytes, count_chars, count_complicated;
/* FILE is the name used in diagnostics. FILE_X itself is what gets
   passed on to write_counts, so it stays NULL for standard input. */
188 char const *file = file_x ? file_x : _("standard input");
190 lines = words = chars = bytes = linelength = 0;
192 /* If in the current locale, chars are equivalent to bytes, we prefer
193 counting bytes, because that's easier. */
194 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
197 count_bytes = print_bytes;
198 count_chars = print_chars;
203 count_bytes = print_bytes | print_chars;
/* count_complicated is set when words or the longest line are wanted;
   those cases are handled by the character-by-character loops further
   down rather than by the two shortcut paths. */
206 count_complicated = print_words | print_linelength;
208 /* When counting only bytes, save some line- and word-counting
209 overhead. If FD is a `regular' Unix file, using lseek is enough
210 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
211 bytes at a time until EOF. Note that the `size' (number of bytes)
212 that wc reports is smaller than stats.st_size when the file is not
213 positioned at its beginning. That's why the lseek calls below are
214 necessary. For example the command
215 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
216 should make wc report `0' bytes. */
218 if (count_bytes & !count_chars & !print_lines & !count_complicated)
220 off_t current_pos, end_pos;
/* A positive failed value means the status has not been fetched yet,
   so fetch it now. */
222 if (0 < fstatus->failed)
223 fstatus->failed = fstat (fd, &fstatus->st);
225 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
226 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
227 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
229 /* Be careful here. The current position may actually be
230 beyond the end of the file. As in the example above. */
231 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
/* Fallback for the byte-only case: count by reading until EOF.
   NOTE(review): the loop test lets SAFE_READ_ERROR through only if
   bytes_read has an unsigned type; its declaration is not visible in
   this view, so confirm. */
235 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
237 if (bytes_read == SAFE_READ_ERROR)
239 error (0, errno, "%s", file);
247 else if (!count_chars & !count_complicated)
249 /* Use a separate loop when counting only lines or lines and bytes --
250 but not chars or words. */
251 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
255 if (bytes_read == SAFE_READ_ERROR)
257 error (0, errno, "%s", file);
/* Search the data just read for newline bytes with memchr, starting
   each search at P and stopping at the end of the data. */
262 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
270 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
271 # define SUPPORT_OLD_MBRTOWC 1
/* Multibyte locale: the input is decoded with mbrtowc so that
   characters rather than bytes are examined. */
272 else if (MB_CUR_MAX > 1)
274 bool in_word = false;
275 uintmax_t linepos = 0;
276 mbstate_t state = { 0, };
277 # if SUPPORT_OLD_MBRTOWC
278 /* Back-up the state before each multibyte character conversion and
279 move the last incomplete character of the buffer to the front
280 of the buffer. This is needed because we don't know whether
281 the `mbrtowc' function updates the state when it returns -2, -
282 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
283 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
284 autoconf test for this, yet. */
285 size_t prev = 0; /* number of bytes carried over from previous round */
287 const size_t prev = 0;
290 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
293 # if SUPPORT_OLD_MBRTOWC
294 mbstate_t backup_state;
296 if (bytes_read == SAFE_READ_ERROR)
298 error (0, errno, "%s", file);
311 # if SUPPORT_OLD_MBRTOWC
312 backup_state = state;
314 n = mbrtowc (&wide_char, p, bytes_read, &state);
/* A result of (size_t) -2 is the incomplete-character case described
   in the comment above; the saved state is put back for it. */
315 if (n == (size_t) -2)
317 # if SUPPORT_OLD_MBRTOWC
318 state = backup_state;
322 if (n == (size_t) -1)
324 /* Remember that we read a byte, but don't complain
325 about the error. Because of the decoding error,
326 this is considered to be a byte but not a
327 character (that is, chars is not incremented). */
348 if (linepos > linelength)
349 linelength = linepos;
351 goto mb_word_separator;
/* Advance linepos to the next multiple of 8.
   NOTE(review): the case label is not visible here; presumably this is
   the handling of a tab character. */
353 linepos += 8 - (linepos % 8);
354 goto mb_word_separator;
364 if (iswprint (wide_char))
366 int width = wcwidth (wide_char);
369 if (iswspace (wide_char))
370 goto mb_word_separator;
377 while (bytes_read > 0);
379 # if SUPPORT_OLD_MBRTOWC
382 if (bytes_read == BUFFER_SIZE)
384 /* Encountered a very long redundant shift sequence. */
/* Move the bytes that were not converted to the front of the buffer,
   as described in the comment on backing up the state above. */
388 memmove (buf, p, bytes_read);
393 if (linepos > linelength)
394 linelength = linepos;
/* Remaining case: each byte is classified with the ctype predicates
   isprint and isspace. */
400 bool in_word = false;
401 uintmax_t linepos = 0;
403 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
406 if (bytes_read == SAFE_READ_ERROR)
408 error (0, errno, "%s", file);
423 if (linepos > linelength)
424 linelength = linepos;
428 linepos += 8 - (linepos % 8);
439 if (isprint (to_uchar (p[-1])))
442 if (isspace (to_uchar (p[-1])))
449 while (--bytes_read);
451 if (linepos > linelength)
452 linelength = linepos;
/* count_chars < print_chars holds exactly when characters were
   requested but count_chars was left false, that is, when bytes were
   counted in their place.
   NOTE(review): the guarded statement is not visible in this view;
   presumably it copies the byte count into chars. */
456 if (count_chars < print_chars)
/* Report the counts for this input, then fold them into the running
   totals and the overall maximum line length. */
459 write_counts (lines, words, chars, bytes, linelength, file_x);
460 total_lines += lines;
461 total_words += words;
462 total_chars += chars;
463 total_bytes += bytes;
464 if (linelength > max_line_length)
465 max_line_length = linelength;
/* Count one input named FILE by handing it to wc. A NULL FILE or a
   lone dash means standard input, which is counted on STDIN_FILENO;
   any other name is opened read-only first. *FSTATUS is passed through
   to wc unchanged. Failures are reported with error using FILE and
   errno.
   NOTE(review): the return statements of the named-file branch are not
   visible in this view. */
471 wc_file (char const *file, struct fstatus *fstatus)
473 if (! file || STREQ (file, "-"))
/* Record that standard input has been consumed. */
475 have_read_stdin = true;
/* Where O_BINARY is nonzero and stdin is not a terminal, reopen stdin
   in binary mode. NOTE(review): the freopen result is not checked. */
476 if (O_BINARY && ! isatty (STDIN_FILENO))
477 freopen (NULL, "rb", stdin);
478 return wc (STDIN_FILENO, file, fstatus);
482 int fd = open (file, O_RDONLY | O_BINARY);
485 error (0, errno, "%s", file);
490 bool ok = wc (fd, file, fstatus);
/* NOTE(review): the condition guarding this second diagnostic is not
   visible here; presumably it reports a failure to close FD. */
493 error (0, errno, "%s", file);
501 /* Return the file status for the NFILES files addressed by FILE.
502 Optimize the case where only one number is printed, for just one
503 file; in that case we can use a print width of 1, so we don't need to stat the file. */
506 static struct fstatus *
507 get_input_fstatus (int nfiles, char * const *file)
/* Allocate one status record per input. */
509 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
/* The sum of the print_* flags is the number of counts selected.
   NOTE(review): the rest of this condition is not visible in this
   view. */
512 && ((print_lines + print_words + print_chars
513 + print_bytes + print_linelength)
/* Special case: leave the single entry marked with a positive failed
   value, meaning that neither fstat nor stat has been called for it. */
515 fstatus[0].failed = 1;
/* General case: store the result of fstat on STDIN_FILENO for a NULL
   name or a lone dash, and of stat for any other name. */
520 for (i = 0; i < nfiles; i++)
521 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
522 ? fstat (STDIN_FILENO, &fstatus[i].st)
523 : stat (file[i], &fstatus[i].st));
529 /* Return a print width suitable for the NFILES files whose status is
530 recorded in FSTATUS. Optimize the same special case that
531 get_input_fstatus optimizes. */
534 compute_number_width (int nfiles, struct fstatus const *fstatus)
/* Derive the width from file sizes only when there is at least one
   input and the first status was really fetched. A positive failed
   value on entry 0 marks the case in which no status was collected. */
538 if (0 < nfiles && fstatus[0].failed <= 0)
540 int minimum_width = 1;
541 uintmax_t regular_total = 0;
/* Add up the sizes of the regular files whose status was obtained.
   NOTE(review): the handling of inputs that are not regular files is
   not visible in this view. */
544 for (i = 0; i < nfiles; i++)
545 if (! fstatus[i].failed)
547 if (S_ISREG (fstatus[i].st.st_mode))
548 regular_total += fstatus[i].st.st_size;
/* Each division by 10 corresponds to one more decimal digit in the
   total. NOTE(review): the loop body is not visible here; presumably
   it increments width. */
553 for (; 10 <= regular_total; regular_total /= 10)
/* Never return less than minimum_width. */
555 if (width < minimum_width)
556 width = minimum_width;
/* Program entry point. Parses the options, builds the list of inputs
   (command-line operands, names read via --files0-from, or standard
   input alone), collects file status to choose the column width, counts
   each input with wc_file, and exits with EXIT_SUCCESS only when ok is
   still true at the end. A row labelled total is written through
   write_counts; the condition guarding it is not visible in this
   view. */
564 main (int argc, char **argv)
571 char *files_from = NULL;
572 struct fstatus *fstatus;
575 initialize_main (&argc, &argv);
576 program_name = argv[0];
/* Take the locale from the environment and set up message
   translation. */
577 setlocale (LC_ALL, "");
578 bindtextdomain (PACKAGE, LOCALEDIR);
579 textdomain (PACKAGE);
/* Register close_stdout to run at exit. */
581 atexit (close_stdout);
583 print_lines = print_words = print_chars = print_bytes = false;
584 print_linelength = false;
585 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
587 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
607 print_linelength = true;
610 case FILES0_FROM_OPTION:
614 case_GETOPT_HELP_CHAR;
616 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
619 usage (EXIT_FAILURE);
/* No count was selected explicitly: default to lines, words and
   bytes. */
622 if (! (print_lines | print_words | print_chars | print_bytes
624 print_lines = print_words = print_bytes = true;
630 /* When using --files0-from=F, you may not specify any files
631 on the command-line. */
634 error (0, 0, _("extra operand %s"), quote (argv[optind]));
635 fprintf (stderr, "%s\n",
636 _("File operands cannot be combined with --files0-from."));
637 usage (EXIT_FAILURE);
/* A lone dash as the --files0-from argument is tested separately from
   a real file name, which is opened for reading.
   NOTE(review): the body of the dash branch is not visible here;
   presumably it reads the names from standard input. */
640 if (STREQ (files_from, "-"))
644 stream = fopen (files_from, "r");
646 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
650 readtokens0_init (&tok);
652 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
653 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
/* Without --files0-from: use the remaining command-line operands, or a
   single NULL entry when there are none. wc_file treats a NULL name as
   standard input. */
661 static char *stdin_only[2];
662 files = (optind < argc ? argv + optind : stdin_only);
663 nfiles = (optind < argc ? argc - optind : 1);
664 stdin_only[0] = NULL;
/* Gather file status first so that the column width is fixed before
   anything is printed. */
667 fstatus = get_input_fstatus (nfiles, files);
668 number_width = compute_number_width (nfiles, fstatus);
671 for (i = 0; i < nfiles; i++)
/* When the name list itself came from standard input, an entry that is
   a lone dash is diagnosed. NOTE(review): what follows the diagnostic
   is not visible in this view. */
673 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
677 _("when reading file names from stdin, "
678 "no file name of %s allowed"),
682 ok &= wc_file (files[i], &fstatus[i]);
686 write_counts (total_lines, total_words, total_chars, total_bytes,
687 max_line_length, _("total"));
/* If standard input was read, a failure to close it is fatal. */
691 if (have_read_stdin && close (STDIN_FILENO) != 0)
692 error (EXIT_FAILURE, errno, "-");
694 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);