1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Paul Rubin, phr@ocf.berkeley.edu
19 and David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
31 #include "readtokens0.h"
32 #include "safe-read.h"
/* Fallback used when iswspace is neither already a macro nor reported
   available by HAVE_ISWSPACE: WC counts as white space only if it is
   equal to its own to_uchar conversion and isspace accepts that
   converted value.  */
35 #if !defined iswspace && !HAVE_ISWSPACE
36 # define iswspace(wc) \
37 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
40 /* The official name of this program (e.g., no `g' prefix). */
41 #define PROGRAM_NAME "wc"
/* Author list.  It expands to two comma-separated string literals, so
   it supplies two arguments where main hands it to
   case_GETOPT_VERSION_CHAR.  */
43 #define AUTHORS "Paul Rubin", "David MacKenzie"
45 /* Size of atomic reads. */
/* This is the byte count wc requests from each safe_read call; the
   multibyte loop requests BUFFER_SIZE minus the bytes carried over.  */
46 #define BUFFER_SIZE (16 * 1024)
48 /* The name this program was run with. */
51 /* Cumulative number of lines, words, chars and bytes in all files so far.
52 max_line_length is the maximum over all files processed so far. */
/* wc adds each file's counts to these after printing them, and main
   passes them to write_counts for the total line.  */
53 static uintmax_t total_lines;
54 static uintmax_t total_words;
55 static uintmax_t total_chars;
56 static uintmax_t total_bytes;
57 static uintmax_t max_line_length;
59 /* Which counts to print. */
/* main clears all five flags before option parsing; when none of them
   ends up selected it turns on lines, words and bytes.  */
60 static bool print_lines, print_words, print_chars, print_bytes;
61 static bool print_linelength;
63 /* The print width of each count. */
/* Passed as the field-width argument of every count that write_counts
   prints; main sets it from compute_number_width.  */
64 static int number_width;
66 /* True if we have ever read the standard input. */
/* Set by wc_file when the input is standard input; when set, main
   closes STDIN_FILENO before exiting and treats a failed close as
   fatal.  */
67 static bool have_read_stdin;
69 /* The result of calling fstat or stat on a file descriptor or file. */
72 /* If positive, fstat or stat has not been called yet. Otherwise,
73 this is the value returned from fstat or stat. */
76 /* If FAILED is zero, this is the file's status. */
80 /* For long options that have no equivalent short option, use a
81 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
/* Option code that longopts associates with --files0-from and that
   the option switch in main matches on.  */
84 FILES0_FROM_OPTION = CHAR_MAX + 1
/* Long-option table handed to getopt_long in main.  Each option that
   has a short form maps to that letter (c, m, l, w, L), so both
   spellings reach the same case in the option switch; --files0-from
   takes an argument and maps to FILES0_FROM_OPTION.  The help and
   version entries come from shared declaration macros.  */
87 static struct option const longopts[] =
89 {"bytes", no_argument, NULL, 'c'},
90 {"chars", no_argument, NULL, 'm'},
91 {"lines", no_argument, NULL, 'l'},
92 {"words", no_argument, NULL, 'w'},
93 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
94 {"max-line-length", no_argument, NULL, 'L'},
95 {GETOPT_HELP_OPTION_DECL},
96 {GETOPT_VERSION_OPTION_DECL},
/* Usage/help output.  For a STATUS other than EXIT_SUCCESS a short
   hint pointing at --help goes to stderr.  The remaining text is the
   synopsis (program_name substituted twice), the description of the
   counts and options, the shared --help/--version descriptions and
   the bug-report address, with the visible fputs/printf calls
   writing to stdout.
   NOTE(review): the branch structure around these calls and the
   statement that ends the routine are not visible here; main calls
   this as usage (EXIT_FAILURE).  */
103 if (status != EXIT_SUCCESS)
104 fprintf (stderr, _("Try `%s --help' for more information.\n"),
109 Usage: %s [OPTION]... [FILE]...\n\
110 or: %s [OPTION]... --files0-from=F\n\
112 program_name, program_name);
114 Print newline, word, and byte counts for each FILE, and a total line if\n\
115 more than one FILE is specified. With no FILE, or when FILE is -,\n\
116 read standard input.\n\
117 -c, --bytes print the byte counts\n\
118 -m, --chars print the character counts\n\
119 -l, --lines print the newline counts\n\
122 --files0-from=F read input from the files specified by\n\
123 NUL-terminated names in file F\n\
124 -L, --max-line-length print the length of the longest line\n\
125 -w, --words print the word counts\n\
127 fputs (HELP_OPTION_DESCRIPTION, stdout);
128 fputs (VERSION_OPTION_DESCRIPTION, stdout);
129 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
/* Print one output row: each count is converted with umaxtostr and
   printed in a field of width number_width, and the row ends with
   the name FILE after a single space.
   NOTE(review): only the print_linelength guard is visible; the other
   counts are presumably guarded by their print_* flags and the final
   printf by a non-NULL FILE -- confirm.  */
134 /* FILE is the name of the file (or NULL for standard input)
135 associated with the specified counters. */
137 write_counts (uintmax_t lines,
141 uintmax_t linelength,
144 static char const format_sp_int[] = " %*s";
/* Start one character into the format, i.e. without its leading
   space, so the first count printed has no separator in front of it.
   After every printed count format_int is switched to the full
   space-prefixed format for the fields that follow.  */
145 char const *format_int = format_sp_int + 1;
/* Scratch space for umaxtostr; it is reused for every count because
   each converted string is consumed by printf before the next
   conversion.  */
146 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
150 printf (format_int, number_width, umaxtostr (lines, buf));
151 format_int = format_sp_int;
155 printf (format_int, number_width, umaxtostr (words, buf));
156 format_int = format_sp_int;
160 printf (format_int, number_width, umaxtostr (chars, buf));
161 format_int = format_sp_int;
165 printf (format_int, number_width, umaxtostr (bytes, buf));
166 format_int = format_sp_int;
168 if (print_linelength)
170 printf (format_int, number_width, umaxtostr (linelength, buf));
173 printf (" %s", file);
/* Tally lines, words, characters, bytes and the longest line length
   of the input open on FD, doing only as much work as the selected
   counts require.  The results are printed through write_counts
   (which receives FILE_X, possibly NULL) and then folded into the
   file-scope totals.  Diagnostics use FILE, which falls back to a
   translated standard-input label when FILE_X is NULL.  */
177 /* Count words. FILE_X is the name of the file (or NULL for standard
178 input) that is open on descriptor FD. *FSTATUS is its status.
179 Return true if successful. */
181 wc (int fd, char const *file_x, struct fstatus *fstatus)
184 char buf[BUFFER_SIZE + 1];
186 uintmax_t lines, words, chars, bytes, linelength;
187 bool count_bytes, count_chars, count_complicated;
188 char const *file = file_x ? file_x : _("standard input");
190 lines = words = chars = bytes = linelength = 0;
192 /* If in the current locale, chars are equivalent to bytes, we prefer
193 counting bytes, because that's easier. */
194 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
/* Here bytes and characters are tracked as separate requests.  */
197 count_bytes = print_bytes;
198 count_chars = print_chars;
/* Here a request for characters is served by counting bytes;
   count_chars is not assigned on this visible line.
   NOTE(review): the condition choosing between this assignment and
   the two above is not visible.  */
203 count_bytes = print_bytes | print_chars;
/* Word counting and line-length tracking both need the per-character
   loops further down.  */
206 count_complicated = print_words | print_linelength;
208 /* When counting only bytes, save some line- and word-counting
209 overhead. If FD is a `regular' Unix file, using lseek is enough
210 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
211 bytes at a time until EOF. Note that the `size' (number of bytes)
212 that wc reports is smaller than stats.st_size when the file is not
213 positioned at its beginning. That's why the lseek calls below are
214 necessary. For example the command
215 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
216 should make wc report `0' bytes. */
/* The operands are all bool, so the non-short-circuit & is safe: the
   branch is taken only when bytes are wanted and characters, lines,
   words and line length are not.  */
218 if (count_bytes & !count_chars & !print_lines & !count_complicated)
220 off_t current_pos, end_pos;
/* A positive failed value means the status has not been fetched yet,
   so fetch it now and record the result of fstat.  */
222 if (0 < fstatus->failed)
223 fstatus->failed = fstat (fd, &fstatus->st);
/* Fast path: for a regular file whose status is known, and when both
   lseek calls succeed, the byte count is the distance from the
   current offset to the end of the file.  */
225 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
226 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
227 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
229 /* Be careful here. The current position may actually be
230 beyond the end of the file. As in the example above. */
231 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
/* Slow path for byte counting: read the input block by block.  The
   error test sits inside a loop whose condition is > 0, so
   SAFE_READ_ERROR must compare greater than zero here (presumably an
   unsigned all-ones value -- confirm in safe-read.h).  */
235 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
237 if (bytes_read == SAFE_READ_ERROR)
239 error (0, errno, "%s", file);
247 else if (!count_chars & !count_complicated)
249 /* Use a separate loop when counting only lines or lines and bytes --
250 but not chars or words. */
251 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
255 if (bytes_read == SAFE_READ_ERROR)
257 error (0, errno, "%s", file);
/* Search for newline bytes between P and the end of the data just
   read; the length argument shrinks as P moves forward.  */
262 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
270 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
271 # define SUPPORT_OLD_MBRTOWC 1
/* Multibyte locale: decode the input one wide character at a time
   with mbrtowc.  */
272 else if (MB_CUR_MAX > 1)
274 bool in_word = false;
275 uintmax_t linepos = 0;
/* Line number and errno of the most recent conversion error that was
   reported, used to suppress repeats of the same error on one line.  */
277 uintmax_t last_error_line = 0;
278 int last_error_errno = 0;
279 # if SUPPORT_OLD_MBRTOWC
280 /* Back-up the state before each multibyte character conversion and
281 move the last incomplete character of the buffer to the front
282 of the buffer. This is needed because we don't know whether
283 the `mbrtowc' function updates the state when it returns -2, -
284 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
285 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
286 autoconf test for this, yet. */
287 size_t prev = 0; /* number of bytes carried over from previous round */
/* Alternative declaration: nothing is ever carried over, so prev is
   a constant zero.  */
289 const size_t prev = 0;
292 memset (&state, 0, sizeof (mbstate_t));
/* New data is read in after the PREV bytes already held at the front
   of buf.  */
293 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
296 # if SUPPORT_OLD_MBRTOWC
297 mbstate_t backup_state;
299 if (bytes_read == SAFE_READ_ERROR)
301 error (0, errno, "%s", file);
314 # if SUPPORT_OLD_MBRTOWC
315 backup_state = state;
317 n = mbrtowc (&wide_char, p, bytes_read, &state);
/* (size_t) -2 is the mbrtowc result for an incomplete multibyte
   character; the saved state is restored in case mbrtowc modified
   it.  */
318 if (n == (size_t) -2)
320 # if SUPPORT_OLD_MBRTOWC
321 state = backup_state;
/* (size_t) -1 is the mbrtowc result for an invalid sequence.  */
325 if (n == (size_t) -1)
327 /* Signal repeated errors only once per line. */
328 if (!(lines + 1 == last_error_line
329 && errno == last_error_errno))
331 char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
332 last_error_line = lines + 1;
333 last_error_errno = errno;
334 error (0, errno, "%s:%s", file,
335 umaxtostr (last_error_line, line_number_buf));
/* Remember the longest line position reached so far.  */
358 if (linepos > linelength)
359 linelength = linepos;
361 goto mb_word_separator;
/* Move linepos to the next multiple of 8 strictly above its current
   value (presumably the tab case; the case label is not visible).  */
363 linepos += 8 - (linepos % 8);
364 goto mb_word_separator;
/* Query the display width of printable wide characters.
   NOTE(review): how the width is applied is not visible here.  */
374 if (iswprint (wide_char))
376 int width = wcwidth (wide_char);
379 if (iswspace (wide_char))
380 goto mb_word_separator;
387 while (bytes_read > 0);
389 # if SUPPORT_OLD_MBRTOWC
392 if (bytes_read == BUFFER_SIZE)
394 /* Encountered a very long redundant shift sequence. */
/* Carry the bytes that were not converted to the front of buf so the
   next read can append to them.  */
398 memmove (buf, p, bytes_read);
/* Account for the line position reached when the input ends.  */
403 if (linepos > linelength)
404 linelength = linepos;
/* Single-byte counterpart of the loop above: bytes are classified
   with isprint and isspace instead of the wide-character functions.  */
410 bool in_word = false;
411 uintmax_t linepos = 0;
413 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
416 if (bytes_read == SAFE_READ_ERROR)
418 error (0, errno, "%s", file);
433 if (linepos > linelength)
434 linelength = linepos;
438 linepos += 8 - (linepos % 8);
/* p[-1] is examined, so P has presumably already been advanced past
   the current byte (the increment is not visible here).  to_uchar is
   applied before each ctype call.  */
449 if (isprint (to_uchar (p[-1])))
452 if (isspace (to_uchar (p[-1])))
459 while (--bytes_read);
461 if (linepos > linelength)
462 linelength = linepos;
/* For bool operands this is true exactly when characters were
   requested (print_chars) but not counted on their own (count_chars
   false).  */
466 if (count_chars < print_chars)
/* Print this file's row, then fold its counts into the totals.  */
469 write_counts (lines, words, chars, bytes, linelength, file_x);
470 total_lines += lines;
471 total_words += words;
472 total_chars += chars;
473 total_bytes += bytes;
474 if (linelength > max_line_length)
475 max_line_length = linelength;
/* Count one input.  A NULL FILE or the name "-" selects standard
   input, which is counted directly on STDIN_FILENO; any other name is
   opened read-only (with O_BINARY) and the resulting descriptor is
   passed to wc.  FSTATUS is forwarded to wc unchanged.  Failures are
   reported through error with the file name and errno.
   NOTE(review): the conditions guarding the two error calls are not
   visible here.  */
481 wc_file (char const *file, struct fstatus *fstatus)
483 if (! file || STREQ (file, "-"))
/* Recorded so that main closes standard input before exiting.  */
485 have_read_stdin = true;
/* Where O_BINARY is nonzero, switch stdin to binary mode unless it
   is a terminal.
   NOTE(review): the freopen result is ignored, so a failure here
   goes unreported.  */
486 if (O_BINARY && ! isatty (STDIN_FILENO))
487 freopen (NULL, "rb", stdin);
488 return wc (STDIN_FILENO, file, fstatus);
492 int fd = open (file, O_RDONLY | O_BINARY);
495 error (0, errno, "%s", file);
500 bool ok = wc (fd, file, fstatus);
503 error (0, errno, "%s", file);
511 /* Return the file status for the NFILES files addressed by FILE.
512 Optimize the case where only one number is printed, for just one
513 file; in that case we can use a print width of 1, so we don't need to stat the file. */
516 static struct fstatus *
517 get_input_fstatus (int nfiles, char * const *file)
/* One status record per input file; xnmalloc receives the element
   count and the element size as separate arguments.  */
519 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
/* Adding the boolean selection flags gives the number of counts that
   will be printed for each file.  */
522 && ((print_lines + print_words + print_chars
523 + print_bytes + print_linelength)
/* A positive value marks the status as not fetched yet; wc calls
   fstat itself when it finds this marker and needs the status.  */
525 fstatus[0].failed = 1;
/* A NULL or "-" name denotes standard input, so the descriptor is
   queried with fstat; every other name is looked up with stat.  The
   result of the call is stored in the failed member.  */
530 for (i = 0; i < nfiles; i++)
531 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
532 ? fstat (STDIN_FILENO, &fstatus[i].st)
533 : stat (file[i], &fstatus[i].st));
539 /* Return a print width suitable for the NFILES files whose status is
540 recorded in FSTATUS. Optimize the same special case that
541 get_input_fstatus optimizes. */
544 compute_number_width (int nfiles, struct fstatus const *fstatus)
/* A positive failed value in the first record is the marker that
   get_input_fstatus stores when it skips the status lookup; the
   width is derived from the file statuses only when that marker is
   absent.  */
548 if (0 < nfiles && fstatus[0].failed <= 0)
550 int minimum_width = 1;
551 uintmax_t regular_total = 0;
/* Add up the sizes of the regular files whose status lookup
   succeeded (failed == 0).
   NOTE(review): the handling of non-regular files is not visible
   here.  */
554 for (i = 0; i < nfiles; i++)
555 if (! fstatus[i].failed)
557 if (S_ISREG (fstatus[i].st.st_mode))
558 regular_total += fstatus[i].st.st_size;
/* This loop runs once for every decimal digit of regular_total
   beyond the first.  */
563 for (; 10 <= regular_total; regular_total /= 10)
/* The result is never narrower than minimum_width.  */
565 if (width < minimum_width)
566 width = minimum_width;
/* Program entry point: set up the locale and message catalogue,
   parse the options, build the list of input files (operands, names
   read via --files0-from, or standard input alone), count each file
   with wc_file, print a total line, and exit with EXIT_SUCCESS only
   if every step reported success.  */
574 main (int argc, char **argv)
581 char *files_from = NULL;
582 struct fstatus *fstatus;
585 initialize_main (&argc, &argv);
586 program_name = argv[0];
587 setlocale (LC_ALL, "");
588 bindtextdomain (PACKAGE, LOCALEDIR);
589 textdomain (PACKAGE);
/* close_stdout is registered to run at exit.  */
591 atexit (close_stdout);
/* Start with no count selected and all totals at zero.  */
593 print_lines = print_words = print_chars = print_bytes = false;
594 print_linelength = false;
595 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
/* The short options are c, l, L, m and w; the long spellings come
   from longopts.  */
597 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
617 print_linelength = true;
620 case FILES0_FROM_OPTION:
624 case_GETOPT_HELP_CHAR;
626 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
629 usage (EXIT_FAILURE);
/* With no count option given, the default selection is lines, words
   and bytes.  */
632 if (! (print_lines | print_words | print_chars | print_bytes
634 print_lines = print_words = print_bytes = true;
640 /* When using --files0-from=F, you may not specify any files
641 on the command-line. */
644 error (0, 0, _("extra operand %s"), quote (argv[optind]));
645 fprintf (stderr, "%s\n",
646 _("File operands cannot be combined with --files0-from."));
647 usage (EXIT_FAILURE);
/* A files_from value of "-" is handled separately from a named
   file, which is opened with fopen; the diagnostic further down
   shows that "-" means the names are read from stdin.  */
650 if (STREQ (files_from, "-"))
654 stream = fopen (files_from, "r");
656 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
660 readtokens0_init (&tok);
/* A failure to read the names or to close the stream is fatal.  */
662 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
663 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
/* Without file operands the list is a single NULL entry, which
   wc_file treats as standard input.  */
671 static char *stdin_only[2];
672 files = (optind < argc ? argv + optind : stdin_only);
673 nfiles = (optind < argc ? argc - optind : 1);
674 stdin_only[0] = NULL;
/* The statuses are collected first because the print width depends
   on them.  */
677 fstatus = get_input_fstatus (nfiles, files);
678 number_width = compute_number_width (nfiles, fstatus);
681 for (i = 0; i < nfiles; i++)
/* When the names themselves come from stdin, an entry named "-" is
   diagnosed instead of being counted.  */
683 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
687 _("when reading file names from stdin, "
688 "no file name of %s allowed"),
/* Any failing file clears OK and therefore the exit status.  */
692 ok &= wc_file (files[i], &fstatus[i]);
/* Total line.
   NOTE(review): its guard is not visible here; the help text says it
   is printed when more than one FILE is specified.  */
696 write_counts (total_lines, total_words, total_chars, total_bytes,
697 max_line_length, _("total"));
/* Standard input is closed only if it was actually read; a failed
   close is fatal.  */
701 if (have_read_stdin && close (STDIN_FILENO) != 0)
702 error (EXIT_FAILURE, errno, "-");
704 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);