1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Paul Rubin, phr@ocf.berkeley.edu
19 and David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
27 /* Get mbstate_t, mbrtowc(), wcwidth(). */
32 /* Get iswprint(), iswspace(). */
36 #if !defined iswprint && !HAVE_ISWPRINT
37 # define iswprint(wc) 1
39 #if !defined iswspace && !HAVE_ISWSPACE
40 # define iswspace(wc) \
41 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
48 #include "readtokens0.h"
49 #include "safe-read.h"
51 #ifndef HAVE_DECL_WCWIDTH
52 "this configure-time declaration test was not run"
54 #if !HAVE_DECL_WCWIDTH
55 extern int wcwidth ();
58 /* If wcwidth() doesn't exist, assume all printable characters have width 1. */
60 #if !defined wcwidth && !HAVE_WCWIDTH
61 # define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
64 /* The official name of this program (e.g., no `g' prefix). */
65 #define PROGRAM_NAME "wc"
67 #define AUTHORS "Paul Rubin", "David MacKenzie"
69 /* Size of atomic reads. */
70 #define BUFFER_SIZE (16 * 1024)
72 /* The name this program was run with. */
75 /* Cumulative number of lines, words, chars and bytes in all files so far.
76 max_line_length is the maximum over all files processed so far. */
/* main zeroes these before option parsing, wc adds each file's counts
   to them, and main hands them to write_counts for the "total" line.  */
77 static uintmax_t total_lines;
78 static uintmax_t total_words;
79 static uintmax_t total_chars;
80 static uintmax_t total_bytes;
81 static uintmax_t max_line_length;
83 /* Which counts to print. */
/* main resets all five flags to false; if no count was requested on the
   command line it then enables lines, words and bytes.  */
84 static bool print_lines, print_words, print_chars, print_bytes;
85 static bool print_linelength;
87 /* The print width of each count. */
/* Set once in main from compute_number_width and passed by write_counts
   as the `*' field width of every number it prints.  */
88 static int number_width;
90 /* True if we have ever read the standard input. */
/* Set by wc_file; main closes STDIN_FILENO before exiting only when
   this is true.  */
91 static bool have_read_stdin;
93 /* The result of calling fstat or stat on a file descriptor or file. */
96 /* If positive, fstat or stat has not been called yet. Otherwise,
97 this is the value returned from fstat or stat. */
100 /* If FAILED is zero, this is the file's status. */
104 /* For long options that have no equivalent short option, use a
105 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
108 FILES0_FROM_OPTION = CHAR_MAX + 1
/* Long-option table handed to getopt_long in main.  Options that have a
   short form map to that character ('c', 'm', 'l', 'w', 'L');
   --files0-from has none and maps to the pseudo short option
   FILES0_FROM_OPTION, and is the only entry that takes an argument.  */
111 static struct option const longopts[] =
113 {"bytes", no_argument, NULL, 'c'},
114 {"chars", no_argument, NULL, 'm'},
115 {"lines", no_argument, NULL, 'l'},
116 {"words", no_argument, NULL, 'w'},
117 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
118 {"max-line-length", no_argument, NULL, 'L'},
119 {GETOPT_HELP_OPTION_DECL},
120 {GETOPT_VERSION_OPTION_DECL},
/* Help output.  When STATUS is not EXIT_SUCCESS a one-line "Try --help"
   hint is written to stderr.  The usage synopsis is formatted with
   program_name (twice, once per synopsis line); the descriptions of the
   help and version options and the bug-report address go to stdout.
   NOTE(review): the full help text is presumably the alternative branch
   of the STATUS test, and the routine presumably ends by exiting with
   STATUS -- confirm against the complete function.  */
127 if (status != EXIT_SUCCESS)
128 fprintf (stderr, _("Try `%s --help' for more information.\n"),
133 Usage: %s [OPTION]... [FILE]...\n\
134 or: %s [OPTION]... --files0-from=F\n\
136 program_name, program_name);
138 Print newline, word, and byte counts for each FILE, and a total line if\n\
139 more than one FILE is specified. With no FILE, or when FILE is -,\n\
140 read standard input.\n\
141 -c, --bytes print the byte counts\n\
142 -m, --chars print the character counts\n\
143 -l, --lines print the newline counts\n\
146 --files0-from=F read input from the files specified by\n\
147 NUL-terminated names in file F\n\
148 -L, --max-line-length print the length of the longest line\n\
149 -w, --words print the word counts\n\
151 fputs (HELP_OPTION_DESCRIPTION, stdout);
152 fputs (VERSION_OPTION_DESCRIPTION, stdout);
153 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
/* Print one line of counts on stdout.  The numbers appear in the order
   lines, words, chars, bytes, linelength, each converted with umaxtostr
   and printed in a field of at least number_width columns; the name
   FILE follows after a single space.
   NOTE(review): only the print_linelength guard is visible here; the
   other counts are presumably guarded by their print_* flags, and the
   name by a null check on FILE -- confirm.  */
158 /* FILE is the name of the file (or NULL for standard input)
159 associated with the specified counters. */
161 write_counts (uintmax_t lines,
165 uintmax_t linelength,
/* format_int starts one character into " %*s", i.e. at "%*s", so the
   first number printed has no leading separator.  After every number it
   is reset to the full string so later numbers are preceded by a space.  */
168 static char const format_sp_int[] = " %*s";
169 char const *format_int = format_sp_int + 1;
/* One scratch buffer is reused for every conversion.  */
170 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
174 printf (format_int, number_width, umaxtostr (lines, buf));
175 format_int = format_sp_int;
179 printf (format_int, number_width, umaxtostr (words, buf));
180 format_int = format_sp_int;
184 printf (format_int, number_width, umaxtostr (chars, buf));
185 format_int = format_sp_int;
189 printf (format_int, number_width, umaxtostr (bytes, buf));
190 format_int = format_sp_int;
192 if (print_linelength)
194 printf (format_int, number_width, umaxtostr (linelength, buf));
197 printf (" %s", file);
/* Tally the data readable from FD, print the per-file line through
   write_counts, and fold the results into total_lines, total_words,
   total_chars, total_bytes and max_line_length.  Diagnostics name
   FILE_X, or the translated string "standard input" when FILE_X is
   null.  FSTATUS caches the file status; fstat is called here on demand
   when FSTATUS->failed is still positive.  */
201 /* Count words. FILE_X is the name of the file (or NULL for standard
202 input) that is open on descriptor FD. *FSTATUS is its status.
203 Return true if successful. */
205 wc (int fd, char const *file_x, struct fstatus *fstatus)
208 char buf[BUFFER_SIZE + 1];
210 uintmax_t lines, words, chars, bytes, linelength;
211 bool count_bytes, count_chars, count_complicated;
212 char const *file = file_x ? file_x : _("standard input");
214 lines = words = chars = bytes = linelength = 0;
216 /* If in the current locale, chars are equivalent to bytes, we prefer
217 counting bytes, because that's easier. */
218 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
221 count_bytes = print_bytes;
222 count_chars = print_chars;
/* Chars equal bytes in this case, so a request for either one is
   satisfied by counting bytes.  */
227 count_bytes = print_bytes | print_chars;
/* Words and line length need a per-character scan, unlike the cheaper
   byte-only and line-only loops below.  */
230 count_complicated = print_words | print_linelength;
232 /* When counting only bytes, save some line- and word-counting
233 overhead. If FD is a `regular' Unix file, using lseek is enough
234 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
235 bytes at a time until EOF. Note that the `size' (number of bytes)
236 that wc reports is smaller than stats.st_size when the file is not
237 positioned at its beginning. That's why the lseek calls below are
238 necessary. For example the command
239 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
240 should make wc report `0' bytes. */
242 if (count_bytes & !count_chars & !print_lines & !count_complicated)
244 off_t current_pos, end_pos;
/* A positive `failed' means the status has not been fetched yet.  */
246 if (0 < fstatus->failed)
247 fstatus->failed = fstat (fd, &fstatus->st);
249 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
250 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
251 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
253 /* Be careful here. The current position may actually be
254 beyond the end of the file. As in the example above. */
255 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
/* Fallback described above: read BUFFER_SIZE blocks until EOF.
   NOTE(review): every read loop in this function tests SAFE_READ_ERROR
   inside a loop whose condition is `> 0'.  That only works if
   bytes_read is unsigned and SAFE_READ_ERROR is a large positive value
   -- confirm against the declaration and safe-read.h.  */
259 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
261 if (bytes_read == SAFE_READ_ERROR)
263 error (0, errno, "%s", file);
271 else if (!count_chars & !count_complicated)
273 /* Use a separate loop when counting only lines or lines and bytes --
274 but not chars or words. */
275 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
279 if (bytes_read == SAFE_READ_ERROR)
281 error (0, errno, "%s", file);
/* Each memchr hit is one newline in the not-yet-scanned part of the
   block.  */
286 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
294 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
295 # define SUPPORT_OLD_MBRTOWC 1
/* Multibyte locale: decode the input with mbrtowc.  */
296 else if (MB_CUR_MAX > 1)
298 bool in_word = false;
299 uintmax_t linepos = 0;
301 uintmax_t last_error_line = 0;
302 int last_error_errno = 0;
303 # if SUPPORT_OLD_MBRTOWC
304 /* Back-up the state before each multibyte character conversion and
305 move the last incomplete character of the buffer to the front
306 of the buffer. This is needed because we don't know whether
307 the `mbrtowc' function updates the state when it returns -2, -
308 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
309 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
310 autoconf test for this, yet. */
311 size_t prev = 0; /* number of bytes carried over from previous round */
313 const size_t prev = 0;
316 memset (&state, 0, sizeof (mbstate_t));
/* New data is read in after the PREV bytes carried over.  */
317 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
320 # if SUPPORT_OLD_MBRTOWC
321 mbstate_t backup_state;
323 if (bytes_read == SAFE_READ_ERROR)
325 error (0, errno, "%s", file);
338 # if SUPPORT_OLD_MBRTOWC
339 backup_state = state;
341 n = mbrtowc (&wide_char, p, bytes_read, &state);
/* (size_t) -2: the bytes left in the buffer form an incomplete
   multibyte character.  */
342 if (n == (size_t) -2)
344 # if SUPPORT_OLD_MBRTOWC
345 state = backup_state;
/* (size_t) -1: invalid multibyte sequence; mbrtowc has set errno.  */
349 if (n == (size_t) -1)
351 /* Signal repeated errors only once per line. */
352 if (!(lines + 1 == last_error_line
353 && errno == last_error_errno))
355 char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
356 last_error_line = lines + 1;
357 last_error_errno = errno;
/* The diagnostic has the form FILE:LINE, with LINE counted from 1.  */
358 error (0, errno, "%s:%s", file,
359 umaxtostr (last_error_line, line_number_buf));
/* Track the longest line seen so far.  */
382 if (linepos > linelength)
383 linelength = linepos;
385 goto mb_word_separator;
/* Round linepos up to the next multiple of 8.
   NOTE(review): presumably the tab case (8-column tab stops) -- confirm.  */
387 linepos += 8 - (linepos % 8);
388 goto mb_word_separator;
/* Display width is only queried for printable characters.  */
398 if (iswprint (wide_char))
400 int width = wcwidth (wide_char);
403 if (iswspace (wide_char))
404 goto mb_word_separator;
411 while (bytes_read > 0);
413 # if SUPPORT_OLD_MBRTOWC
416 if (bytes_read == BUFFER_SIZE)
418 /* Encountered a very long redundant shift sequence. */
/* Move the undecoded tail to the front of buf for the next read.  */
422 memmove (buf, p, bytes_read);
/* After the last block, make sure the trailing line is reflected in
   linelength.  */
427 if (linepos > linelength)
428 linelength = linepos;
/* Remaining case: classify the input one byte at a time with the
   <ctype.h> predicates.  */
434 bool in_word = false;
435 uintmax_t linepos = 0;
437 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
440 if (bytes_read == SAFE_READ_ERROR)
442 error (0, errno, "%s", file);
457 if (linepos > linelength)
458 linelength = linepos;
462 linepos += 8 - (linepos % 8);
/* NOTE(review): to_uchar presumably converts to unsigned char so that
   isprint/isspace never receive a negative value -- confirm.  */
473 if (isprint (to_uchar (p[-1])))
476 if (isspace (to_uchar (p[-1])))
483 while (--bytes_read);
485 if (linepos > linelength)
486 linelength = linepos;
/* count_chars < print_chars holds only when chars were requested but
   tallied as bytes (count_chars false, print_chars true).  */
490 if (count_chars < print_chars)
/* Print this file's counts (under FILE_X, which may be null), then add
   them to the running totals.  */
493 write_counts (lines, words, chars, bytes, linelength, file_x);
494 total_lines += lines;
495 total_words += words;
496 total_chars += chars;
497 total_bytes += bytes;
498 if (linelength > max_line_length)
499 max_line_length = linelength;
/* Count one input with wc.  A null FILE or the name "-" selects standard
   input, which is counted directly on STDIN_FILENO; any other name is
   opened read-only (with O_BINARY) and counted on the new descriptor.
   FSTATUS is passed through to wc unchanged.  Failures are reported
   with error () under the name FILE.  */
505 wc_file (char const *file, struct fstatus *fstatus)
507 if (! file || STREQ (file, "-"))
/* Remember that stdin was used so that main closes it at exit.  */
509 have_read_stdin = true;
/* Where O_BINARY is nonzero, reopen stdin in binary mode unless it is a
   terminal.
   NOTE(review): the result of freopen is ignored -- confirm that a
   failure here is harmless.  */
510 if (O_BINARY && ! isatty (STDIN_FILENO))
511 freopen (NULL, "rb", stdin);
512 return wc (STDIN_FILENO, file, fstatus);
516 int fd = open (file, O_RDONLY | O_BINARY);
519 error (0, errno, "%s", file);
524 bool ok = wc (fd, file, fstatus);
/* NOTE(review): this second diagnostic presumably reports a failure to
   close FD -- confirm.  */
527 error (0, errno, "%s", file);
535 /* Return the file status for the NFILES files addressed by FILE.
536 Optimize the case where only one number is printed, for just one
537 file; in that case we can use a print width of 1, so we don't need to stat any files. */
/* The result is an array of NFILES entries obtained from xnmalloc.  */
540 static struct fstatus *
541 get_input_fstatus (int nfiles, char * const *file)
543 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
546 && ((print_lines + print_words + print_chars
547 + print_bytes + print_linelength)
/* A positive value marks the status as not fetched yet; wc calls fstat
   itself if it turns out to need it.  */
549 fstatus[0].failed = 1;
/* Otherwise record the status of every input now: fstat on
   STDIN_FILENO for a null name or "-", stat on the name for anything
   else.  The return value of the call is stored in `failed'.  */
554 for (i = 0; i < nfiles; i++)
555 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
556 ? fstat (STDIN_FILENO, &fstatus[i].st)
557 : stat (file[i], &fstatus[i].st));
563 /* Return a print width suitable for the NFILES files whose status is
564 recorded in FSTATUS. Optimize the same special case that
565 get_input_fstatus optimizes. */
568 compute_number_width (int nfiles, struct fstatus const *fstatus)
/* A positive fstatus[0].failed is the "status not fetched" marker set by
   get_input_fstatus; the size-based computation is skipped then.  */
572 if (0 < nfiles && fstatus[0].failed <= 0)
574 int minimum_width = 1;
575 uintmax_t regular_total = 0;
/* Add up the sizes of the regular files whose status was obtained.  */
578 for (i = 0; i < nfiles; i++)
579 if (! fstatus[i].failed)
581 if (S_ISREG (fstatus[i].st.st_mode))
582 regular_total += fstatus[i].st.st_size;
/* One iteration per decimal digit of the total beyond the first.
   NOTE(review): the loop body presumably increments the width --
   confirm.  */
587 for (; 10 <= regular_total; regular_total /= 10)
/* Never go below the minimum width chosen above.  */
589 if (width < minimum_width)
590 width = minimum_width;
/* Entry point: set up the locale and message catalog, parse the
   options, build the list of inputs (command-line operands, the names
   read via --files0-from, or standard input alone), size the output
   columns, count every input with wc_file, print the totals, and exit
   with EXIT_SUCCESS only if everything succeeded.  */
598 main (int argc, char **argv)
605 char *files_from = NULL;
606 struct fstatus *fstatus;
609 initialize_main (&argc, &argv);
610 program_name = argv[0];
611 setlocale (LC_ALL, "");
612 bindtextdomain (PACKAGE, LOCALEDIR);
613 textdomain (PACKAGE);
615 atexit (close_stdout);
617 print_lines = print_words = print_chars = print_bytes = false;
618 print_linelength = false;
619 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
/* Short options are -c -l -L -m -w; the long forms come from longopts.  */
621 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
641 print_linelength = true;
644 case FILES0_FROM_OPTION:
648 case_GETOPT_HELP_CHAR;
650 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
653 usage (EXIT_FAILURE);
/* Default selection when no count was requested: lines, words, bytes.  */
656 if (! (print_lines | print_words | print_chars | print_bytes
658 print_lines = print_words = print_bytes = true;
664 /* When using --files0-from=F, you may not specify any files
665 on the command-line. */
668 error (0, 0, _("extra operand %s"), quote (argv[optind]));
669 fprintf (stderr, "%s\n",
670 _("File operands cannot be combined with --files0-from."));
671 usage (EXIT_FAILURE);
/* "-" as the --files0-from argument means the name list is read from
   standard input (see the diagnostic in the loop below); any other
   value is opened as a file.  */
674 if (STREQ (files_from, "-"))
678 stream = fopen (files_from, "r");
680 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
684 readtokens0_init (&tok);
686 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
687 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
695 static char *stdin_only[2];
/* Use the operands if there are any; otherwise a one-element list
   holding a null name, which wc_file treats as standard input.  */
696 files = (optind < argc ? argv + optind : stdin_only);
697 nfiles = (optind < argc ? argc - optind : 1);
698 stdin_only[0] = NULL;
/* Gather the file statuses first so the column width is fixed before
   anything is printed.  */
701 fstatus = get_input_fstatus (nfiles, files);
702 number_width = compute_number_width (nfiles, fstatus);
705 for (i = 0; i < nfiles; i++)
/* A name of "-" inside a list that is itself read from stdin is
   rejected with the diagnostic below.  */
707 if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
711 _("when reading file names from stdin, "
712 "no file name of %s allowed"),
/* Accumulate per-file success into OK for the exit status.  */
716 ok &= wc_file (files[i], &fstatus[i]);
/* Summary line labelled with the translated word "total".
   NOTE(review): per the help text this is presumably printed only when
   more than one file was given -- confirm.  */
720 write_counts (total_lines, total_words, total_chars, total_bytes,
721 max_line_length, _("total"));
/* Close stdin only if it was actually read; a failure to close it is
   fatal.  */
725 if (have_read_stdin && close (STDIN_FILENO) != 0)
726 error (EXIT_FAILURE, errno, "-");
728 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);