1 /* wc - print the number of bytes, words, and lines in files
2 Copyright (C) 85, 91, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Rubin, phr@ocf.berkeley.edu
19 and David MacKenzie, djm@gnu.ai.mit.edu. */
23 # include <inttypes.h>
28 #include <sys/types.h>
30 /* Get mbstate_t, mbrtowc(), wcwidth(). */
39 #if !defined iswprint && !HAVE_ISWPRINT
40 # define iswprint(wc) 1
43 /* Include this after wctype.h so that we `#undef' ISPRINT
44 (from Solaris's euc.h, from widec.h, from wctype.h) before
45 redefining and using it. */
51 #include "safe-read.h"
53 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
54 #if HAVE_MBRTOWC && defined mbstate_t
55 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
58 #ifndef HAVE_DECL_WCWIDTH
59 "this configure-time declaration test was not run"
61 #if !HAVE_DECL_WCWIDTH
62 extern int wcwidth ();
65 /* If wcwidth() doesn't exist, assume all printable characters have width 1. */
67 #if !defined wcwidth && !HAVE_WCWIDTH
68 # define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
71 /* The official name of this program (e.g., no `g' prefix). */
72 #define PROGRAM_NAME "wc"
74 #define AUTHORS N_ ("Paul Rubin and David MacKenzie")
76 /* Size of atomic reads. */
77 #define BUFFER_SIZE (16 * 1024)
79 /* The name this program was run with. */
82 /* Cumulative number of lines, words, chars and bytes in all files so far.
83 max_line_length is the maximum over all files processed so far. */
/* The four totals are incremented at the end of wc, after each input
   has been counted; max_line_length is replaced only when a longer
   line is found.  main passes all five to write_counts for the
   summary row.  */
84 static uintmax_t total_lines;
85 static uintmax_t total_words;
86 static uintmax_t total_chars;
87 static uintmax_t total_bytes;
88 static uintmax_t max_line_length;
90 /* Which counts to print. */
/* All five flags are cleared in main before option parsing.  wc also
   reads them to decide which counting strategy it can use.  */
91 static int print_lines, print_words, print_chars, print_bytes;
92 static int print_linelength;
94 /* Nonzero if we have ever read the standard input. */
/* main tests this before exiting to decide whether descriptor 0
   has to be closed.  */
95 static int have_read_stdin;
97 /* The error code to return to the system. */
/* main exits with EXIT_SUCCESS when this is 0 and EXIT_FAILURE
   otherwise.  */
98 static int exit_status;
100 /* If nonzero, do not line up columns but instead separate numbers by
101 a single space as specified in Single Unix Specification and POSIX. */
/* Set in main from the presence of the POSIXLY_CORRECT environment
   variable; read by write_counts when it picks its format strings.  */
102 static int posixly_correct;
/* Long option table handed to getopt_long in main.  Every entry shown
   takes no argument and has a NULL flag pointer, so getopt_long returns
   the listed character, which is the matching short option.
   NOTE(review): the opening brace and the terminating entry of the
   initializer are not visible in this excerpt.  */
104 static struct option const longopts[] =
106 {"bytes", no_argument, NULL, 'c'},
107 {"chars", no_argument, NULL, 'm'},
108 {"lines", no_argument, NULL, 'l'},
109 {"words", no_argument, NULL, 'w'},
110 {"max-line-length", no_argument, NULL, 'L'},
111 {GETOPT_HELP_OPTION_DECL},
112 {GETOPT_VERSION_OPTION_DECL},
120 fprintf (stderr, _("Try `%s --help' for more information.\n"),
125 Usage: %s [OPTION]... [FILE]...\n\
129 Print newline, word, and byte counts for each FILE, and a total line if\n\
130 more than one FILE is specified. With no FILE, or when FILE is -,\n\
131 read standard input.\n\
132 -c, --bytes print the byte counts\n\
133 -m, --chars print the character counts\n\
134 -l, --lines print the newline counts\n\
137 -L, --max-line-length print the length of the longest line\n\
138 -w, --words print the word counts\n\
141 --help display this help and exit\n\
142 --version output version information and exit\n\
144 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
146 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
/* Print one row of counts followed by the name FILE.

   Each number is converted with human_readable into the local scratch
   buffer and printed with one of two formats: format_int for the first
   number and format_sp_int, which takes a separator string as well, for
   the following ones.  When posixly_correct is nonzero the numbers are
   printed without padding; otherwise each is right-aligned in a field
   of width 7.

   NOTE(review): some parameters and the statements around each printf
   are not visible in this excerpt.  Presumably every count is guarded
   by its print_* flag, as the visible print_linelength test is, and
   SPACE is changed from the empty string once a number has been
   printed -- confirm against the full file.  */
150 write_counts (uintmax_t lines,
154 uintmax_t linelength,
157 char buf[LONGEST_HUMAN_READABLE + 1];
/* Separator placed before a number; starts out empty.  */
158 char const *space = "";
/* Unpadded output in POSIX mode, width-7 columns otherwise.  */
159 char const *format_int = (posixly_correct ? "%s" : "%7s");
160 char const *format_sp_int = (posixly_correct ? "%s%s" : "%s%7s");
164 printf (format_int, human_readable (lines, buf, 1, 1));
169 printf (format_sp_int, space, human_readable (words, buf, 1, 1));
174 printf (format_sp_int, space, human_readable (chars, buf, 1, 1));
179 printf (format_sp_int, space, human_readable (bytes, buf, 1, 1));
182 if (print_linelength)
184 printf (format_sp_int, space, human_readable (linelength, buf, 1, 1));
/* The name is always preceded by a single space.  */
187 printf (" %s", file);
/* Count the data readable from descriptor FD, print the result through
   write_counts labelled with FILE, and add it to the running totals.

   Several strategies are visible below, tried in this order:
     - only bytes wanted: for a regular file compute the size from two
       lseek calls, otherwise read to end of input;
     - no chars, words or line length wanted: scan each buffer for
       newline bytes with memchr;
     - multibyte-capable build with MB_CUR_MAX > 1: convert with mbrtowc
       and track line position using wcwidth;
     - otherwise: a byte-at-a-time loop using ISPRINT.

   Read failures are reported with error using errno and the name FILE.

   NOTE(review): many lines of this function (braces, case labels,
   counter updates) are not visible in this excerpt; comments added
   here describe only the visible statements.  */
192 wc (int fd, const char *file)
194 char buf[BUFFER_SIZE + 1];
196 uintmax_t lines, words, chars, bytes, linelength;
197 int count_bytes, count_chars, count_complicated;
199 lines = words = chars = bytes = linelength = 0;
201 /* If in the current locale, chars are equivalent to bytes, we prefer
202 counting bytes, because that's easier. */
/* Two alternative settings follow: either bytes and chars are counted
   separately, or a request for chars is folded into count_bytes.
   NOTE(review): the condition selecting between them is not visible
   in this excerpt.  */
203 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
206 count_bytes = print_bytes;
207 count_chars = print_chars;
212 count_bytes = print_bytes + print_chars;
/* Nonzero when words or the maximum line length were requested; both
   fast paths below require this to be zero.  */
215 count_complicated = print_words + print_linelength;
217 /* We need binary input, since `wc' relies on `lseek' and byte counts. */
220 /* When counting only bytes, save some line- and word-counting
221 overhead. If FD is a `regular' Unix file, using lseek is enough
222 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
223 bytes at a time until EOF. Note that the `size' (number of bytes)
224 that wc reports is smaller than stats.st_size when the file is not
225 positioned at its beginning. That's why the lseek calls below are
226 necessary. For example the command
227 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
228 should make wc report `0' bytes. */
230 if (count_bytes && !count_chars && !print_lines && !count_complicated)
232 off_t current_pos, end_pos;
235 if (fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)
236 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
237 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
240 /* Be careful here. The current position may actually be
241 beyond the end of the file. As in the example above. */
/* A negative difference is clamped to zero.  */
242 bytes = (diff = end_pos - current_pos) < 0 ? 0 : diff;
246 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
252 error (0, errno, "%s", file);
257 else if (!count_chars && !count_complicated)
259 /* Use a separate loop when counting only lines or lines and bytes --
260 but not chars or words. */
261 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
263 register char *p = buf;
/* Each memchr call searches only the part of the buffer from p up to
   the end of the data just read.  */
265 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
274 error (0, errno, "%s", file);
278 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
279 # define SUPPORT_OLD_MBRTOWC 1
280 else if (MB_CUR_MAX > 1)
/* linepos is the running position within the current line; it is
   compared against linelength below.  */
283 uintmax_t linepos = 0;
/* Line number and errno of the last conversion error reported, used
   to suppress repeated reports for the same line.  */
285 uintmax_t last_error_line = 0;
286 int last_error_errno = 0;
287 # if SUPPORT_OLD_MBRTOWC
288 /* Back-up the state before each multibyte character conversion and
289 move the last incomplete character of the buffer to the front
290 of the buffer. This is needed because we don't know whether
291 the `mbrtowc' function updates the state when it returns -2, -
292 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
293 ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We don't have an
294 autoconf test for this, yet. */
295 int prev = 0; /* number of bytes carried over from previous round */
/* Start from a zeroed conversion state.  */
300 memset (&state, 0, sizeof (mbstate_t));
/* New data is read in after the prev bytes kept at the front of buf.  */
301 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
304 # if SUPPORT_OLD_MBRTOWC
305 mbstate_t backup_state;
316 # if SUPPORT_OLD_MBRTOWC
317 backup_state = state;
319 n = mbrtowc (&wide_char, p, bytes_read, &state);
/* mbrtowc returns (size_t) -2 when the remaining bytes form an
   incomplete multibyte character; the saved state is restored.  */
320 if (n == (size_t) -2)
322 # if SUPPORT_OLD_MBRTOWC
323 state = backup_state;
/* mbrtowc returns (size_t) -1 for an invalid sequence.  */
327 if (n == (size_t) -1)
329 /* Signal repeated errors only once per line. */
330 if (!(lines + 1 == last_error_line
331 && errno == last_error_errno))
333 char hr_buf[LONGEST_HUMAN_READABLE + 1];
334 last_error_line = lines + 1;
335 last_error_errno = errno;
/* The message names the file and the line number lines + 1.  */
336 error (0, errno, "%s:%s", file,
337 human_readable (lines + 1, hr_buf, 1, 1));
/* Keep linelength equal to the largest line position seen.  */
359 if (linepos > linelength)
360 linelength = linepos;
362 goto mb_word_separator;
/* Advance linepos to the next multiple of 8 (presumably the tab case;
   its label is not visible in this excerpt).  */
364 linepos += 8 - (linepos % 8);
365 goto mb_word_separator;
378 if (iswprint (wide_char))
380 int width = wcwidth (wide_char);
389 while (bytes_read > 0);
391 # if SUPPORT_OLD_MBRTOWC
394 if (bytes_read == BUFFER_SIZE)
396 /* Encountered a very long redundant shift sequence. */
/* Move the bytes_read unconverted bytes at p to the start of buf.  */
400 memmove (buf, p, bytes_read);
407 error (0, errno, "%s", file);
/* After the read loop, fold the final linepos into linelength.  */
410 if (linepos > linelength)
411 linelength = linepos;
419 uintmax_t linepos = 0;
421 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
435 if (linepos > linelength)
436 linelength = linepos;
/* Advance linepos to the next multiple of 8, as in the multibyte loop.  */
440 linepos += 8 - (linepos % 8);
/* The byte is cast to unsigned char before being classified.  */
454 if (ISPRINT ((unsigned char) p[-1]))
462 while (--bytes_read);
466 error (0, errno, "%s", file);
469 if (linepos > linelength)
470 linelength = linepos;
/* True when chars were requested but count_chars was left smaller
   than print_chars.  NOTE(review): the guarded statement is not
   visible here; presumably chars is taken from bytes -- confirm.  */
475 if (count_chars < print_chars)
/* Print the counts for this input, then add them to the totals;
   max_line_length keeps the maximum rather than a sum.  */
478 write_counts (lines, words, chars, bytes, linelength, file);
479 total_lines += lines;
480 total_words += words;
481 total_chars += chars;
482 total_bytes += bytes;
483 if (linelength > max_line_length)
484 max_line_length = linelength;
/* Count the input named FILE.

   A name consisting of a single dash is tested for first; any other
   name is opened read-only with open.  Failures are reported with
   error using errno and the name FILE.

   NOTE(review): the bodies of both branches are not visible in this
   excerpt.  Presumably the dash branch counts descriptor 0 and sets
   have_read_stdin, and the other branch calls wc on the new descriptor
   and closes it afterwards -- confirm against the full file.  */
488 wc_file (const char *file)
490 if (STREQ (file, "-"))
497 int fd = open (file, O_RDONLY);
500 error (0, errno, "%s", file);
507 error (0, errno, "%s", file);
/* Program entry point: set up the locale and message catalog, parse
   the options, count every operand with wc_file, optionally print a
   summary row, and exit with a status derived from exit_status.

   NOTE(review): local declarations, most option cases and some guards
   are not visible in this excerpt.  */
514 main (int argc, char **argv)
519 program_name = argv[0];
/* Adopt the locale from the environment and select the message
   catalog for PACKAGE.  */
520 setlocale (LC_ALL, "");
521 bindtextdomain (PACKAGE, LOCALEDIR);
522 textdomain (PACKAGE);
/* Register close_stdout to run at exit.  */
524 atexit (close_stdout);
/* POSIX mode is selected by the mere presence of the variable.  */
527 posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
528 print_lines = print_words = print_chars = print_bytes = print_linelength = 0;
529 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
/* Accepted short options: c, l, L, m and w; longopts supplies the
   long spellings.  */
531 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
554 print_linelength = 1;
557 case_GETOPT_HELP_CHAR;
559 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
/* Default selection: lines, words and bytes.  NOTE(review): the rest
   of this condition is not visible; presumably it tests that no count
   was requested -- confirm.  */
565 if (print_lines + print_words + print_chars + print_bytes + print_linelength
567 print_lines = print_words = print_bytes = 1;
/* Number of operands left after option parsing.  */
569 nfiles = argc - optind;
578 for (; optind < argc; ++optind)
579 wc_file (argv[optind]);
/* Summary row, labelled with the translated word total.
   NOTE(review): the guard on this call is not visible; presumably it
   depends on nfiles -- confirm.  */
582 write_counts (total_lines, total_words, total_chars, total_bytes,
583 max_line_length, _("total"));
/* If standard input was read, a failure to close it is fatal.  */
586 if (have_read_stdin && close (0))
587 error (EXIT_FAILURE, errno, "-");
589 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);