ls: fix help text: -w N is optional
[platform/upstream/busybox.git] / coreutils / wc.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wc implementation for busybox
4  *
5  * Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 /* BB_AUDIT SUSv3 compliant. */
11 /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
12
13 /* Mar 16, 2003      Manuel Novoa III   (mjn3@codepoet.org)
14  *
15  * Rewritten to fix a number of problems and do some size optimizations.
16  * Problems in the previous busybox implementation (besides bloat) included:
17  *  1) broken 'wc -c' optimization (read note below)
18  *  2) broken handling of '-' args
19  *  3) no checking of ferror on EOF returns
20  *  4) isprint() wasn't considered when word counting.
21  *
22  * NOTES:
23  *
24  * The previous busybox wc attempted an optimization using stat for the
25  * case of counting chars only.  I omitted that because it was broken.
26  * It didn't take into account the possibility of input coming from a
27  * pipe, or input from a file with file pointer not at the beginning.
28  *
29  * To implement such a speed optimization correctly, not only do you
30  * need the size, but also the file position.  Note also that the
31  * file position may be past the end of file.  Consider the example
32  * (adapted from example in gnu wc.c)
33  *
34  *      echo hello > /tmp/testfile &&
35  *      (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
36  *
37  * for which 'wc -c' should output '0'.
38  */
39 #include "libbb.h"
40 #include "unicode.h"
41
/*
 * When locale support is disabled, drop any previously defined
 * isprint/isspace and replace them with fixed ASCII-only tests.
 */
42 #if !ENABLE_LOCALE_SUPPORT
43 # undef isprint
44 # undef isspace
/* True only for 0x20..0x7e.  For an int argument, values below 0x20
 * become huge after the unsigned cast, so a single comparison checks
 * both bounds. */
45 # define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20))
/* Only the blank character itself is treated as space. */
46 # define isspace(c) ((c) == ' ')
47 #endif
48
/*
 * COUNT_T is the element type of the counts[] and totals[] arrays in
 * wc_main(); COUNT_FMT is the matching printf conversion, written
 * without the leading '%' so callers can prepend their own width.
 * The two definitions must always be changed together.
 */
49 #if ENABLE_FEATURE_WC_LARGE
50 # define COUNT_T unsigned long long
51 # define COUNT_FMT "llu"
52 #else
53 # define COUNT_T unsigned
54 # define COUNT_FMT "u"
55 #endif
56
57 /* We support -m even when UNICODE_SUPPORT is off,
58  * we just don't advertise it in help text,
59  * since it is the same as -c in this case.
60  */
61
62 //usage:#define wc_trivial_usage
63 //usage:       "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
64 //usage:
65 //usage:#define wc_full_usage "\n\n"
66 //usage:       "Count lines, words, and bytes for each FILE (or stdin)\n"
67 //usage:     "\nOptions:"
68 //usage:     "\n        -c      Count bytes"
69 //usage:        IF_UNICODE_SUPPORT(
70 //usage:     "\n        -m      Count characters"
71 //usage:        )
72 //usage:     "\n        -l      Count newlines"
73 //usage:     "\n        -w      Count words"
74 //usage:     "\n        -L      Print longest line length"
75 //usage:
76 //usage:#define wc_example_usage
77 //usage:       "$ wc /etc/passwd\n"
78 //usage:       "     31      46    1365 /etc/passwd\n"
79
80 /* Order is important if we want to be compatible with
81  * column order in "wc -cmlwL" output:
82  */
/*
 * Index of each counter in the counts[] and totals[] arrays of wc_main().
 * wc_main() also uses each value as a bit number (1 << WC_xxx) in the
 * option mask it gets from getopt32(argv, "lwmcL"), and prints the
 * selected columns in increasing index order.
 * NOTE(review): the values line up with the letter order in "lwmcL";
 * presumably getopt32 assigns bit N to the Nth letter - confirm in libbb
 * before reordering either side.
 */
83 enum {
84         WC_LINES    = 0, /* -l */
85         WC_WORDS    = 1, /* -w */
86         WC_UNICHARS = 2, /* -m */
87         WC_BYTES    = 3, /* -c */
88         WC_LENGTH   = 4, /* -L */
89         NUM_WCS     = 5, /* number of counters; sizes counts[] and totals[] */
90 };
91
92 int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
93 int wc_main(int argc UNUSED_PARAM, char **argv)
94 {
95         const char *arg;
96         const char *start_fmt = " %9"COUNT_FMT + 1;
97         const char *fname_fmt = " %s\n";
98         COUNT_T *pcounts;
99         COUNT_T counts[NUM_WCS];
100         COUNT_T totals[NUM_WCS];
101         int num_files;
102         smallint status = EXIT_SUCCESS;
103         unsigned print_type;
104
105         init_unicode();
106
107         print_type = getopt32(argv, "lwmcL");
108
109         if (print_type == 0) {
110                 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
111         }
112
113         argv += optind;
114         if (!argv[0]) {
115                 *--argv = (char *) bb_msg_standard_input;
116                 fname_fmt = "\n";
117         }
118         if (!argv[1]) { /* zero or one filename? */
119                 if (!((print_type-1) & print_type)) /* exactly one option? */
120                         start_fmt = "%"COUNT_FMT;
121         }
122
123         memset(totals, 0, sizeof(totals));
124
125         pcounts = counts;
126
127         num_files = 0;
128         while ((arg = *argv++) != NULL) {
129                 FILE *fp;
130                 const char *s;
131                 unsigned u;
132                 unsigned linepos;
133                 smallint in_word;
134
135                 ++num_files;
136                 fp = fopen_or_warn_stdin(arg);
137                 if (!fp) {
138                         status = EXIT_FAILURE;
139                         continue;
140                 }
141
142                 memset(counts, 0, sizeof(counts));
143                 linepos = 0;
144                 in_word = 0;
145
146                 while (1) {
147                         int c;
148                         /* Our -w doesn't match GNU wc exactly... oh well */
149
150                         c = getc(fp);
151                         if (c == EOF) {
152                                 if (ferror(fp)) {
153                                         bb_simple_perror_msg(arg);
154                                         status = EXIT_FAILURE;
155                                 }
156                                 goto DO_EOF;  /* Treat an EOF as '\r'. */
157                         }
158
159                         /* Cater for -c and -m */
160                         ++counts[WC_BYTES];
161                         if (unicode_status != UNICODE_ON /* every byte is a new char */
162                          || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
163                         ) {
164                                 ++counts[WC_UNICHARS];
165                         }
166
167                         if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
168                                 ++linepos;
169                                 if (!isspace(c)) {
170                                         in_word = 1;
171                                         continue;
172                                 }
173                         } else if ((unsigned)(c - 9) <= 4) {
174                                 /* \t  9
175                                  * \n 10
176                                  * \v 11
177                                  * \f 12
178                                  * \r 13
179                                  */
180                                 if (c == '\t') {
181                                         linepos = (linepos | 7) + 1;
182                                 } else {  /* '\n', '\r', '\f', or '\v' */
183  DO_EOF:
184                                         if (linepos > counts[WC_LENGTH]) {
185                                                 counts[WC_LENGTH] = linepos;
186                                         }
187                                         if (c == '\n') {
188                                                 ++counts[WC_LINES];
189                                         }
190                                         if (c != '\v') {
191                                                 linepos = 0;
192                                         }
193                                 }
194                         } else {
195                                 continue;
196                         }
197
198                         counts[WC_WORDS] += in_word;
199                         in_word = 0;
200                         if (c == EOF) {
201                                 break;
202                         }
203                 }
204
205                 fclose_if_not_stdin(fp);
206
207                 if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
208                         totals[WC_LENGTH] = counts[WC_LENGTH];
209                 }
210                 totals[WC_LENGTH] -= counts[WC_LENGTH];
211
212  OUTPUT:
213                 /* coreutils wc tries hard to print pretty columns
214                  * (saves results for all files, finds max col len etc...)
215                  * we won't try that hard, it will bloat us too much */
216                 s = start_fmt;
217                 u = 0;
218                 do {
219                         if (print_type & (1 << u)) {
220                                 printf(s, pcounts[u]);
221                                 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
222                         }
223                         totals[u] += pcounts[u];
224                 } while (++u < NUM_WCS);
225                 printf(fname_fmt, arg);
226         }
227
228         /* If more than one file was processed, we want the totals.  To save some
229          * space, we set the pcounts ptr to the totals array.  This has the side
230          * effect of trashing the totals array after outputting it, but that's
231          * irrelavent since we no longer need it. */
232         if (num_files > 1) {
233                 num_files = 0;  /* Make sure we don't get here again. */
234                 arg = "total";
235                 pcounts = totals;
236                 --argv;
237                 goto OUTPUT;
238         }
239
240         fflush_stdout_and_exit(status);
241 }