fold: unicode support. Based on a patch by Tomas Heinrich <heinrich.tomas@gmail.com>
[platform/upstream/busybox.git] / coreutils / fold.c
1 /* vi: set sw=4 ts=4: */
2 /* fold -- wrap each input line to fit in specified width.
3
4    Written by David MacKenzie, djm@gnu.ai.mit.edu.
5    Copyright (C) 91, 1995-2002 Free Software Foundation, Inc.
6
7    Modified for busybox based on coreutils v 5.0
8    Copyright (C) 2003 Glenn McGrath
9
10    Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
11 */
12 #include "libbb.h"
13 #include "unicode.h"
14
15 /* Must match getopt32 call */
16 #define FLAG_COUNT_BYTES        1
17 #define FLAG_BREAK_SPACES       2
18 #define FLAG_WIDTH              4
19
20 /* Assuming the current column is COLUMN, return the column that
21    printing C will move the cursor to.
22    The first column is 0. */
23 static int adjust_column(unsigned column, char c)
24 {
25         if (option_mask32 & FLAG_COUNT_BYTES)
26                 return ++column;
27
28         if (c == '\t')
29                 return column + 8 - column % 8;
30
31         if (c == '\b') {
32                 if ((int)--column < 0)
33                         column = 0;
34         }
35         else if (c == '\r')
36                 column = 0;
37         else { /* just a printable char */
38                 if (unicode_status != UNICODE_ON /* every byte is a new char */
39                  || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
40                 ) {
41                         column++;
42                 }
43         }
44         return column;
45 }
46
/* Emit exactly SIZE bytes starting at BUF on stdout.
 * fwrite is used (rather than fputs and friends) so that embedded
 * NUL bytes are written too. The fwrite result is not examined here. */
static void write2stdout(const void *buf, unsigned size)
{
	const char *bytes = buf;

	fwrite(bytes, sizeof(bytes[0]), size, stdout);
}
52
53 int fold_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
54 int fold_main(int argc UNUSED_PARAM, char **argv)
55 {
56         char *line_out = NULL;
57         const char *w_opt = "80";
58         unsigned width;
59         smallint exitcode = EXIT_SUCCESS;
60
61         init_unicode();
62
63         if (ENABLE_INCLUDE_SUSv2) {
64                 /* Turn any numeric options into -w options.  */
65                 int i;
66                 for (i = 1; argv[i]; i++) {
67                         const char *a = argv[i];
68                         if (*a == '-') {
69                                 a++;
70                                 if (*a == '-' && !a[1]) /* "--" */
71                                         break;
72                                 if (isdigit(*a))
73                                         argv[i] = xasprintf("-w%s", a);
74                         }
75                 }
76         }
77
78         getopt32(argv, "bsw:", &w_opt);
79         width = xatou_range(w_opt, 1, 10000);
80
81         argv += optind;
82         if (!*argv)
83                 *--argv = (char*)"-";
84
85         do {
86                 FILE *istream = fopen_or_warn_stdin(*argv);
87                 int c;
88                 unsigned column = 0;     /* Screen column where next char will go */
89                 unsigned offset_out = 0; /* Index in 'line_out' for next char */
90
91                 if (istream == NULL) {
92                         exitcode = EXIT_FAILURE;
93                         continue;
94                 }
95
96                 while ((c = getc(istream)) != EOF) {
97                         /* We grow line_out in chunks of 0x1000 bytes */
98                         if ((offset_out & 0xfff) == 0) {
99                                 line_out = xrealloc(line_out, offset_out + 0x1000);
100                         }
101  rescan:
102                         line_out[offset_out] = c;
103                         if (c == '\n') {
104                                 write2stdout(line_out, offset_out + 1);
105                                 column = offset_out = 0;
106                                 continue;
107                         }
108                         column = adjust_column(column, c);
109                         if (column <= width || offset_out == 0) {
110                                 /* offset_out == 0 case happens
111                                  * with small width (say, 1) and tabs.
112                                  * The very first tab already goes to column 8,
113                                  * but we must not wrap it */
114                                 offset_out++;
115                                 continue;
116                         }
117
118                         /* This character would make the line too long.
119                          * Print the line plus a newline, and make this character
120                          * start the next line */
121                         if (option_mask32 & FLAG_BREAK_SPACES) {
122                                 unsigned i;
123                                 unsigned logical_end;
124
125                                 /* Look for the last blank. */
126                                 for (logical_end = offset_out - 1; (int)logical_end >= 0; logical_end--) {
127                                         if (!isblank(line_out[logical_end]))
128                                                 continue;
129
130                                         /* Found a space or tab.
131                                          * Output up to and including it, and start a new line */
132                                         logical_end++;
133                                         /*line_out[logical_end] = '\n'; - NO! this nukes one buffered character */
134                                         write2stdout(line_out, logical_end);
135                                         putchar('\n');
136                                         /* Move the remainder to the beginning of the next line.
137                                          * The areas being copied here might overlap. */
138                                         memmove(line_out, line_out + logical_end, offset_out - logical_end);
139                                         offset_out -= logical_end;
140                                         for (column = i = 0; i < offset_out; i++) {
141                                                 column = adjust_column(column, line_out[i]);
142                                         }
143                                         goto rescan;
144                                 }
145                                 /* No blank found, wrap will split the overlong word */
146                         }
147                         /* Output what we accumulated up to now, and start a new line */
148                         line_out[offset_out] = '\n';
149                         write2stdout(line_out, offset_out + 1);
150                         column = offset_out = 0;
151                         goto rescan;
152                 } /* while (not EOF) */
153
154                 if (offset_out) {
155                         write2stdout(line_out, offset_out);
156                 }
157
158                 if (fclose_if_not_stdin(istream)) {
159                         bb_simple_perror_msg(*argv);
160                         exitcode = EXIT_FAILURE;
161                 }
162         } while (*++argv);
163
164         fflush_stdout_and_exit(exitcode);
165 }