1 /* unexpand - convert blanks to tabs
2 Copyright (C) 89, 91, 1995-2006, 2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By default, convert only maximal strings of initial blanks and tabs into tabs.
19 Preserves backspace characters in the output; they decrement the
20 column count for tab calculations.
21 The default action is equivalent to -8.
24 --tabs=tab1[,tab2[,...]]
26 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
27 columns apart instead of the default 8. Otherwise,
28 set the tabs at columns tab1, tab2, etc. (numbered from
29 0); preserve any blanks beyond the tab stops given.
31 -a Use tabs wherever they would replace 2 or more blanks,
32 not just at the beginnings of lines.
34 David MacKenzie <djm@gnu.ai.mit.edu> */
40 #include <sys/types.h>
46 /* The official name of this program (e.g., no `g' prefix). */
47 #define PROGRAM_NAME "unexpand"
/* Author credit.  main hands it, together with PROGRAM_NAME, to
   case_GETOPT_VERSION_CHAR. */
49 #define AUTHORS proper_name ("David MacKenzie")
51 /* The number of bytes added at a time to the amount of memory
52 allocated for the output line.
   NOTE(review): no use of OUTPUT_BLOCK is visible in this listing; the
   pending-blank buffer in unexpand is sized from max_column_width
   instead.  Confirm whether this macro is still referenced anywhere. */
53 #define OUTPUT_BLOCK 256
55 /* If true, convert blanks even after nonblank characters have been read on the line. */
57 static bool convert_entire_line;
/* convert_entire_line is switched on by the `a' and `t' option handling
   in main and forced back to false there when --first-only was given. */
59 /* If nonzero, the size of all tab stops. If zero, use `tab_list' instead.
   main stores 8 here when no tab stop was supplied and tab_list[0] when
   exactly one was supplied. */
60 static size_t tab_size;
62 /* The maximum distance between tab stops.  Raised by add_tab_stop as
   stops are appended; unexpand allocates its pending-blank buffer with
   exactly this many bytes. */
63 static size_t max_column_width;
65 /* Array of the explicit column numbers of the tab stops;
66 after `tab_list' is exhausted, the rest of the line is printed
67 unchanged. The first column is column 0.
   The array is grown on demand by add_tab_stop. */
68 static uintmax_t *tab_list;
70 /* The number of allocated entries in `tab_list'.  Compared against
   first_free_tab in add_tab_stop to decide when to grow the array. */
71 static size_t n_tabs_allocated;
73 /* The index of the first invalid element of `tab_list',
74 where the next element can be added.  Equivalently, the number of
   tab stops stored so far; main passes it to validate_tab_stops as the
   entry count. */
75 static size_t first_free_tab;
77 /* Null-terminated array of input filenames.  next_file advances this
   pointer as it consumes entries. */
78 static char **file_list;
80 /* Default for `file_list' if no files are given on the command line.
   NOTE(review): the initializer lines are not visible in this listing. */
81 static char *stdin_argv[] =
86 /* True if we have ever read standard input.  Set by next_file when it
   meets the name `-'; main uses it to decide whether to close stdin. */
87 static bool have_read_stdin;
89 /* The desired exit status.  next_file sets it to EXIT_FAILURE after a
   read, close or open error on an input file. */
90 static int exit_status;
92 /* For long options that have no equivalent short option, use a
93 non-character as a pseudo short option, starting with CHAR_MAX + 1.
   NOTE(review): the lines that open and close this declaration are not
   visible in this listing. */
96 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
/* Long-option table handed to getopt_long in main.  --tabs takes an
   argument and maps onto the short option `t', --all maps onto `a', and
   --first-only has no short form, so it reports
   CONVERT_FIRST_ONLY_OPTION.
   NOTE(review): the braces and the terminating entry of the table are
   not visible in this listing. */
99 static struct option const longopts[] =
101 {"tabs", required_argument, NULL, 't'},
102 {"all", no_argument, NULL, 'a'},
103 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
104 {GETOPT_HELP_OPTION_DECL},
105 {GETOPT_VERSION_OPTION_DECL},
/* Help output.  When STATUS is not EXIT_SUCCESS, a one-line hint that
   points at --help is written to stderr.  The text that follows is the
   full help message: synopsis, description, the note about mandatory
   arguments and the option list, then the standard --help and --version
   descriptions and the bug-reporting address.
   NOTE(review): the function header, the branch that selects the full
   help text, and the lines that open and close the string literals are
   not visible in this listing.  main calls this as usage (EXIT_FAILURE)
   for an unrecognised option -- confirm the rest against the complete
   file. */
112 if (status != EXIT_SUCCESS)
113 fprintf (stderr, _("Try `%s --help' for more information.\n"),
118 Usage: %s [OPTION]... [FILE]...\n\
122 Convert blanks in each FILE to tabs, writing to standard output.\n\
123 With no FILE, or when FILE is -, read standard input.\n\
127 Mandatory arguments to long options are mandatory for short options too.\n\
130 -a, --all convert all blanks, instead of just initial blanks\n\
131 --first-only convert only leading sequences of blanks (overrides -a)\n\
132 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
133 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
135 fputs (HELP_OPTION_DESCRIPTION, stdout);
136 fputs (VERSION_OPTION_DESCRIPTION, stdout);
137 emit_bug_reporting_address ();
142 /* Add tab stop TABVAL to the end of `tab_list'.
   The array is grown first when it is full.  As a side effect,
   max_column_width is raised when the gap between TABVAL and the
   previously stored stop is the widest seen so far.  No ordering check
   is made here: a TABVAL that is not beyond the previous stop simply
   contributes a width of 0 (main runs validate_tab_stops afterwards). */
145 add_tab_stop (uintmax_t tabval)
/* The previously stored stop, or column 0 when this is the first one. */
147 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
/* Clamp to 0 instead of letting the unsigned subtraction wrap. */
148 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
/* X2NREALLOC is defined outside this file; presumably it enlarges the
   array and updates n_tabs_allocated -- confirm. */
150 if (first_free_tab == n_tabs_allocated)
151 tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
152 tab_list[first_free_tab++] = tabval;
154 if (max_column_width < column_width)
/* column_width is a uintmax_t but max_column_width is a size_t, so a
   width that does not fit is reported with status EXIT_FAILURE. */
156 if (SIZE_MAX < column_width)
157 error (EXIT_FAILURE, 0, _("tabs are too far apart"));
158 max_column_width = column_width;
162 /* Add the comma or blank separated list of tab stops STOPS
163 to the list of tab stops.
   STOPS is scanned one character at a time: a comma or blank ends the
   number being collected, a digit extends it, and any other character
   is reported as invalid.
   NOTE(review): several lines of this function (the bookkeeping around
   have_tabval, and whatever follows the two diagnostics) are not
   visible in this listing. */
166 parse_tab_stops (char const *stops)
/* True while a number is being accumulated in TABVAL. */
168 bool have_tabval = false;
/* IF_LINT is defined outside this file; presumably it supplies the
   initializer only in lint-style builds -- confirm. */
169 uintmax_t tabval IF_LINT (= 0);
/* Start of the digit run currently being parsed, for diagnostics. */
170 char const *num_start IF_LINT (= NULL);
173 for (; *stops; stops++)
175 if (*stops == ',' || isblank (to_uchar (*stops)))
/* Separator: hand the collected number to add_tab_stop. */
178 add_tab_stop (tabval);
181 else if (ISDIGIT (*stops))
190 /* Detect overflow: the macro is used here as yielding false when
   appending this digit would not fit in a uintmax_t. */
191 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
/* Quote the whole offending digit run, not just the part consumed
   so far. */
193 size_t len = strspn (num_start, "0123456789");
194 char *bad_num = xstrndup (num_start, len);
195 error (0, 0, _("tab stop is too large %s"), quote (bad_num));
/* Move to the last digit of the bad number, so that the loop's
   stops++ resumes right after it. */
198 stops = num_start + len - 1;
203 error (0, 0, _("tab size contains invalid character(s): %s"),
/* After the loop: the last number has no trailing separator. */
214 add_tab_stop (tabval);
217 /* Check that the list of tab stops TABS, with ENTRIES entries,
218 contains only nonzero, ascending values.
   Each violation is reported through error with status EXIT_FAILURE.
   The comparison against the previous stop uses `<=', so two equal
   neighbouring stops are rejected as well.
   NOTE(review): the declaration of the loop index, the test that
   guards the "cannot be 0" diagnostic, and the update of prev_tab are
   not visible in this listing. */
221 validate_tab_stops (uintmax_t const *tabs, size_t entries)
223 uintmax_t prev_tab = 0;
226 for (i = 0; i < entries; i++)
229 error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
230 if (tabs[i] <= prev_tab)
231 error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
236 /* Close the old stream pointer FP if it is non-NULL,
237 and return a new one opened to read the next input file.
238 Open a filename of `-' as the standard input.
239 Return NULL if there are no more input files.
   Errors on the old or the new file are reported with error (status 0,
   so not fatal) and recorded by setting exit_status to EXIT_FAILURE.
   NOTE(review): the function header, the return statements and the
   assignments to prev_file are not visible in this listing. */
/* Static, so it survives between calls; used below to name the file
   behind FP in diagnostics. */
244 static char *prev_file;
251 error (0, errno, "%s", prev_file);
252 exit_status = EXIT_FAILURE;
/* Standard input is only reset here, not closed; main closes it once
   at the very end when have_read_stdin is set. */
254 if (STREQ (prev_file, "-"))
255 clearerr (fp); /* Also clear EOF. */
256 else if (fclose (fp) != 0)
258 error (0, errno, "%s", prev_file);
259 exit_status = EXIT_FAILURE;
/* Consume entries of file_list until one can be used. */
263 while ((file = *file_list++) != NULL)
265 if (STREQ (file, "-"))
267 have_read_stdin = true;
271 fp = fopen (file, "r");
/* Reached when the open failed: report it and try the next name. */
277 error (0, errno, "%s", file);
278 exit_status = EXIT_FAILURE;
283 /* Change blanks to tabs, writing to stdout.
284 Read each file in `file_list', in order. */
290 FILE *fp = next_file (NULL);
292 /* The array of pending blanks. In non-POSIX locales, blanks can
293 include characters other than spaces, so the blanks must be
294 stored, not merely counted. */
300 /* The worst case is a non-blank character, then one blank, then a
301 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
302 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
303 pending_blank = xmalloc (max_column_width);
307 /* Input character, or EOF. */
310 /* If true, perform translations. */
314 /* The following variables have valid values only when CONVERT
317 /* Column of next input character. */
318 uintmax_t column = 0;
320 /* Column the next input tab stop is on. */
321 uintmax_t next_tab_column = 0;
323 /* Index in TAB_LIST of next tab stop to examine. */
324 size_t tab_index = 0;
326 /* If true, the first pending blank came just before a tab stop. */
327 bool one_blank_before_tab_stop = false;
329 /* If true, the previous input character was a blank. This is
330 initially true, since initial strings of blanks are treated
331 as if the line was preceded by a blank. */
332 bool prev_blank = true;
334 /* Number of pending columns of blanks. */
338 /* Convert a line of text. */
342 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
347 bool blank = !! isblank (c);
351 if (next_tab_column <= column)
355 column + (tab_size - column % tab_size);
358 if (tab_index == first_free_tab)
365 uintmax_t tab = tab_list[tab_index++];
368 next_tab_column = tab;
376 if (next_tab_column < column)
377 error (EXIT_FAILURE, 0, _("input line is too long"));
381 column = next_tab_column;
383 /* Discard pending blanks, unless it was a single
384 blank just before the previous tab stop. */
385 if (! (pending == 1 && one_blank_before_tab_stop))
388 one_blank_before_tab_stop = false;
395 if (! (prev_blank && column == next_tab_column))
397 /* It is not yet known whether the pending blanks
398 will be replaced by tabs. */
399 if (column == next_tab_column)
400 one_blank_before_tab_stop = true;
401 pending_blank[pending++] = c;
406 /* Replace the pending blanks by a tab or two. */
407 pending_blank[0] = c = '\t';
408 pending = one_blank_before_tab_stop;
414 /* Go back one column, and force recalculation of the
417 next_tab_column = column;
418 tab_index -= !!tab_index;
424 error (EXIT_FAILURE, 0, _("input line is too long"));
429 if (fwrite (pending_blank, 1, pending, stdout) != pending)
430 error (EXIT_FAILURE, errno, _("write error"));
432 one_blank_before_tab_stop = false;
436 convert &= convert_entire_line | blank;
441 free (pending_blank);
446 error (EXIT_FAILURE, errno, _("write error"));
/* Program entry point.  Parses the options, builds and validates the
   list of tab stops, chooses between a uniform tab size and the
   explicit list, selects the input files, and finally closes standard
   input if it was read.
   NOTE(review): the return type, the declaration of c, the `case'
   labels of the option switch, the call that performs the conversion
   and the final exit are not visible in this listing. */
453 main (int argc, char **argv)
/* True while a tab stop given through digit options is being
   accumulated in TABVAL. */
455 bool have_tabval = false;
456 uintmax_t tabval IF_LINT (= 0);
459 /* If true, cancel the effect of any -a (explicit or implicit in -t),
460 so that only leading blanks will be considered. */
461 bool convert_first_only = false;
463 initialize_main (&argc, &argv);
464 set_program_name (argv[0]);
465 setlocale (LC_ALL, "");
466 bindtextdomain (PACKAGE, LOCALEDIR);
467 textdomain (PACKAGE);
469 atexit (close_stdout);
471 have_read_stdin = false;
472 exit_status = EXIT_SUCCESS;
473 convert_entire_line = false;
/* The option string lists `,' and every digit as options of their own,
   so a tab list written directly after the dash is delivered one
   character at a time and assembled in TABVAL below. */
477 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
483 usage (EXIT_FAILURE);
485 convert_entire_line = true;
/* An explicit tab list also turns on whole-line conversion; the help
   text describes this as "enables -a". */
488 convert_entire_line = true;
489 parse_tab_stops (optarg);
491 case CONVERT_FIRST_ONLY_OPTION:
492 convert_first_only = true;
496 add_tab_stop (tabval);
499 case_GETOPT_HELP_CHAR;
500 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
/* Append the digit option C to TABVAL; overflow is fatal. */
507 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
508 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
/* Applied after the option loop, so --first-only wins regardless of
   where it appeared relative to -a or -t. */
513 if (convert_first_only)
514 convert_entire_line = false;
/* Store a tab stop still being accumulated from digit options. */
517 add_tab_stop (tabval);
519 validate_tab_stops (tab_list, first_free_tab);
/* No tab stop given: tabs every 8 columns.  Exactly one given: use it
   as the uniform tab size. */
521 if (first_free_tab == 0)
522 tab_size = max_column_width = 8;
523 else if (first_free_tab == 1)
524 tab_size = tab_list[0];
/* Remaining arguments are the input files; with none, fall back to
   stdin_argv. */
528 file_list = (optind < argc ? &argv[optind] : stdin_argv);
/* A failure to close standard input is fatal and reported under the
   name "-". */
532 if (have_read_stdin && fclose (stdin) != 0)
533 error (EXIT_FAILURE, errno, "-");