1 /* unexpand - convert blanks to tabs
2 Copyright (C) 89, 91, 1995-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By default, convert only maximal strings of initial blanks and tabs into tabs.
20 Preserves backspace characters in the output; they decrement the
21 column count for tab calculations.
22 The default action is equivalent to -8.
25 --tabs=tab1[,tab2[,...]]
27 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
28 columns apart instead of the default 8. Otherwise,
29 set the tabs at columns tab1, tab2, etc. (numbered from
30 0); preserve any blanks beyond the tab stops given.
32 -a Use tabs wherever they would replace 2 or more blanks,
33 not just at the beginnings of lines.
35 David MacKenzie <djm@gnu.ai.mit.edu> */
41 #include <sys/types.h>
48 /* The official name of this program (e.g., no `g' prefix). */
49 #define PROGRAM_NAME "unexpand"
/* Author credit; handed to case_GETOPT_VERSION_CHAR together with
   PROGRAM_NAME in main. */
51 #define AUTHORS "David MacKenzie"
53 /* The number of bytes added at a time to the amount of memory
54 allocated for the output line. */
/* NOTE(review): OUTPUT_BLOCK is not referenced anywhere in the visible
   part of this file -- confirm whether it is still needed. */
55 #define OUTPUT_BLOCK 256
57 /* The name this program was run with. */
60 /* If true, convert blanks even after nonblank characters have been converted. */
/* main sets this to true while handling options and forces it back to
   false when convert_first_only is set.  The conversion loop consults
   it in `convert &= convert_entire_line | blank'. */
62 static bool convert_entire_line;
64 /* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
/* main assigns 8 when no tab stop was given and tab_list[0] when exactly
   one was given. */
65 static size_t tab_size;
67 /* The maximum distance between tab stops. */
/* Kept up to date by add_tab_stop (or set to 8 by main for the default
   case); it is also the byte size of the pending-blank buffer allocated
   in the conversion loop. */
68 static size_t max_column_width;
70 /* Array of the explicit column numbers of the tab stops;
71 after `tab_list' is exhausted, the rest of the line is printed
72 unchanged. The first column is column 0. */
73 static uintmax_t *tab_list;
75 /* The number of allocated entries in `tab_list'. */
76 static size_t n_tabs_allocated;
78 /* The index of the first invalid element of `tab_list',
79 where the next element can be added. */
/* Doubles as the count of stored tab stops: it is the ENTRIES argument
   passed to validate_tab_stops. */
80 static size_t first_free_tab;
82 /* Null-terminated array of input filenames. */
/* next_file advances this pointer as it consumes entries. */
83 static char **file_list;
85 /* Default for `file_list' if no files are given on the command line. */
/* NOTE(review): the initializer lines are missing from this extract. */
86 static char *stdin_argv[] =
91 /* True if we have ever read standard input. */
/* Set by next_file when it sees the operand `-'; main uses it to decide
   whether stdin must be closed and checked at exit. */
92 static bool have_read_stdin;
94 /* The desired exit status. */
/* Initialized to EXIT_SUCCESS in main; next_file downgrades it to
   EXIT_FAILURE after reporting a per-file error. */
95 static int exit_status;
97 /* For long options that have no equivalent short option, use a
98 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
/* NOTE(review): the enclosing enum header and braces are not visible in
   this extract. */
101 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
/* Long-option table passed to getopt_long in main.  --tabs and --all map
   onto the short options 't' and 'a'; --first-only has no short form and
   is reported as CONVERT_FIRST_ONLY_OPTION.  Only --tabs takes an
   argument. */
104 static struct option const longopts[] =
106 {"tabs", required_argument, NULL, 't'},
107 {"all", no_argument, NULL, 'a'},
108 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
109 {GETOPT_HELP_OPTION_DECL},
110 {GETOPT_VERSION_OPTION_DECL},
/* Help/usage output -- presumably the body of usage (), which main calls
   as `usage (EXIT_FAILURE)'; the function header is not visible here.
   When STATUS is not EXIT_SUCCESS a one-line "Try --help" hint is written
   to stderr.  The remaining lines are the text of the full help message
   (option summary, the shared --help/--version descriptions, and the
   bug-report address), written to stdout.
   NOTE(review): the statements that open and close the message strings,
   and whatever terminates the function, are missing from this extract. */
117 if (status != EXIT_SUCCESS)
118 fprintf (stderr, _("Try `%s --help' for more information.\n"),
123 Usage: %s [OPTION]... [FILE]...\n\
127 Convert blanks in each FILE to tabs, writing to standard output.\n\
128 With no FILE, or when FILE is -, read standard input.\n\
132 Mandatory arguments to long options are mandatory for short options too.\n\
135 -a, --all convert all blanks, instead of just initial blanks\n\
136 --first-only convert only leading sequences of blanks (overrides -a)\n\
137 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
138 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
140 fputs (HELP_OPTION_DESCRIPTION, stdout);
141 fputs (VERSION_OPTION_DESCRIPTION, stdout);
142 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
147 /* Add tab stop TABVAL to the end of `tab_list'. */
/* Besides appending, this keeps max_column_width equal to the widest gap
   seen so far between consecutive stops (the first stop is measured from
   column 0).  A TABVAL smaller than its predecessor contributes a gap of
   0 here; ordering itself is checked by validate_tab_stops, not in this
   function. */
150 add_tab_stop (uintmax_t tabval)
152 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
153 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
/* Array full: let x2nrealloc enlarge it.  It is given &n_tabs_allocated,
   presumably so it can store the new capacity there. */
155 if (first_free_tab == n_tabs_allocated)
156 tab_list = x2nrealloc (tab_list, &n_tabs_allocated, sizeof *tab_list);
157 tab_list[first_free_tab++] = tabval;
159 if (max_column_width < column_width)
/* max_column_width is a size_t and is later used as an allocation size,
   so a gap that does not fit in size_t is a fatal error. */
161 if (SIZE_MAX < column_width)
162 error (EXIT_FAILURE, 0, _("tabs are too far apart"));
163 max_column_width = column_width;
167 /* Add the comma or blank separated list of tab stops STOPS
168 to the list of tab stops. */
/* STOPS is scanned one character at a time: digits are accumulated into
   tabval, and a comma or blank hands the accumulated value to
   add_tab_stop.  Problems are reported with error (0, 0, ...), i.e.
   with a zero exit status argument at the point of detection.
   NOTE(review): several lines (braces, the have_tabval bookkeeping, and
   whatever happens after a diagnostic) are missing from this extract. */
171 parse_tab_stops (char const *stops)
173 bool have_tabval = false;
174 uintmax_t tabval IF_LINT (= 0);
175 char const *num_start IF_LINT (= NULL);
178 for (; *stops; stops++)
180 if (*stops == ',' || ISBLANK (to_uchar (*stops)))
183 add_tab_stop (tabval);
186 else if (ISDIGIT (*stops))
195 /* Detect overflow. */
196 uintmax_t new_t = 10 * tabval + *stops - '0';
/* Two-part test: multiplying by 10 wraps when tabval exceeds
   UINTMAX_MAX / 10, and adding the digit wrapped if the sum ended up
   below tabval * 10. */
197 if (UINTMAX_MAX / 10 < tabval || new_t < tabval * 10)
/* Quote the whole run of digits in the diagnostic, not just the prefix
   consumed so far. */
199 size_t len = strspn (num_start, "0123456789");
200 char *bad_num = xstrndup (num_start, len);
201 error (0, 0, _("tab stop is too large %s"), quote (bad_num));
/* Park STOPS on the last digit of the oversized number so that the
   loop's stops++ resumes right after it. */
204 stops = num_start + len - 1;
211 error (0, 0, _("tab size contains invalid character(s): %s"),
/* Final add after the loop -- presumably for a number that runs up to
   the end of the string; its guard is not visible here. */
222 add_tab_stop (tabval);
225 /* Check that the list of tab stops TABS, with ENTRIES entries,
226 contains only nonzero, ascending values. */
/* Both violations are fatal: error is called with EXIT_FAILURE.  The
   `<=' comparison means two equal consecutive stops are rejected as
   well.
   NOTE(review): the condition guarding the "cannot be 0" diagnostic and
   the update of prev_tab are missing from this extract. */
229 validate_tab_stops (uintmax_t const *tabs, size_t entries)
231 uintmax_t prev_tab = 0;
234 for (i = 0; i < entries; i++)
237 error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
238 if (tabs[i] <= prev_tab)
239 error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
244 /* Close the old stream pointer FP if it is non-NULL,
245 and return a new one opened to read the next input file.
246 Open a filename of `-' as the standard input.
247 Return NULL if there are no more input files. */
/* Every failure in this block is reported with error (0, errno, ...)
   and recorded by setting exit_status to EXIT_FAILURE.  The global
   file_list pointer is advanced as entries are consumed.
   NOTE(review): the function header and many control-flow lines are
   missing from this extract; the notes below describe only the visible
   statements. */
/* Static, so the name survives between calls; it labels diagnostics
   about the stream being closed. */
252 static char *prev_file;
259 error (0, errno, "%s", prev_file);
260 exit_status = EXIT_FAILURE;
/* One branch only clears the stream state; every other stream is closed
   with fclose.  The condition selecting the clearerr branch is not
   visible -- presumably it tests for stdin. */
263 clearerr (fp); /* Also clear EOF. */
264 else if (fclose (fp) != 0)
266 error (0, errno, "%s", prev_file);
267 exit_status = EXIT_FAILURE;
/* Walk the remaining operands; a file that cannot be opened is reported
   using its own name. */
271 while ((file = *file_list++) != NULL)
/* The operand is exactly "-". */
273 if (file[0] == '-' && file[1] == '\0')
275 have_read_stdin = true;
279 fp = fopen (file, "r");
285 error (0, errno, "%s", file);
286 exit_status = EXIT_FAILURE;
291 /* Change blanks to tabs, writing to stdout.
292 Read each file in `file_list', in order. */
/* Input streams are obtained through next_file.  Blanks are buffered
   in pending_blank until it is known whether a tab can replace them.
   Write failures and over-long lines are fatal (error with
   EXIT_FAILURE).
   NOTE(review): the function header, the loop and brace structure, and
   several declarations are missing from this extract.  Two comments
   below ("The following variables ..." and "Go back one column ...")
   are truncated; they are left exactly as found, and no notes are added
   after the second one so that comment boundaries are not altered. */
298 FILE *fp = next_file (NULL);
300 /* The array of pending blanks. In non-POSIX locales, blanks can
301 include characters other than spaces, so the blanks must be
302 stored, not merely counted. */
308 /* Binary I/O will preserve the original EOL style (DOS/Unix) of files. */
309 SET_BINARY2 (fileno (fp), STDOUT_FILENO);
311 /* The worst case is a non-blank character, then one blank, then a
312 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
313 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
314 pending_blank = xmalloc (max_column_width);
318 /* Input character, or EOF. */
321 /* If true, perform translations. */
325 /* The following variables have valid values only when CONVERT
328 /* Column of next input character. */
329 uintmax_t column = 0;
331 /* Column the next input tab stop is on. */
332 uintmax_t next_tab_column = 0;
334 /* Index in TAB_LIST of next tab stop to examine. */
335 size_t tab_index = 0;
337 /* If true, the first pending blank came just before a tab stop. */
338 bool one_blank_before_tab_stop = false;
340 /* If true, the previous input character was a blank. This is
341 initially true, since initial strings of blanks are treated
342 as if the line was preceded by a blank. */
343 bool prev_blank = true;
345 /* Number of pending columns of blanks. */
349 /* Convert a line of text. */
/* Fetch the next character.  A negative getc result makes the loop ask
   next_file for another stream and retry; each new stream gets the same
   binary-mode treatment as the first. */
353 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
354 SET_BINARY2 (fileno (fp), STDOUT_FILENO);
358 bool blank = ISBLANK (c);
/* The current column has reached the remembered stop, so a new
   next_tab_column is needed.  The visible lines show two sources for
   it: the next multiple of tab_size past COLUMN, or the next entry of
   tab_list until tab_index reaches first_free_tab.  The branch
   structure choosing between them is not visible here. */
362 if (next_tab_column <= column)
366 column + (tab_size - column % tab_size);
369 if (tab_index == first_free_tab)
376 uintmax_t tab = tab_list[tab_index++];
379 next_tab_column = tab;
/* A stop behind the current column is fatal -- presumably this catches
   uintmax_t wrap-around of the column arithmetic; confirm. */
387 if (next_tab_column < column)
388 error (EXIT_FAILURE, 0, _("input line is too long"));
392 column = next_tab_column;
394 /* Discard pending blanks, unless it was a single
395 blank just before the previous tab stop. */
396 if (! (pending == 1 && one_blank_before_tab_stop))
399 one_blank_before_tab_stop = false;
406 if (! (prev_blank && column == next_tab_column))
408 /* It is not yet known whether the pending blanks
409 will be replaced by tabs. */
410 if (column == next_tab_column)
411 one_blank_before_tab_stop = true;
412 pending_blank[pending++] = c;
417 /* Replace the pending blanks by a tab or two. */
/* Slot 0 becomes the tab.  `pending' is then the bool
   one_blank_before_tab_stop converted to a count: 1 or 0. */
418 pending_blank[0] = c = '\t';
419 pending = one_blank_before_tab_stop;
425 /* Go back one column, and force recalculation of the
428 next_tab_column = column;
429 tab_index -= !!tab_index;
435 error (EXIT_FAILURE, 0, _("input line is too long"));
440 if (fwrite (pending_blank, 1, pending, stdout) != pending)
441 error (EXIT_FAILURE, errno, _("write error"));
443 one_blank_before_tab_stop = false;
447 convert &= convert_entire_line | blank;
452 free (pending_blank);
457 error (EXIT_FAILURE, errno, _("write error"));
/* Program entry point: set up locale and exit handling, parse the
   command-line options into the tab-stop state, validate the tab stops,
   and select the list of input files.
   NOTE(review): the return type, braces, most `case' labels, the call
   that performs the conversion, and the final exit are missing from
   this extract. */
464 main (int argc, char **argv)
/* Accumulator for a tab stop spelled as individual digit options (see
   the digit arithmetic further down). */
466 bool have_tabval = false;
467 uintmax_t tabval IF_LINT (= 0);
470 /* If true, cancel the effect of any -a (explicit or implicit in -t),
471 so that only leading blanks will be considered. */
472 bool convert_first_only = false;
/* Set wherever a tab stop is assembled from digit or ',' options; it is
   checked after option parsing. */
474 bool obsolete_tablist = false;
476 initialize_main (&argc, &argv);
477 program_name = argv[0];
478 setlocale (LC_ALL, "");
479 bindtextdomain (PACKAGE, LOCALEDIR);
480 textdomain (PACKAGE);
482 atexit (close_stdout);
484 have_read_stdin = false;
485 exit_status = EXIT_SUCCESS;
486 convert_entire_line = false;
/* The option string lists ',' and every digit as options of their own,
   which is how an obsolete `-LIST' operand reaches the switch one
   character at a time. */
490 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
496 usage (EXIT_FAILURE);
498 convert_entire_line = true;
/* An explicit tab list also turns on whole-line conversion (the help
   text says -t "enables -a"). */
501 convert_entire_line = true;
502 parse_tab_stops (optarg);
504 case CONVERT_FIRST_ONLY_OPTION:
505 convert_first_only = true;
509 add_tab_stop (tabval);
511 obsolete_tablist = true;
513 case_GETOPT_HELP_CHAR;
514 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
/* Append the digit C to tabval, using the same two-part wrap-around
   test as parse_tab_stops; here overflow is fatal. */
522 uintmax_t new_t = tabval * 10 + c - '0';
523 if (UINTMAX_MAX / 10 < tabval || new_t < tabval * 10)
524 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
527 obsolete_tablist = true;
/* Reject the obsolete syntax when posix2_version () reports 200112 or
   later. */
532 if (obsolete_tablist && 200112 <= posix2_version ())
535 _("`-LIST' option is obsolete; use `--first-only -t LIST'"));
536 usage (EXIT_FAILURE);
/* --first-only wins over -a and over the -a implied by -t, regardless
   of the order in which the options appeared. */
539 if (convert_first_only)
540 convert_entire_line = false;
543 add_tab_stop (tabval);
545 validate_tab_stops (tab_list, first_free_tab);
/* No stops given: uniform stops every 8 columns.  Exactly one stop: use
   it as the uniform tab size. */
547 if (first_free_tab == 0)
548 tab_size = max_column_width = 8;
549 else if (first_free_tab == 1)
550 tab_size = tab_list[0];
/* Remaining operands are the input files; with none, fall back to
   stdin_argv. */
554 file_list = (optind < argc ? &argv[optind] : stdin_argv);
/* Close stdin only if it was actually read, and treat a close failure
   as fatal. */
558 if (have_read_stdin && fclose (stdin) != 0)
559 error (EXIT_FAILURE, errno, "-");