1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88, 89, 90, 91, 95, 96, 1997 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
61 /* The number of bytes per atomic read. */
62 #define INITIAL_READSIZE 8192
64 /* The number of bytes per atomic write. */
65 #define WRITESIZE 8192
73 /* The name this program was run with. */
76 /* The string that separates the records of the file. */
77 static char *separator;
79 /* If nonzero, print `separator' along with the record preceding it
80 in the file; otherwise with the record following it. */
81 static int separator_ends_record;
83 /* 0 if `separator' is to be matched as a regular expression;
84 otherwise, the length of `separator', used as a sentinel to stop the search. */
86 static int sentinel_length;
88 /* The length of a match with `separator'. If `sentinel_length' is 0,
89 `match_length' is computed every time a match succeeds;
90 otherwise, it is simply the length of `separator'. */
91 static int match_length;
93 /* The input buffer. */
94 static char *G_buffer;
96 /* The number of bytes to read at once into `G_buffer'. */
97 static unsigned read_size;
99 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
100 The extra 2 bytes allow `past_end' to have a value beyond the
101 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
102 static unsigned G_buffer_size;
104 /* The compiled regular expression representing `separator'. */
105 static struct re_pattern_buffer compiled_separator;
107 /* If nonzero, display usage information and exit. */
108 static int show_help;
110 /* If nonzero, print the version on standard output then exit. */
111 static int show_version;
113 static struct option const longopts[] =
115 {"before", no_argument, &separator_ends_record, 0},
116 {"regex", no_argument, &sentinel_length, 0},
117 {"separator", required_argument, NULL, 's'},
118 {"help", no_argument, &show_help, 1},
119 {"version", no_argument, &show_version, 1},
123 /* Read LEN bytes at PTR from descriptor DESC, retrying if interrupted.
124 Return the actual number of bytes read, zero for EOF, or negative for an error. */
128 safe_read (int desc, char *ptr, int len)
138 n_chars = read (desc, ptr, len);
140 while (n_chars < 0 && errno == EINTR);
142 n_chars = read (desc, ptr, len);
152 fprintf (stderr, _("Try `%s --help' for more information.\n"),
157 Usage: %s [OPTION]... [FILE]...\n\
161 Write each FILE to standard output, last line first.\n\
162 With no FILE, or when FILE is -, read standard input.\n\
164 -b, --before attach the separator before instead of after\n\
165 -r, --regex interpret the separator as a regular expression\n\
166 -s, --separator=STRING use STRING as the separator instead of newline\n\
167 --help display this help and exit\n\
168 --version output version information and exit\n\
170 puts (_("\nReport bugs to <textutils-bugs@gnu.ai.mit.edu>."));
172 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
175 /* Print the characters from START to PAST_END - 1.
176 If START is NULL, just flush the buffer. */
179 output (const char *start, const char *past_end)
181 static char buffer[WRITESIZE];
182 static int bytes_in_buffer = 0;
183 int bytes_to_add = past_end - start;
184 int bytes_available = WRITESIZE - bytes_in_buffer;
188 fwrite (buffer, 1, bytes_in_buffer, stdout);
193 /* Write out as many full buffers as possible. */
194 while (bytes_to_add >= bytes_available)
196 memcpy (buffer + bytes_in_buffer, start, bytes_available);
197 bytes_to_add -= bytes_available;
198 start += bytes_available;
199 fwrite (buffer, 1, WRITESIZE, stdout);
201 bytes_available = WRITESIZE;
204 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
205 bytes_in_buffer += bytes_to_add;
208 /* Print in reverse the file open on stream IN for reading FILE.
209 Return 0 if ok, 1 if an error occurs. */
212 tac_stream (FILE *in, const char *file)
214 /* Pointer to the location in `G_buffer' where the search for
215 the next separator will begin. */
218 /* Pointer to one past the rightmost character in `G_buffer' that
219 has not been printed yet. */
222 /* Length of the record growing in `G_buffer'. */
223 unsigned saved_record_size;
225 /* Offset in the file of the next read. */
228 /* Nonzero if `output' has not been called yet for any file.
229 Only used when the separator is attached to the preceding record. */
231 char first_char = *separator; /* Speed optimization, non-regexp. */
232 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
233 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
234 struct re_registers regs;
236 /* Find the size of the input file. */
237 file_pos = lseek (fileno (in), (off_t) 0, SEEK_END);
239 return 0; /* It's an empty file. */
241 /* Arrange for the first read to lop off enough to leave the rest of the
242 file a multiple of `read_size'. Since `read_size' can change, this may
243 not always hold during the program run, but since it usually will, leave
244 it here for i/o efficiency (page/sector boundaries and all that).
245 Note: the efficiency gain has not been verified. */
246 saved_record_size = file_pos % read_size;
247 if (saved_record_size == 0)
248 saved_record_size = read_size;
249 file_pos -= saved_record_size;
250 /* `file_pos' now points to the start of the last (probably partial) block
251 in the input file. */
253 lseek (fileno (in), file_pos, SEEK_SET);
254 if (safe_read (fileno (in), G_buffer, saved_record_size) != saved_record_size)
256 error (0, errno, "%s", file);
260 match_start = past_end = G_buffer + saved_record_size;
261 /* For non-regexp search, move past impossible positions for a match. */
263 match_start -= match_length1;
267 /* Search backward from `match_start' - 1 to `G_buffer' for a match
268 with `separator'; for speed, use strncmp if `separator' contains no
270 If the match succeeds, set `match_start' to point to the start of
271 the match and `match_length' to the length of the match.
272 Otherwise, make `match_start' < `G_buffer'. */
273 if (sentinel_length == 0)
275 int i = match_start - G_buffer;
278 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
280 match_start = G_buffer - 1;
283 error (EXIT_FAILURE, 0,
284 _("error in regular expression search"));
288 match_start = G_buffer + regs.start[0];
289 match_length = regs.end[0] - regs.start[0];
294 /* `match_length' is constant for non-regexp boundaries. */
295 while (*--match_start != first_char
296 || (match_length1 && strncmp (match_start + 1, separator1,
301 /* Check whether we backed off the front of `G_buffer' without finding
302 a match for `separator'. */
303 if (match_start < G_buffer)
307 /* Hit the beginning of the file; print the remaining record. */
308 output (G_buffer, past_end);
312 saved_record_size = past_end - G_buffer;
313 if (saved_record_size > read_size)
315 /* `G_buffer_size' is about twice `read_size', so since
316 we want to read in another `read_size' bytes before
317 the data already in `G_buffer', we need to increase `G_buffer_size'. */
320 int offset = sentinel_length ? sentinel_length : 1;
323 G_buffer_size = read_size * 2 + sentinel_length + 2;
324 newbuffer = xrealloc (G_buffer - offset, G_buffer_size) + offset;
325 /* Adjust the pointers for the new buffer location. */
326 match_start += newbuffer - G_buffer;
327 past_end += newbuffer - G_buffer;
328 G_buffer = newbuffer;
331 /* Back up to the start of the next bufferfull of the file. */
332 if (file_pos >= read_size)
333 file_pos -= read_size;
336 read_size = file_pos;
339 lseek (fileno (in), file_pos, SEEK_SET);
341 /* Shift the pending record data right to make room for the new.
342 The source and destination regions probably overlap. */
343 memmove (G_buffer + read_size, G_buffer, saved_record_size);
344 past_end = G_buffer + read_size + saved_record_size;
345 /* For non-regexp searches, avoid unnecessary scanning. */
347 match_start = G_buffer + read_size;
349 match_start = past_end;
351 if (safe_read (fileno (in), G_buffer, read_size) != read_size)
353 error (0, errno, "%s", file);
359 /* Found a match of `separator'. */
360 if (separator_ends_record)
362 char *match_end = match_start + match_length;
364 /* If this match of `separator' isn't at the end of the
365 file, print the record. */
366 if (first_time == 0 || match_end != past_end)
367 output (match_end, past_end);
368 past_end = match_end;
373 output (match_start, past_end);
374 past_end = match_start;
376 match_start -= match_length - 1;
381 /* Print FILE in reverse.
382 Return 0 if ok, 1 if an error occurs. */
385 tac_file (const char *file)
390 in = fopen (file, "r");
393 error (0, errno, "%s", file);
396 errors = tac_stream (in, file);
397 if (ferror (in) || fclose (in) == EOF)
399 error (0, errno, "%s", file);
405 /* Make a copy of the standard input in a temporary file and store that file's name in *G_TEMPFILE. */
408 save_stdin (FILE **g_tmp, char **g_tempfile)
410 static char *template = NULL;
411 static char *tempdir;
412 static char *tempfile;
417 if (template == NULL)
419 tempdir = getenv ("TMPDIR");
421 tempdir = DEFAULT_TMPDIR;
422 template = xmalloc (strlen (tempdir) + 11);
424 sprintf (template, "%s/tacXXXXXX", tempdir);
425 tempfile = mktemp (template);
427 fd = creat (tempfile, 0600);
428 if (fd == -1 || (tmp = fdopen (fd, "rw")) == NULL)
429 error (EXIT_FAILURE, errno, "%s", tempfile);
430 tmp = fdopen (fd, "rw");
432 error (EXIT_FAILURE, errno, "%s", tempfile);
435 while ((bytes_read = safe_read (0, G_buffer, read_size)) > 0)
436 fwrite (G_buffer, 1, bytes_read, tmp);
438 if (ferror (tmp) || fflush (tmp) == EOF)
439 error (EXIT_FAILURE, errno, "%s", tempfile);
441 if (fseek (tmp, (long int) 0, SEEK_SET))
442 error (EXIT_FAILURE, errno, "%s", tempfile);
444 if (bytes_read == -1)
445 error (EXIT_FAILURE, errno, _("read error"));
448 *g_tempfile = tempfile;
451 /* Print the standard input in reverse, saving it to a temporary
452 file first if it is not a regular file.
453 Return 0 if ok, 1 if an error occurs. */
458 /* Previous values of signal handlers. */
462 /* No tempfile is needed for "tac < file".
463 Use fstat instead of checking for errno == ESPIPE because
464 lseek doesn't work on some special files but doesn't return an
466 if (fstat (0, &stats))
468 error (0, errno, _("standard input"));
472 if (S_ISREG (stats.st_mode))
474 errors = tac_stream (stdin, _("standard input"));
480 save_stdin (&tmp_stream, &tmp_file);
481 errors = tac_stream (tmp_stream, tmp_file);
487 /* BUF_END_PLUS_ONE points one byte past the end of the buffer
491 memrchr (const char *buf_start, const char *buf_end_plus_one, int c)
493 const char *p = buf_end_plus_one;
494 while (buf_start <= --p)
496 if (*(const unsigned char *) p == c)
503 tac_mem (const char *buf, size_t n_bytes, FILE *out)
509 const char *nl = memrchr (buf, buf + n_bytes, '\n');
510 const char *bol = (nl == NULL ? buf : nl + 1);
512 /* If the last line of the input file has no terminating newline,
513 treat it as a special case. */
514 if (bol < buf + n_bytes)
516 /* Print out the line from bol to end of input. */
517 fwrite (bol, 1, (buf + n_bytes) - bol, out);
519 /* Add a newline here. Otherwise, the first and second lines
520 of output would appear to have been joined. */
524 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
526 /* Output the line (which includes a trailing newline)
527 from NL+1 to BOL-1. */
528 fwrite (nl + 1, 1, bol - 1 - (nl + 1), out);
533 /* If there's anything left, output the last line: BUF .. BOL-1.
534 When the first byte of the input is a newline, there is nothing
537 fwrite (buf, 1, bol - buf, out);
542 tac_stdin_to_mem (void)
545 size_t bufsiz = 8 * BUFSIZ;
546 size_t delta = 8 * BUFSIZ;
553 buf = (char *) malloc (bufsiz);
555 buf = (char *) realloc (buf, bufsiz);
559 /* Free the buffer and fall back on the code that relies on a
565 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
568 n_bytes += bytes_read;
570 error (1, errno, _("read error"));
575 tac_mem (buf, n_bytes, stdout);
581 main (int argc, char **argv)
583 const char *error_message; /* Return value from re_compile_pattern. */
585 int have_read_stdin = 0;
587 program_name = argv[0];
588 setlocale (LC_ALL, "");
589 bindtextdomain (PACKAGE, LOCALEDIR);
590 textdomain (PACKAGE);
595 separator_ends_record = 1;
597 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
604 separator_ends_record = 0;
612 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
621 printf ("tac (%s) %s\n", GNU_PACKAGE, VERSION);
628 if (sentinel_length == 0)
630 compiled_separator.allocated = 100;
631 compiled_separator.buffer = (unsigned char *)
632 xmalloc (compiled_separator.allocated);
633 compiled_separator.fastmap = xmalloc (256);
634 compiled_separator.translate = 0;
635 error_message = re_compile_pattern (separator, strlen (separator),
636 &compiled_separator);
638 error (EXIT_FAILURE, 0, "%s", error_message);
641 match_length = sentinel_length = strlen (separator);
643 read_size = INITIAL_READSIZE;
644 /* A precaution that will probably never be needed. */
645 while (sentinel_length * 2 >= read_size)
647 G_buffer_size = read_size * 2 + sentinel_length + 2;
648 G_buffer = xmalloc (G_buffer_size);
651 strcpy (G_buffer, separator);
652 G_buffer += sentinel_length;
660 errors = tac_stdin_to_mem ();
663 for (; optind < argc; ++optind)
665 if (strcmp (argv[optind], "-") == 0)
668 errors |= tac_stdin_to_mem ();
671 errors |= tac_file (argv[optind]);
674 /* Flush the output buffer. */
675 output ((char *) NULL, (char *) NULL);
677 if (have_read_stdin && close (0) < 0)
678 error (EXIT_FAILURE, errno, "-");
679 if (ferror (stdout) || fclose (stdout) == EOF)
680 error (EXIT_FAILURE, errno, _("write error"));
681 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);