1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
48 #include "safe-read.h"
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "tac"
53 #define AUTHORS "Jay Lepreau", "David MacKenzie"
55 #if defined __MSDOS__ || defined _WIN32
56 /* Define this to non-zero on systems for which the regular mechanism
57 (of unlinking an open file and expecting to be able to write, seek
58 back to the beginning, then reread it) doesn't work. E.g., on Windows
60 # define DONT_UNLINK_WHILE_OPEN 1
64 #ifndef DEFAULT_TMPDIR
65 # define DEFAULT_TMPDIR "/tmp"
68 /* The number of bytes per atomic read. */
69 #define INITIAL_READSIZE 8192
71 /* The number of bytes per atomic write. */
72 #define WRITESIZE 8192
74 /* The name this program was run with. */
77 /* The string that separates the records of the file. */
78 static char *separator;
80 /* If true, print `separator' along with the record preceding it
81 in the file; otherwise with the record following it. */
82 static bool separator_ends_record;
84 /* 0 if `separator' is to be matched as a regular expression;
85 otherwise, the length of `separator', used as a sentinel to
87 static size_t sentinel_length;
89 /* The length of a match with `separator'. If `sentinel_length' is 0,
90 `match_length' is computed every time a match succeeds;
91 otherwise, it is simply the length of `separator'. */
92 static size_t match_length;
94 /* The input buffer. */
95 static char *G_buffer;
97 /* The number of bytes to read at once into `buffer'. */
98 static size_t read_size;
100 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
101 The extra 2 bytes allow `past_end' to have a value beyond the
102 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
103 static size_t G_buffer_size;
105 /* The compiled regular expression representing `separator'. */
106 static struct re_pattern_buffer compiled_separator;
108 static struct option const longopts[] =
110 {"before", no_argument, NULL, 'b'},
111 {"regex", no_argument, NULL, 'r'},
112 {"separator", required_argument, NULL, 's'},
113 {GETOPT_HELP_OPTION_DECL},
114 {GETOPT_VERSION_OPTION_DECL},
121 if (status != EXIT_SUCCESS)
122 fprintf (stderr, _("Try `%s --help' for more information.\n"),
127 Usage: %s [OPTION]... [FILE]...\n\
131 Write each FILE to standard output, last line first.\n\
132 With no FILE, or when FILE is -, read standard input.\n\
136 Mandatory arguments to long options are mandatory for short options too.\n\
139 -b, --before attach the separator before instead of after\n\
140 -r, --regex interpret the separator as a regular expression\n\
141 -s, --separator=STRING use STRING as the separator instead of newline\n\
143 fputs (HELP_OPTION_DESCRIPTION, stdout);
144 fputs (VERSION_OPTION_DESCRIPTION, stdout);
145 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
150 /* Print the characters from START to PAST_END - 1.
151 If START is NULL, just flush the buffer. */
154 output (const char *start, const char *past_end)
156 static char buffer[WRITESIZE];
157 static size_t bytes_in_buffer = 0;
158 size_t bytes_to_add = past_end - start;
159 size_t bytes_available = WRITESIZE - bytes_in_buffer;
163 fwrite (buffer, 1, bytes_in_buffer, stdout);
168 /* Write out as many full buffers as possible. */
169 while (bytes_to_add >= bytes_available)
171 memcpy (buffer + bytes_in_buffer, start, bytes_available);
172 bytes_to_add -= bytes_available;
173 start += bytes_available;
174 fwrite (buffer, 1, WRITESIZE, stdout);
176 bytes_available = WRITESIZE;
179 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
180 bytes_in_buffer += bytes_to_add;
183 /* Print in reverse the file open on descriptor FD for reading FILE.
184 Return true if successful. */
187 tac_seekable (int input_fd, const char *file)
189 /* Pointer to the location in `G_buffer' where the search for
190 the next separator will begin. */
193 /* Pointer to one past the rightmost character in `G_buffer' that
194 has not been printed yet. */
197 /* Length of the record growing in `G_buffer'. */
198 size_t saved_record_size;
200 /* Offset in the file of the next read. */
203 /* True if `output' has not been called yet for any file.
204 Only used when the separator is attached to the preceding record. */
205 bool first_time = true;
206 char first_char = *separator; /* Speed optimization, non-regexp. */
207 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
208 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
209 struct re_registers regs;
211 /* Find the size of the input file. */
212 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
214 return true; /* It's an empty file. */
216 /* Arrange for the first read to lop off enough to leave the rest of the
217 file a multiple of `read_size'. Since `read_size' can change, this may
218 not always hold during the program run, but since it usually will, leave
219 it here for i/o efficiency (page/sector boundaries and all that).
220 Note: the efficiency gain has not been verified. */
221 saved_record_size = file_pos % read_size;
222 if (saved_record_size == 0)
223 saved_record_size = read_size;
224 file_pos -= saved_record_size;
225 /* `file_pos' now points to the start of the last (probably partial) block
226 in the input file. */
228 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
229 error (0, errno, _("%s: seek failed"), file);
231 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
233 error (0, errno, "%s", file);
237 match_start = past_end = G_buffer + saved_record_size;
238 /* For non-regexp search, move past impossible positions for a match. */
240 match_start -= match_length1;
244 /* Search backward from `match_start' - 1 to `G_buffer' for a match
245 with `separator'; for speed, use strncmp if `separator' contains no
247 If the match succeeds, set `match_start' to point to the start of
248 the match and `match_length' to the length of the match.
249 Otherwise, make `match_start' < `G_buffer'. */
250 if (sentinel_length == 0)
252 ptrdiff_t i = match_start - G_buffer;
255 if (! (INT_MIN < i && i <= INT_MAX))
256 error (EXIT_FAILURE, 0, _("record too large"));
258 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
260 match_start = G_buffer - 1;
263 error (EXIT_FAILURE, 0,
264 _("error in regular expression search"));
268 match_start = G_buffer + regs.start[0];
269 match_length = regs.end[0] - regs.start[0];
274 /* `match_length' is constant for non-regexp boundaries. */
275 while (*--match_start != first_char
276 || (match_length1 && strncmp (match_start + 1, separator1,
281 /* Check whether we backed off the front of `G_buffer' without finding
282 a match for `separator'. */
283 if (match_start < G_buffer)
287 /* Hit the beginning of the file; print the remaining record. */
288 output (G_buffer, past_end);
292 saved_record_size = past_end - G_buffer;
293 if (saved_record_size > read_size)
295 /* `G_buffer_size' is about twice `read_size', so since
296 we want to read in another `read_size' bytes before
297 the data already in `G_buffer', we need to increase
300 size_t offset = sentinel_length ? sentinel_length : 1;
301 ptrdiff_t match_start_offset = match_start - G_buffer;
302 ptrdiff_t past_end_offset = past_end - G_buffer;
303 size_t old_G_buffer_size = G_buffer_size;
306 G_buffer_size = read_size * 2 + sentinel_length + 2;
307 if (G_buffer_size < old_G_buffer_size)
309 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
311 /* Adjust the pointers for the new buffer location. */
312 match_start = newbuffer + match_start_offset;
313 past_end = newbuffer + past_end_offset;
314 G_buffer = newbuffer;
317 /* Back up to the start of the next bufferfull of the file. */
318 if (file_pos >= read_size)
319 file_pos -= read_size;
322 read_size = file_pos;
325 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
326 error (0, errno, _("%s: seek failed"), file);
328 /* Shift the pending record data right to make room for the new.
329 The source and destination regions probably overlap. */
330 memmove (G_buffer + read_size, G_buffer, saved_record_size);
331 past_end = G_buffer + read_size + saved_record_size;
332 /* For non-regexp searches, avoid unneccessary scanning. */
334 match_start = G_buffer + read_size;
336 match_start = past_end;
338 if (safe_read (input_fd, G_buffer, read_size) != read_size)
340 error (0, errno, "%s", file);
346 /* Found a match of `separator'. */
347 if (separator_ends_record)
349 char *match_end = match_start + match_length;
351 /* If this match of `separator' isn't at the end of the
352 file, print the record. */
353 if (!first_time || match_end != past_end)
354 output (match_end, past_end);
355 past_end = match_end;
360 output (match_start, past_end);
361 past_end = match_start;
364 /* For non-regex matching, we can back up. */
365 if (sentinel_length > 0)
366 match_start -= match_length - 1;
371 /* Print FILE in reverse.
372 Return true if successful. */
375 tac_file (const char *file)
380 in = fopen (file, "r");
383 error (0, errno, "%s", file);
386 SET_BINARY (fileno (in));
387 ok = tac_seekable (fileno (in), file);
388 if (fclose (in) != 0)
390 error (0, errno, "%s", file);
396 #if DONT_UNLINK_WHILE_OPEN
398 static const char *file_to_remove;
399 static FILE *fp_to_close;
402 unlink_tempfile (void)
404 fclose (fp_to_close);
405 unlink (file_to_remove);
409 record_tempfile (const char *fn, FILE *fp)
415 atexit (unlink_tempfile);
421 /* Make a copy of the standard input in `FIXME'. */
424 save_stdin (FILE **g_tmp, char **g_tempfile)
426 static char *template = NULL;
427 static char *tempdir;
432 if (template == NULL)
434 tempdir = getenv ("TMPDIR");
436 tempdir = DEFAULT_TMPDIR;
437 template = xmalloc (strlen (tempdir) + 11);
439 sprintf (template, "%s/tacXXXXXX", tempdir);
441 fd = mkstemp (template);
443 error (EXIT_FAILURE, errno, "%s", tempfile);
445 tmp = fdopen (fd, "w+");
447 error (EXIT_FAILURE, errno, "%s", tempfile);
449 #if DONT_UNLINK_WHILE_OPEN
450 record_tempfile (tempfile, tmp);
457 size_t bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
460 if (bytes_read == SAFE_READ_ERROR)
461 error (EXIT_FAILURE, errno, _("stdin: read error"));
463 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
464 error (EXIT_FAILURE, errno, "%s", tempfile);
467 if (fflush (tmp) != 0)
468 error (EXIT_FAILURE, errno, "%s", tempfile);
470 SET_BINARY (fileno (tmp));
472 *g_tempfile = tempfile;
475 /* Print the standard input in reverse, saving it to temporary
476 file first if it is a pipe.
477 Return true if successful. */
484 /* No tempfile is needed for "tac < file".
485 Use fstat instead of checking for errno == ESPIPE because
486 lseek doesn't work on some special files but doesn't return an
488 if (fstat (STDIN_FILENO, &stats))
490 error (0, errno, _("standard input"));
494 if (S_ISREG (stats.st_mode))
496 return tac_seekable (STDIN_FILENO, _("standard input"));
502 save_stdin (&tmp_stream, &tmp_file);
503 return tac_seekable (fileno (tmp_stream), tmp_file);
508 /* BUF_END points one byte past the end of the buffer to be searched. */
510 /* FIXME: describe */
513 tac_mem (const char *buf, size_t n_bytes, FILE *out)
521 nl = memrchr (buf, buf + n_bytes, '\n');
522 bol = (nl == NULL ? buf : nl + 1);
524 /* If the last line of the input file has no terminating newline,
525 treat it as a special case. */
526 if (bol < buf + n_bytes)
528 /* Print out the line from bol to end of input. */
529 fwrite (bol, 1, (buf + n_bytes) - bol, out);
531 /* Add a newline here. Otherwise, the first and second lines
532 of output would appear to have been joined. */
536 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
538 /* Output the line (which includes a trailing newline)
539 from NL+1 to BOL-1. */
540 fwrite (nl + 1, 1, bol - (nl + 1), out);
545 /* If there's anything left, output the last line: BUF .. BOL-1.
546 When the first byte of the input is a newline, there is nothing
549 fwrite (buf, 1, bol - buf, out);
551 /* FIXME: this is work in progress.... */
554 /* FIXME: describe */
557 tac_stdin_to_mem (void)
560 size_t bufsiz = 8 * BUFSIZ;
561 size_t delta = 8 * BUFSIZ;
567 char *new_buf = realloc (buf, bufsiz);
571 /* Write contents of buf to a temporary file, ... */
574 /* Free the buffer and fall back on the code that relies on a
582 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
585 if (bytes_read == SAFE_READ_ERROR)
586 error (EXIT_FAILURE, errno, _("stdin: read error"));
587 n_bytes += bytes_read;
592 tac_mem (buf, n_bytes, stdout);
599 main (int argc, char **argv)
601 const char *error_message; /* Return value from re_compile_pattern. */
604 bool have_read_stdin = false;
605 size_t half_buffer_size;
607 initialize_main (&argc, &argv);
608 program_name = argv[0];
609 setlocale (LC_ALL, "");
610 bindtextdomain (PACKAGE, LOCALEDIR);
611 textdomain (PACKAGE);
613 atexit (close_stdout);
617 separator_ends_record = true;
619 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
626 separator_ends_record = false;
634 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
636 case_GETOPT_HELP_CHAR;
637 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
639 usage (EXIT_FAILURE);
643 if (sentinel_length == 0)
645 compiled_separator.allocated = 100;
646 compiled_separator.buffer = xmalloc (compiled_separator.allocated);
647 compiled_separator.fastmap = xmalloc (256);
648 compiled_separator.translate = 0;
649 error_message = re_compile_pattern (separator, strlen (separator),
650 &compiled_separator);
652 error (EXIT_FAILURE, 0, "%s", error_message);
655 match_length = sentinel_length = strlen (separator);
657 read_size = INITIAL_READSIZE;
658 while (sentinel_length >= read_size / 2)
660 if (SIZE_MAX / 2 < read_size)
664 half_buffer_size = read_size + sentinel_length + 1;
665 G_buffer_size = 2 * half_buffer_size;
666 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
668 G_buffer = xmalloc (G_buffer_size);
671 strcpy (G_buffer, separator);
672 G_buffer += sentinel_length;
681 have_read_stdin = true;
682 /* We need binary I/O, since `tac' relies
683 on `lseek' and byte counts. */
684 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
690 for (; optind < argc; ++optind)
692 if (STREQ (argv[optind], "-"))
694 have_read_stdin = true;
695 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
700 /* Binary output will leave the lines' ends (NL or
701 CR/LF) intact when the output is a disk file.
702 Writing a file with CR/LF pairs at end of lines in
703 text mode has no visible effect on console output,
704 since two CRs in a row are just like one CR. */
705 SET_BINARY (STDOUT_FILENO);
706 ok &= tac_file (argv[optind]);
711 /* Flush the output buffer. */
712 output ((char *) NULL, (char *) NULL);
714 if (have_read_stdin && close (STDIN_FILENO) < 0)
715 error (EXIT_FAILURE, errno, "-");
716 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);