1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
50 #include "safe-read.h"
52 /* The official name of this program (e.g., no `g' prefix). */
53 #define PROGRAM_NAME "tac"
55 #define AUTHORS "Jay Lepreau", "David MacKenzie"
57 #if defined __MSDOS__ || defined _WIN32
58 /* Define this to non-zero on systems for which the regular mechanism
59 (of unlinking an open file and expecting to be able to write, seek
60 back to the beginning, then reread it) doesn't work. E.g., on Windows
62 # define DONT_UNLINK_WHILE_OPEN 1
66 #ifndef DEFAULT_TMPDIR
67 # define DEFAULT_TMPDIR "/tmp"
70 /* The number of bytes per atomic read. */
71 #define INITIAL_READSIZE 8192
73 /* The number of bytes per atomic write. */
74 #define WRITESIZE 8192
76 /* The name this program was run with. */
79 /* The string that separates the records of the file. */
80 static char *separator;
82 /* True if we have ever read standard input. */
83 static bool have_read_stdin = false;
85 /* If true, print `separator' along with the record preceding it
86 in the file; otherwise with the record following it. */
87 static bool separator_ends_record;
89 /* 0 if `separator' is to be matched as a regular expression;
90 otherwise, the length of `separator', used as a sentinel to
92 static size_t sentinel_length;
94 /* The length of a match with `separator'. If `sentinel_length' is 0,
95 `match_length' is computed every time a match succeeds;
96 otherwise, it is simply the length of `separator'. */
97 static size_t match_length;
99 /* The input buffer. */
100 static char *G_buffer;
102 /* The number of bytes to read at once into `G_buffer'. */
103 static size_t read_size;
105 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2
    after tac_seekable has grown the buffer; main's initial allocation is
    2 * (read_size + sentinel_length + 1), which is at least as large.
106 The extra 2 bytes allow `past_end' to have a value beyond the
107 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
108 static size_t G_buffer_size;
110 /* The compiled regular expression representing `separator'. */
111 static struct re_pattern_buffer compiled_separator;
113 static struct option const longopts[] =
115 {"before", no_argument, NULL, 'b'},
116 {"regex", no_argument, NULL, 'r'},
117 {"separator", required_argument, NULL, 's'},
118 {GETOPT_HELP_OPTION_DECL},
119 {GETOPT_VERSION_OPTION_DECL},
/* Help/usage routine (called as `usage (EXIT_FAILURE)' from main).
   When STATUS is not EXIT_SUCCESS it prints a one-line "Try --help" hint
   on stderr; the remaining lines are the text of the full help message,
   followed by the standard --help/--version descriptions and the
   bug-report address on stdout.
   NOTE(review): the signature line, the `else' branch, the printf call
   that opens the help string, and the final exit are not present in this
   listing -- confirm against the complete file. */
126 if (status != EXIT_SUCCESS)
127 fprintf (stderr, _("Try `%s --help' for more information.\n"),
132 Usage: %s [OPTION]... [FILE]...\n\
136 Write each FILE to standard output, last line first.\n\
137 With no FILE, or when FILE is -, read standard input.\n\
141 Mandatory arguments to long options are mandatory for short options too.\n\
144 -b, --before attach the separator before instead of after\n\
145 -r, --regex interpret the separator as a regular expression\n\
146 -s, --separator=STRING use STRING as the separator instead of newline\n\
148 fputs (HELP_OPTION_DESCRIPTION, stdout);
149 fputs (VERSION_OPTION_DESCRIPTION, stdout);
150 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
155 /* Print the characters from START to PAST_END - 1.
156 If START is NULL, just flush the buffer.
    Bytes are staged in a static WRITESIZE-byte buffer and handed to
    fwrite on stdout one full buffer at a time; a trailing partial
    buffer stays pending until a later call fills it or flushes it. */
159 output (const char *start, const char *past_end)
161 static char buffer[WRITESIZE];
162 static size_t bytes_in_buffer = 0;
163 size_t bytes_to_add = past_end - start;
164 size_t bytes_available = WRITESIZE - bytes_in_buffer;
/* Flush path: write out whatever is currently staged.
   NOTE(review): the test of START against NULL that guards this call is
   not visible in this listing -- confirm. */
168 fwrite (buffer, 1, bytes_in_buffer, stdout);
173 /* Write out as many full buffers as possible. */
174 while (bytes_to_add >= bytes_available)
176 memcpy (buffer + bytes_in_buffer, start, bytes_available);
177 bytes_to_add -= bytes_available;
178 start += bytes_available;
179 fwrite (buffer, 1, WRITESIZE, stdout);
181 bytes_available = WRITESIZE;
/* Stage the remainder (less than one buffer's worth) for a later call. */
184 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
185 bytes_in_buffer += bytes_to_add;
188 /* Print in reverse the file open on descriptor INPUT_FD for reading FILE.
189 Return true if successful.
    The file is consumed from its end towards its beginning in chunks of
    `read_size' bytes read into `G_buffer'; each chunk is scanned backwards
    for `separator' and every record found is passed to `output'.
    NOTE(review): this listing has lost its short lines (braces, `else',
    short declarations and tests); only the visible lines are described. */
192 tac_seekable (int input_fd, const char *file)
194 /* Pointer to the location in `G_buffer' where the search for
195 the next separator will begin. */
198 /* Pointer to one past the rightmost character in `G_buffer' that
199 has not been printed yet. */
202 /* Length of the record growing in `G_buffer'. */
203 size_t saved_record_size;
205 /* Offset in the file of the next read. */
208 /* True if `output' has not been called yet for any file.
209 Only used when the separator is attached to the preceding record. */
210 bool first_time = true;
211 char first_char = *separator; /* Speed optimization, non-regexp. */
212 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
213 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
214 struct re_registers regs;
216 /* Find the size of the input file. */
217 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
219 return true; /* It's an empty file. */
221 /* Arrange for the first read to lop off enough to leave the rest of the
222 file a multiple of `read_size'. Since `read_size' can change, this may
223 not always hold during the program run, but since it usually will, leave
224 it here for i/o efficiency (page/sector boundaries and all that).
225 Note: the efficiency gain has not been verified. */
226 saved_record_size = file_pos % read_size;
227 if (saved_record_size == 0)
228 saved_record_size = read_size;
229 file_pos -= saved_record_size;
230 /* `file_pos' now points to the start of the last (probably partial) block
231 in the input file. */
233 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
234 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
236 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
238 error (0, errno, _("%s: read error"), quotearg_colon (file));
242 match_start = past_end = G_buffer + saved_record_size;
243 /* For non-regexp search, move past impossible positions for a match. */
245 match_start -= match_length1;
249 /* Search backward from `match_start' - 1 to `G_buffer' for a match
250 with `separator'; for speed, use strncmp if `separator' contains no
252 If the match succeeds, set `match_start' to point to the start of
253 the match and `match_length' to the length of the match.
254 Otherwise, make `match_start' < `G_buffer'. */
255 if (sentinel_length == 0)
257 ptrdiff_t i = match_start - G_buffer;
260 if (! (INT_MIN < i && i <= INT_MAX))
261 error (EXIT_FAILURE, 0, _("record too large"));
/* Search the first I bytes of `G_buffer', starting at offset I - 1 and
   moving backwards over at most I positions (negative range).  The match
   bounds are returned through `regs', so its address must be passed.  */
263 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
/* No match: park `match_start' just before the buffer (presumably the
   `ret == -1' case; the test itself is not visible here). */
265 match_start = G_buffer - 1;
268 error (EXIT_FAILURE, 0,
269 _("error in regular expression search"));
273 match_start = G_buffer + regs.start[0];
274 match_length = regs.end[0] - regs.start[0];
279 /* `match_length' is constant for non-regexp boundaries. */
280 while (*--match_start != first_char
281 || (match_length1 && strncmp (match_start + 1, separator1,
286 /* Check whether we backed off the front of `G_buffer' without finding
287 a match for `separator'. */
288 if (match_start < G_buffer)
292 /* Hit the beginning of the file; print the remaining record. */
293 output (G_buffer, past_end);
297 saved_record_size = past_end - G_buffer;
298 if (saved_record_size > read_size)
300 /* `G_buffer_size' is about twice `read_size', so since
301 we want to read in another `read_size' bytes before
302 the data already in `G_buffer', we need to increase
    `G_buffer_size'. */
305 size_t offset = sentinel_length ? sentinel_length : 1;
306 ptrdiff_t match_start_offset = match_start - G_buffer;
307 ptrdiff_t past_end_offset = past_end - G_buffer;
308 size_t old_G_buffer_size = G_buffer_size;
311 G_buffer_size = read_size * 2 + sentinel_length + 2;
312 if (G_buffer_size < old_G_buffer_size)
/* The allocation begins OFFSET bytes before `G_buffer', so hand the
   real start of the block to xrealloc. */
314 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
316 /* Adjust the pointers for the new buffer location. */
317 match_start = newbuffer + match_start_offset;
318 past_end = newbuffer + past_end_offset;
319 G_buffer = newbuffer;
322 /* Back up to the start of the next bufferfull of the file. */
323 if (file_pos >= read_size)
324 file_pos -= read_size;
327 read_size = file_pos;
330 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
331 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
333 /* Shift the pending record data right to make room for the new.
334 The source and destination regions probably overlap. */
335 memmove (G_buffer + read_size, G_buffer, saved_record_size);
336 past_end = G_buffer + read_size + saved_record_size;
337 /* For non-regexp searches, avoid unnecessary scanning. */
339 match_start = G_buffer + read_size;
341 match_start = past_end;
343 if (safe_read (input_fd, G_buffer, read_size) != read_size)
345 error (0, errno, _("%s: read error"), quotearg_colon (file));
351 /* Found a match of `separator'. */
352 if (separator_ends_record)
354 char *match_end = match_start + match_length;
356 /* If this match of `separator' isn't at the end of the
357 file, print the record. */
358 if (!first_time || match_end != past_end)
359 output (match_end, past_end);
360 past_end = match_end;
365 output (match_start, past_end);
366 past_end = match_start;
369 /* For non-regex matching, we can back up. */
370 if (sentinel_length > 0)
371 match_start -= match_length - 1;
/* Temporary-file cleanup support, selected by DONT_UNLINK_WHILE_OPEN.
   NOTE(review): the `#else'/`#endif' lines, the return-type lines and most
   of the function bodies are not present in this listing. */
376 #if DONT_UNLINK_WHILE_OPEN
378 static const char *file_to_remove;
379 static FILE *fp_to_close;
/* atexit handler (registered below): close the recorded stream, then
   remove the recorded file. */
382 unlink_tempfile (void)
384 fclose (fp_to_close);
385 unlink (file_to_remove);
/* Variant that registers unlink_tempfile with atexit; presumably it first
   stores FN and FP in the two statics above -- confirm. */
389 record_or_unlink_tempfile (char const *fn, FILE *fp)
395 atexit (unlink_tempfile);
/* Second definition, which does not use FP (ATTRIBUTE_UNUSED); presumably
   the `#else' branch for systems where an open file can be unlinked
   right away -- confirm. */
402 record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
409 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
410 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
411 and file name. Return true if successful.
    The temporary file is created with mkstemp in $TMPDIR (DEFAULT_TMPDIR
    is the fallback value) and filled through `G_buffer' in reads of
    `read_size' bytes. */
414 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
/* The template string is allocated once and kept for later calls. */
416 static char *template = NULL;
417 static char *tempdir;
422 if (template == NULL)
424 char const * const Template = "%s/tacXXXXXX";
425 tempdir = getenv ("TMPDIR");
427 tempdir = DEFAULT_TMPDIR;
429 /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
430 template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
/* NOTE(review): `Template' is declared after the `template == NULL' test,
   so this sprintf looks like part of the one-time initialisation.  mkstemp
   overwrites the XXXXXX in place, so a second call would pass a template
   without the XXXXXX suffix and mkstemp would fail -- confirm against the
   complete file. */
431 sprintf (template, Template, tempdir);
434 /* FIXME: there's a small window between a successful mkstemp call
435 and the unlink that's performed by record_or_unlink_tempfile.
436 If we're interrupted in that interval, this code fails to remove
437 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
438 the window is much larger -- it extends to the atexit-called
440 FIXME: clean up upon fatal signal. Don't block them, in case
441 $TMPDIR is a remote file system. */
444 fd = mkstemp (template);
447 error (0, errno, _("cannot create temporary file %s"), quote (tempfile));
/* Wrap the descriptor in a read/write stdio stream. */
451 tmp = fdopen (fd, "w+");
454 error (0, errno, _("cannot open %s for writing"), quote (tempfile));
460 record_or_unlink_tempfile (tempfile, tmp);
/* Copy loop: read a chunk from INPUT_FD and append it to the stream.
   NOTE(review): the loop header and its end-of-input exit are not visible
   in this listing. */
464 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
467 if (bytes_read == SAFE_READ_ERROR)
469 error (0, errno, _("%s: read error"), quotearg_colon (file));
473 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
475 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
/* Push buffered data out to the file; a flush failure is reported as a
   write error on the temporary file. */
480 if (fflush (tmp) != 0)
482 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
486 SET_BINARY (fileno (tmp));
488 *g_tempfile = tempfile;
/* Copy INPUT_FD to a temporary, then tac that file.
   FILE is the input's name, used in diagnostics.
   Return true if successful.  If the copy fails, return false without
   touching the temporary stream, which copy_to_temp only sets on
   success.  */

static bool
tac_nonseekable (int input_fd, const char *file)
{
  FILE *tmp_stream;
  char *tmp_file;

  /* copy_to_temp has already issued a diagnostic when it fails.  */
  if (! copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
    return false;

  return tac_seekable (fileno (tmp_stream), tmp_file);
}
508 /* Print FILE in reverse, copying it to a temporary
509 file first if it is not seekable.
510 Return true if successful.
    A FILENAME of "-" selects standard input. */
513 tac_file (const char *filename)
519 if (STREQ (filename, "-"))
/* Remember that stdin was used (main closes it at the end) and switch to
   a translated display name for diagnostics. */
521 have_read_stdin = true;
523 filename = _("standard input");
527 fd = open (filename, O_RDONLY);
530 error (0, errno, _("cannot open %s for reading"), quote (filename));
535 /* We need binary I/O, since `tac' relies
536 on `lseek' and byte counts.
538 Binary output will leave the lines' ends (NL or
539 CR/LF) intact when the output is a disk file.
540 Writing a file with CR/LF pairs at end of lines in
541 text mode has no visible effect on console output,
542 since two CRs in a row are just like one CR. */
543 SET_BINARY2 (fd, STDOUT_FILENO);
/* Probe seekability by seeking to the end; the result picks between
   tac_seekable and tac_nonseekable below.
   NOTE(review): the condition of the `?:' is not visible in this listing. */
545 file_size = lseek (fd, (off_t) 0, SEEK_END);
548 ? tac_seekable (fd, filename)
549 : tac_nonseekable (fd, filename));
/* Standard input is left open here; a failed close of any other
   descriptor is reported as a read error. */
551 if (fd != STDIN_FILENO && close (fd) == -1)
553 error (0, errno, _("%s: read error"), quotearg_colon (filename));
/* Return a pointer to the last newline in the half-open range
   [BUF, BUF_END), or NULL if there is none.  BUF_END points one byte
   past the end of the region to be searched.  */

static const char *
find_last_newline (const char *buf, const char *buf_end)
{
  const char *p = buf_end;

  while (buf < p)
    if (*--p == '\n')
      return p;
  return NULL;
}

/* Write the N_BYTES bytes starting at BUF to OUT with the order of the
   newline-terminated lines reversed.  If the input does not end in a
   newline, one is appended to its final line so that it is not joined
   to the line printed after it.  Nothing is written when N_BYTES is 0.
   The fwrite/fputc results are not checked.  */

static void
tac_mem (const char *buf, size_t n_bytes, FILE *out)
{
  const char *buf_end = buf + n_bytes;
  const char *nl;
  const char *bol;

  if (n_bytes == 0)
    return;

  nl = find_last_newline (buf, buf_end);
  bol = (nl == NULL ? buf : nl + 1);

  /* If the last line of the input file has no terminating newline,
     treat it as a special case.  */
  if (bol < buf_end)
    {
      /* Print out the line from bol to end of input.  */
      fwrite (bol, 1, buf_end - bol, out);

      /* Add a newline here.  Otherwise, the first and second lines
         of output would appear to have been joined.  */
      fputc ('\n', out);
    }

  /* BOL - 1 is the newline that ends the previous line, so search
     strictly before it.  The `buf < bol' guard keeps BOL - 1 from being
     formed when BOL is already at the start of the buffer.  */
  while (buf < bol && (nl = find_last_newline (buf, bol - 1)) != NULL)
    {
      /* Output the line (which includes a trailing newline)
         from NL+1 to BOL-1.  */
      fwrite (nl + 1, 1, bol - (nl + 1), out);
      bol = nl + 1;
    }

  /* If there's anything left, output the last line: BUF .. BOL-1.
     When BOL has reached BUF there is nothing left to do here.  */
  if (buf < bol)
    fwrite (buf, 1, bol - buf, out);

  /* FIXME: this is work in progress.... */
  fflush (out);
}
606 /* Read standard input into a heap buffer with safe_read, then pass the
    accumulated bytes to tac_mem, which writes to stdout.  A read error is
    fatal (exit status EXIT_FAILURE).
    NOTE(review): the loop structure, the buffer-growth step and the
    handling of a failed realloc are only partly visible in this listing.
    FIXME: describe */
609 tac_stdin_to_mem (void)
/* Initial capacity and, presumably, the growth increment -- confirm. */
612 size_t bufsiz = 8 * BUFSIZ;
613 size_t delta = 8 * BUFSIZ;
/* Plain realloc rather than xrealloc; the comments that follow describe a
   fallback for the case where it fails. */
619 char *new_buf = realloc (buf, bufsiz);
623 /* Write contents of buf to a temporary file, ... */
626 /* Free the buffer and fall back on the code that relies on a
634 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
637 if (bytes_read == SAFE_READ_ERROR)
638 error (EXIT_FAILURE, errno, _("stdin: read error"));
639 n_bytes += bytes_read;
644 tac_mem (buf, n_bytes, stdout);
/* Program entry point: parse the -b/-r/-s options, prepare the separator
   (compiled regex or literal sentinel) and the input buffer, reverse each
   FILE operand (standard input when there is none), flush the output and
   exit with EXIT_SUCCESS only if every file was processed successfully.
   NOTE(review): this listing has lost its short lines (braces, `else',
   `case' labels, short declarations); only visible lines are described. */
651 main (int argc, char **argv)
653 const char *error_message; /* Return value from re_compile_pattern. */
656 size_t half_buffer_size;
658 /* Initializer for file_list if no file-arguments
659 were specified on the command line. */
660 static char const *const default_file_list[] = {"-", NULL};
661 char const *const *file;
663 initialize_main (&argc, &argv);
664 program_name = argv[0];
665 setlocale (LC_ALL, "");
666 bindtextdomain (PACKAGE, LOCALEDIR);
667 textdomain (PACKAGE);
669 atexit (close_stdout);
/* Default: the separator is attached to the end of the record before it. */
673 separator_ends_record = true;
675 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
/* Presumably the -b/--before case -- the `case' labels are not visible. */
680 separator_ends_record = false;
688 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
690 case_GETOPT_HELP_CHAR;
691 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
693 usage (EXIT_FAILURE);
/* Regex mode (sentinel_length == 0): compile `separator' into
   `compiled_separator' and die with the regex library's message if
   compilation reports one. */
697 if (sentinel_length == 0)
699 compiled_separator.allocated = 100;
700 compiled_separator.buffer = xmalloc (compiled_separator.allocated);
701 compiled_separator.fastmap = xmalloc (256);
702 compiled_separator.translate = 0;
703 error_message = re_compile_pattern (separator, strlen (separator),
704 &compiled_separator);
706 error (EXIT_FAILURE, 0, "%s", error_message);
/* Literal mode (presumably the `else' branch): every match has the fixed
   length of `separator'. */
709 match_length = sentinel_length = strlen (separator);
/* Enlarge `read_size' until the separator is shorter than half of it,
   guarding against size_t overflow.
   NOTE(review): the loop body that grows `read_size' is not visible. */
711 read_size = INITIAL_READSIZE;
712 while (sentinel_length >= read_size / 2)
714 if (SIZE_MAX / 2 < read_size)
/* The buffer is two halves of read_size + sentinel_length + 1 bytes; the
   comparison below detects wrap-around in either computation. */
718 half_buffer_size = read_size + sentinel_length + 1;
719 G_buffer_size = 2 * half_buffer_size;
720 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
722 G_buffer = xmalloc (G_buffer_size);
/* Plant a copy of the separator at the front of the allocation and advance
   `G_buffer' past it; per the `sentinel_length' comment this copy acts as
   a sentinel for the backward literal search.
   NOTE(review): the guard around these lines and the regex-mode
   alternative are not visible. */
725 strcpy (G_buffer, separator);
726 G_buffer += sentinel_length;
733 file = (optind < argc
734 ? (char const *const *) &argv[optind]
735 : default_file_list);
/* Process every operand, accumulating success in `ok' so that one failure
   does not stop the remaining files. */
740 for (i = 0; file[i]; ++i)
741 ok &= tac_file (file[i]);
744 /* Flush the output buffer. */
745 output ((char *) NULL, (char *) NULL);
747 if (have_read_stdin && close (STDIN_FILENO) < 0)
748 error (EXIT_FAILURE, errno, "-");
749 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);