1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it follows in the file.
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
50 #include "safe-read.h"
51 #include "unistd-safer.h"
53 /* The official name of this program (e.g., no `g' prefix). */
54 #define PROGRAM_NAME "tac"
56 #define AUTHORS "Jay Lepreau", "David MacKenzie"
58 #if defined __MSDOS__ || defined _WIN32
59 /* Define this to non-zero on systems for which the regular mechanism
60 (of unlinking an open file and expecting to be able to write, seek
61 back to the beginning, then reread it) doesn't work. E.g., on Windows and DOS systems. */
63 # define DONT_UNLINK_WHILE_OPEN 1
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
71 /* The number of bytes per atomic read. */
72 #define INITIAL_READSIZE 8192
74 /* The number of bytes per atomic write. */
75 #define WRITESIZE 8192
77 /* The name this program was run with. */
80 /* The string that separates the records of the file. */
81 static char *separator;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin = false;
86 /* If true, print `separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record;
90 /* 0 if `separator' is to be matched as a regular expression;
91 otherwise, the length of `separator', used as a sentinel to stop the search. */
93 static size_t sentinel_length;
95 /* The length of a match with `separator'. If `sentinel_length' is 0,
96 `match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of `separator'. */
98 static size_t match_length;
100 /* The input buffer. */
101 static char *G_buffer;
103 /* The number of bytes to read at once into `G_buffer'. */
104 static size_t read_size;
106 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow `past_end' to have a value beyond the
108 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
109 static size_t G_buffer_size;
111 /* The compiled regular expression representing `separator'. */
112 static struct re_pattern_buffer compiled_separator;
114 static struct option const longopts[] =
116 {"before", no_argument, NULL, 'b'},
117 {"regex", no_argument, NULL, 'r'},
118 {"separator", required_argument, NULL, 's'},
119 {GETOPT_HELP_OPTION_DECL},
120 {GETOPT_VERSION_OPTION_DECL},
127 if (status != EXIT_SUCCESS)
128 fprintf (stderr, _("Try `%s --help' for more information.\n"),
133 Usage: %s [OPTION]... [FILE]...\n\
137 Write each FILE to standard output, last line first.\n\
138 With no FILE, or when FILE is -, read standard input.\n\
142 Mandatory arguments to long options are mandatory for short options too.\n\
145 -b, --before attach the separator before instead of after\n\
146 -r, --regex interpret the separator as a regular expression\n\
147 -s, --separator=STRING use STRING as the separator instead of newline\n\
149 fputs (HELP_OPTION_DESCRIPTION, stdout);
150 fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
156 /* Print the characters from START to PAST_END - 1.
157 If START is NULL, just flush the buffer.
Output is staged in a static buffer of WRITESIZE bytes and handed to
fwrite on stdout one full buffer at a time; any remainder smaller than
the free space stays pending for a later call or an explicit flush. */
160 output (const char *start, const char *past_end)
162 static char buffer[WRITESIZE];
163 static size_t bytes_in_buffer = 0;
164 size_t bytes_to_add = past_end - start;
165 size_t bytes_available = WRITESIZE - bytes_in_buffer;
/* Write out whatever is pending.  NOTE(review): presumably this is the
   flush path taken when START is NULL; the guarding test is not visible
   in this excerpt. */
169 fwrite (buffer, 1, bytes_in_buffer, stdout);
174 /* Write out as many full buffers as possible. */
175 while (bytes_to_add >= bytes_available)
/* Top the buffer up to exactly WRITESIZE bytes, emit it, and consume
   the same number of bytes from the input range. */
177 memcpy (buffer + bytes_in_buffer, start, bytes_available);
178 bytes_to_add -= bytes_available;
179 start += bytes_available;
180 fwrite (buffer, 1, WRITESIZE, stdout);
182 bytes_available = WRITESIZE;
/* Fewer than `bytes_available' bytes remain here (the loop condition
   above has failed), so they fit; keep them for a later call. */
185 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
186 bytes_in_buffer += bytes_to_add;
189 /* Print in reverse the file open on descriptor INPUT_FD for reading FILE.
190 Return true if successful. */
193 tac_seekable (int input_fd, const char *file)
195 /* Pointer to the location in `G_buffer' where the search for
196 the next separator will begin. */
199 /* Pointer to one past the rightmost character in `G_buffer' that
200 has not been printed yet. */
203 /* Length of the record growing in `G_buffer'. */
204 size_t saved_record_size;
206 /* Offset in the file of the next read. */
209 /* True if `output' has not been called yet for any file.
210 Only used when the separator is attached to the preceding record. */
211 bool first_time = true;
212 char first_char = *separator; /* Speed optimization, non-regexp. */
213 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
214 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
215 struct re_registers regs;
217 /* Find the size of the input file. */
218 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
220 return true; /* It's an empty file. */
222 /* Arrange for the first read to lop off enough to leave the rest of the
223 file a multiple of `read_size'. Since `read_size' can change, this may
224 not always hold during the program run, but since it usually will, leave
225 it here for i/o efficiency (page/sector boundaries and all that).
226 Note: the efficiency gain has not been verified. */
227 saved_record_size = file_pos % read_size;
228 if (saved_record_size == 0)
229 saved_record_size = read_size;
230 file_pos -= saved_record_size;
231 /* `file_pos' now points to the start of the last (probably partial) block
232 in the input file. */
234 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
237 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
239 error (0, errno, _("%s: read error"), quotearg_colon (file));
243 match_start = past_end = G_buffer + saved_record_size;
244 /* For non-regexp search, move past impossible positions for a match. */
246 match_start -= match_length1;
250 /* Search backward from `match_start' - 1 to `G_buffer' for a match
251 with `separator'; for speed, use strncmp if `separator' is not a regular expression.
253 If the match succeeds, set `match_start' to point to the start of
254 the match and `match_length' to the length of the match.
255 Otherwise, make `match_start' < `G_buffer'. */
256 if (sentinel_length == 0)
258 ptrdiff_t i = match_start - G_buffer;
261 if (! (INT_MIN < i && i <= INT_MAX))
262 error (EXIT_FAILURE, 0, _("record too large"));
264 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
266 match_start = G_buffer - 1;
269 error (EXIT_FAILURE, 0,
270 _("error in regular expression search"));
274 match_start = G_buffer + regs.start[0];
275 match_length = regs.end[0] - regs.start[0];
280 /* `match_length' is constant for non-regexp boundaries. */
281 while (*--match_start != first_char
282 || (match_length1 && strncmp (match_start + 1, separator1,
287 /* Check whether we backed off the front of `G_buffer' without finding
288 a match for `separator'. */
289 if (match_start < G_buffer)
293 /* Hit the beginning of the file; print the remaining record. */
294 output (G_buffer, past_end);
298 saved_record_size = past_end - G_buffer;
299 if (saved_record_size > read_size)
301 /* `G_buffer_size' is about twice `read_size', so since
302 we want to read in another `read_size' bytes before
303 the data already in `G_buffer', we need to increase `G_buffer_size'. */
306 size_t offset = sentinel_length ? sentinel_length : 1;
307 ptrdiff_t match_start_offset = match_start - G_buffer;
308 ptrdiff_t past_end_offset = past_end - G_buffer;
309 size_t old_G_buffer_size = G_buffer_size;
312 G_buffer_size = read_size * 2 + sentinel_length + 2;
313 if (G_buffer_size < old_G_buffer_size)
315 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
317 /* Adjust the pointers for the new buffer location. */
318 match_start = newbuffer + match_start_offset;
319 past_end = newbuffer + past_end_offset;
320 G_buffer = newbuffer;
323 /* Back up to the start of the next bufferfull of the file. */
324 if (file_pos >= read_size)
325 file_pos -= read_size;
328 read_size = file_pos;
331 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
332 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
334 /* Shift the pending record data right to make room for the new.
335 The source and destination regions probably overlap. */
336 memmove (G_buffer + read_size, G_buffer, saved_record_size);
337 past_end = G_buffer + read_size + saved_record_size;
338 /* For non-regexp searches, avoid unnecessary scanning. */
340 match_start = G_buffer + read_size;
342 match_start = past_end;
344 if (safe_read (input_fd, G_buffer, read_size) != read_size)
346 error (0, errno, _("%s: read error"), quotearg_colon (file));
352 /* Found a match of `separator'. */
353 if (separator_ends_record)
355 char *match_end = match_start + match_length;
357 /* If this match of `separator' isn't at the end of the
358 file, print the record. */
359 if (!first_time || match_end != past_end)
360 output (match_end, past_end);
361 past_end = match_end;
366 output (match_start, past_end);
367 past_end = match_start;
370 /* For non-regex matching, we can back up. */
371 if (sentinel_length > 0)
372 match_start -= match_length - 1;
377 #if DONT_UNLINK_WHILE_OPEN
379 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
380 Using atexit like this is wrong, since it can fail
381 when called e.g. 32 or more times.
382 But this isn't a big deal, since the code is used only on WOE/DOS
383 systems, and few people invoke tac on that many nonseekable files. */
385 static const char *file_to_remove;
386 static FILE *fp_to_close;
389 unlink_tempfile (void)
391 fclose (fp_to_close);
392 unlink (file_to_remove);
396 record_or_unlink_tempfile (char const *fn, FILE *fp)
402 atexit (unlink_tempfile);
409 record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
416 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
417 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
418 and file name. Return true if successful.
FILE is used only to label read-error diagnostics.  The data is staged
through the global `G_buffer' in reads of at most `read_size' bytes. */
421 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
/* The mkstemp template is built once and cached across calls. */
423 static char *template = NULL;
424 static char *tempdir;
429 if (template == NULL)
431 char const * const Template = "%s/tacXXXXXX";
/* Take the directory from the TMPDIR environment variable.
   NOTE(review): the assignment of DEFAULT_TMPDIR below is presumably a
   fallback for an unset TMPDIR; its guarding test is not visible here. */
432 tempdir = getenv ("TMPDIR");
434 tempdir = DEFAULT_TMPDIR;
436 /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
437 template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
438 sprintf (template, Template, tempdir);
441 /* FIXME: there's a small window between a successful mkstemp call
442 and the unlink that's performed by record_or_unlink_tempfile.
443 If we're interrupted in that interval, this code fails to remove
444 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
445 the window is much larger -- it extends to the atexit-called unlink_tempfile.
447 FIXME: clean up upon fatal signal. Don't block them, in case
448 $TMPDIR is a remote file system. */
451 fd = mkstemp (template);
454 error (0, errno, _("cannot create temporary file %s"), quote (tempfile));
/* Pass the descriptor through fd_safer, then wrap it in a stdio stream
   opened for both writing and reading ("w+"). */
458 if ((fd = fd_safer (fd)) < 0 || ! (tmp = fdopen (fd, "w+")))
460 error (0, errno, _("cannot open %s for writing"), quote (tempfile));
/* Hand the name and stream to the cleanup helper. */
466 record_or_unlink_tempfile (tempfile, tmp);
/* Copy loop: read a chunk from INPUT_FD and append it to the temporary
   stream.  NOTE(review): the loop construct and its end-of-input exit
   are not visible in this excerpt. */
470 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
473 if (bytes_read == SAFE_READ_ERROR)
475 error (0, errno, _("%s: read error"), quotearg_colon (file));
479 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
481 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
/* Push stdio's buffered data out to the file: tac_nonseekable passes
   fileno (tmp_stream) to tac_seekable, which reads through the
   descriptor rather than the stream. */
486 if (fflush (tmp) != 0)
488 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
/* NOTE(review): SET_BINARY is defined elsewhere; presumably it switches
   the descriptor to binary mode on systems that distinguish text mode. */
492 SET_BINARY (fileno (tmp));
494 *g_tempfile = tempfile;
502 /* Copy INPUT_FD to a temporary, then tac that file.
503 Return true if successful.
FILE is passed to copy_to_temp, which uses it to label diagnostics.
tac_seekable runs only if the copy succeeded (the && short-circuits),
and is given the temporary stream's descriptor and the temporary
file's name in place of INPUT_FD and FILE. */
506 tac_nonseekable (int input_fd, const char *file)
510 return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)
511 && tac_seekable (fileno (tmp_stream), tmp_file));
514 /* Print FILENAME in reverse, copying it to a temporary
515 file first if it is not seekable.  A FILENAME of "-" is compared
with STREQ and recorded in `is_stdin'.
516 Return true if successful. */
519 tac_file (const char *filename)
524 bool is_stdin = STREQ (filename, "-");
/* Record that standard input was used (main closes it before exiting
   when this flag is set) and replace FILENAME with a translatable
   label for use in diagnostics. */
528 have_read_stdin = true;
530 filename = _("standard input");
/* Otherwise open the named file read-only. */
534 fd = open (filename, O_RDONLY);
537 error (0, errno, _("cannot open %s for reading"), quote (filename));
542 /* We need binary I/O, since `tac' relies
543 on `lseek' and byte counts.
545 Binary output will leave the lines' ends (NL or
546 CR/LF) intact when the output is a disk file.
547 Writing a file with CR/LF pairs at end of lines in
548 text mode has no visible effect on console output,
549 since two CRs in a row are just like one CR. */
550 SET_BINARY2 (fd, STDOUT_FILENO);
/* Seek to the end of the input and keep the result; the test on
   `file_size' that selects between the two branches below is not
   visible in this excerpt. */
552 file_size = lseek (fd, (off_t) 0, SEEK_END);
555 ? tac_seekable (fd, filename)
556 : tac_nonseekable (fd, filename));
/* Standard input is not closed here.  A failed close of any other
   descriptor is reported as a read error on FILENAME. */
558 if (!is_stdin && close (fd) != 0)
560 error (0, errno, _("%s: read error"), quotearg_colon (filename));
567 /* BUF_END points one byte past the end of the buffer to be searched. */
569 /* FIXME: describe */
572 tac_mem (const char *buf, size_t n_bytes, FILE *out)
580 nl = memrchr (buf, buf + n_bytes, '\n');
581 bol = (nl == NULL ? buf : nl + 1);
583 /* If the last line of the input file has no terminating newline,
584 treat it as a special case. */
585 if (bol < buf + n_bytes)
587 /* Print out the line from bol to end of input. */
588 fwrite (bol, 1, (buf + n_bytes) - bol, out);
590 /* Add a newline here. Otherwise, the first and second lines
591 of output would appear to have been joined. */
595 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
597 /* Output the line (which includes a trailing newline)
598 from NL+1 to BOL-1. */
599 fwrite (nl + 1, 1, bol - (nl + 1), out);
604 /* If there's anything left, output the last line: BUF .. BOL-1.
605 When the first byte of the input is a newline, there is nothing
608 fwrite (buf, 1, bol - buf, out);
610 /* FIXME: this is work in progress.... */
613 /* FIXME: describe */
616 tac_stdin_to_mem (void)
619 size_t bufsiz = 8 * BUFSIZ;
620 size_t delta = 8 * BUFSIZ;
626 char *new_buf = realloc (buf, bufsiz);
630 /* Write contents of buf to a temporary file, ... */
633 /* Free the buffer and fall back on the code that relies on a
641 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
644 if (bytes_read == SAFE_READ_ERROR)
645 error (EXIT_FAILURE, errno, _("stdin: read error"));
646 n_bytes += bytes_read;
651 tac_mem (buf, n_bytes, stdout);
/* Entry point: parse the options, prepare the separator (compiled
   regular expression or literal sentinel), allocate the shared input
   buffer, then reverse each FILE operand in turn, or standard input
   when there are none.  Exits with EXIT_SUCCESS only if every
   tac_file call returned true. */
658 main (int argc, char **argv)
660 const char *error_message; /* Return value from re_compile_pattern. */
663 size_t half_buffer_size;
665 /* Initializer for `file' if no file-arguments
666 were specified on the command line. */
667 static char const *const default_file_list[] = {"-", NULL};
668 char const *const *file;
670 initialize_main (&argc, &argv);
671 program_name = argv[0];
672 setlocale (LC_ALL, "");
673 bindtextdomain (PACKAGE, LOCALEDIR);
674 textdomain (PACKAGE);
676 atexit (close_stdout);
/* Default: the separator is attached to the end of the record that
   precedes it. */
680 separator_ends_record = true;
682 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
687 separator_ends_record = false;
695 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
697 case_GETOPT_HELP_CHAR;
698 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
700 usage (EXIT_FAILURE);
/* A `sentinel_length' of 0 selects regular-expression matching:
   set up the pattern buffer (100 bytes initially allocated, a
   256-byte fastmap, no translate table) and compile `separator'. */
704 if (sentinel_length == 0)
706 compiled_separator.allocated = 100;
707 compiled_separator.buffer = xmalloc (compiled_separator.allocated);
708 compiled_separator.fastmap = xmalloc (256);
709 compiled_separator.translate = NULL;
710 error_message = re_compile_pattern (separator, strlen (separator),
711 &compiled_separator);
713 error (EXIT_FAILURE, 0, "%s", error_message);
/* NOTE(review): presumably the literal-separator branch; every match
   then has the fixed length of `separator'. */
716 match_length = sentinel_length = strlen (separator);
/* Start from INITIAL_READSIZE and loop while `sentinel_length' is at
   least half of `read_size'.  NOTE(review): the statement that grows
   `read_size' and the action taken on the SIZE_MAX / 2 test are not
   visible in this excerpt. */
718 read_size = INITIAL_READSIZE;
719 while (sentinel_length >= read_size / 2)
721 if (SIZE_MAX / 2 < read_size)
/* The buffer is sized as two halves of read_size + sentinel_length + 1
   bytes each.  The comparison below fails if either size computation
   wrapped around. */
725 half_buffer_size = read_size + sentinel_length + 1;
726 G_buffer_size = 2 * half_buffer_size;
727 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
729 G_buffer = xmalloc (G_buffer_size);
/* Plant a copy of `separator' at the front of the allocation and move
   `G_buffer' past it, so that the backward literal search in
   tac_seekable finds this sentinel when it runs off the front of the
   data.  NOTE(review): presumably done only for a literal separator;
   the guarding test is not visible here. */
732 strcpy (G_buffer, separator);
733 G_buffer += sentinel_length;
/* Operands, if any, start at argv[optind]; otherwise use the default
   list that names standard input. */
740 file = (optind < argc
741 ? (char const *const *) &argv[optind]
742 : default_file_list);
/* Process every operand, accumulating the overall status in `ok'. */
747 for (i = 0; file[i]; ++i)
748 ok &= tac_file (file[i]);
751 /* Flush the output buffer. */
752 output ((char *) NULL, (char *) NULL);
/* Close standard input only if some operand actually read from it. */
754 if (have_read_stdin && close (STDIN_FILENO) < 0)
755 error (EXIT_FAILURE, errno, "-");
756 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);