1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
49 #include "safe-read.h"
51 /* The official name of this program (e.g., no `g' prefix). */
52 #define PROGRAM_NAME "tac"
54 #define AUTHORS "Jay Lepreau and David MacKenzie"
56 #if defined __MSDOS__ || defined _WIN32
57 /* Define this to non-zero on systems for which the regular mechanism
58 (of unlinking an open file and expecting to be able to write, seek
59 back to the beginning, then reread it) doesn't work. E.g., on Windows
61 # define DONT_UNLINK_WHILE_OPEN 1
65 #ifndef DEFAULT_TMPDIR
66 # define DEFAULT_TMPDIR "/tmp"
69 /* The number of bytes per atomic read. */
70 #define INITIAL_READSIZE 8192
72 /* The number of bytes per atomic write. */
73 #define WRITESIZE 8192
77 /* The name this program was run with. */
80 /* The string that separates the records of the file. */
81 static char *separator;
83 /* If nonzero, print `separator' along with the record preceding it
84 in the file; otherwise with the record following it. */
85 static int separator_ends_record;
87 /* 0 if `separator' is to be matched as a regular expression;
88 otherwise, the length of `separator', used as a sentinel to
90 static int sentinel_length;
92 /* The length of a match with `separator'. If `sentinel_length' is 0,
93 `match_length' is computed every time a match succeeds;
94 otherwise, it is simply the length of `separator'. */
95 static int match_length;
97 /* The input buffer. */
98 static char *G_buffer;
100 /* The number of bytes to read at once into `G_buffer'. */
101 static size_t read_size;
103 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
104 The extra 2 bytes allow `past_end' to have a value beyond the
105 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
106 static unsigned G_buffer_size;
108 /* The compiled regular expression representing `separator'. */
109 static struct re_pattern_buffer compiled_separator;
/* Long-option table passed to getopt_long in main.  The last field of
   each entry is the matching short-option character ('b', 'r', 's'),
   which lines up with the "brs:" option string used there. */
111 static struct option const longopts[] =
113 {"before", no_argument, NULL, 'b'},
114 {"regex", no_argument, NULL, 'r'},
115 {"separator", required_argument, NULL, 's'},
116 {GETOPT_HELP_OPTION_DECL},
117 {GETOPT_VERSION_OPTION_DECL},
/* Body of the usage/help routine (its header line is not visible here).
   This first call writes a short hint to stderr that points the user at
   --help; the lines after it are the text of the full help message. */
125 fprintf (stderr, _("Try `%s --help' for more information.\n"),
130 Usage: %s [OPTION]... [FILE]...\n\
134 Write each FILE to standard output, last line first.\n\
135 With no FILE, or when FILE is -, read standard input.\n\
137 -b, --before attach the separator before instead of after\n\
138 -r, --regex interpret the separator as a regular expression\n\
139 -s, --separator=STRING use STRING as the separator instead of newline\n\
140 --help display this help and exit\n\
141 --version output version information and exit\n\
143 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
/* Terminate the program: EXIT_SUCCESS only when STATUS is 0. */
145 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
148 /* Print the characters from START to PAST_END - 1.
149 If START is NULL, just flush the buffer.
Output is staged in a static buffer of WRITESIZE bytes.  Full buffers
are handed to fwrite on stdout; a trailing partial buffer stays pending
until a later call fills it or flushes it. */
152 output (const char *start, const char *past_end)
154 static char buffer[WRITESIZE];
155 static int bytes_in_buffer = 0;
/* NOTE(review): the pointer difference is narrowed to int here; confirm
   that no caller passes a span larger than INT_MAX. */
156 int bytes_to_add = past_end - start;
157 int bytes_available = WRITESIZE - bytes_in_buffer;
/* Write out whatever is pending (presumably the START == NULL flush
   path; the guarding condition is not visible here).  The fwrite result
   is not checked on this line. */
161 fwrite (buffer, 1, bytes_in_buffer, stdout);
166 /* Write out as many full buffers as possible. */
167 while (bytes_to_add >= bytes_available)
/* Top up the staging buffer to exactly WRITESIZE bytes, then emit it. */
169 memcpy (buffer + bytes_in_buffer, start, bytes_available);
170 bytes_to_add -= bytes_available;
171 start += bytes_available;
172 fwrite (buffer, 1, WRITESIZE, stdout);
174 bytes_available = WRITESIZE;
/* Keep the remainder (fewer than `bytes_available' bytes) for later. */
177 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
178 bytes_in_buffer += bytes_to_add;
181 /* Print in reverse the file open on descriptor INPUT_FD for reading FILE.
182 Return 0 if ok, 1 if an error occurs.
The file is read from its end toward its start, `read_size' bytes at a
time, into `G_buffer'; each record found is passed to `output'.  In the
lines visible here FILE is used only in diagnostics. */
185 tac_seekable (int input_fd, const char *file)
187 /* Pointer to the location in `G_buffer' where the search for
188 the next separator will begin. */
191 /* Pointer to one past the rightmost character in `G_buffer' that
192 has not been printed yet. */
195 /* Length of the record growing in `G_buffer'. */
196 size_t saved_record_size;
198 /* Offset in the file of the next read. */
201 /* Nonzero if `output' has not been called yet for any file.
202 Only used when the separator is attached to the preceding record. */
204 char first_char = *separator; /* Speed optimization, non-regexp. */
205 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
206 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
207 struct re_registers regs;
209 /* Find the size of the input file. */
210 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
212 return 0; /* It's an empty file. */
214 /* Arrange for the first read to lop off enough to leave the rest of the
215 file a multiple of `read_size'. Since `read_size' can change, this may
216 not always hold during the program run, but since it usually will, leave
217 it here for i/o efficiency (page/sector boundaries and all that).
218 Note: the efficiency gain has not been verified. */
219 saved_record_size = file_pos % read_size;
220 if (saved_record_size == 0)
221 saved_record_size = read_size;
222 file_pos -= saved_record_size;
223 /* `file_pos' now points to the start of the last (probably partial) block
224 in the input file. */
/* NOTE(review): a failed seek is reported, but as far as the visible
   lines show, execution continues to the read below -- confirm. */
226 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
227 error (0, errno, "%s: seek failed", file);
/* A short read is treated as an error: the result must equal the
   requested byte count. */
229 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
231 error (0, errno, "%s", file);
235 match_start = past_end = G_buffer + saved_record_size;
236 /* For non-regexp search, move past impossible positions for a match. */
238 match_start -= match_length1;
242 /* Search backward from `match_start' - 1 to `G_buffer' for a match
243 with `separator'; for speed, use strncmp if `separator' contains no
245 If the match succeeds, set `match_start' to point to the start of
246 the match and `match_length' to the length of the match.
247 Otherwise, make `match_start' < `G_buffer'. */
248 if (sentinel_length == 0)
250 int i = match_start - G_buffer;
253 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s); /* NOTE(review): the last argument looks like a mis-encoded "&regs" (see `struct re_registers regs' above) -- confirm against the real file. */
255 match_start = G_buffer - 1;
258 error (EXIT_FAILURE, 0,
259 _("error in regular expression search"));
263 match_start = G_buffer + regs.start[0];
264 match_length = regs.end[0] - regs.start[0];
269 /* `match_length' is constant for non-regexp boundaries. */
270 while (*--match_start != first_char
271 || (match_length1 && strncmp (match_start + 1, separator1,
276 /* Check whether we backed off the front of `G_buffer' without finding
277 a match for `separator'. */
278 if (match_start < G_buffer)
282 /* Hit the beginning of the file; print the remaining record. */
283 output (G_buffer, past_end);
287 saved_record_size = past_end - G_buffer;
288 if (saved_record_size > read_size)
290 /* `G_buffer_size' is about twice `read_size', so since
291 we want to read in another `read_size' bytes before
292 the data already in `G_buffer', we need to increase
295 int offset = sentinel_length ? sentinel_length : 1;
298 G_buffer_size = read_size * 2 + sentinel_length + 2;
299 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
301 /* Adjust the pointers for the new buffer location. */
302 match_start += newbuffer - G_buffer;
303 past_end += newbuffer - G_buffer;
304 G_buffer = newbuffer;
307 /* Back up to the start of the next bufferfull of the file. */
308 if (file_pos >= read_size)
309 file_pos -= read_size;
312 read_size = file_pos;
/* NOTE(review): the result of this lseek is not checked, unlike the
   first seek in this function. */
315 lseek (input_fd, file_pos, SEEK_SET);
317 /* Shift the pending record data right to make room for the new.
318 The source and destination regions probably overlap. */
319 memmove (G_buffer + read_size, G_buffer, saved_record_size);
320 past_end = G_buffer + read_size + saved_record_size;
321 /* For non-regexp searches, avoid unnecessary scanning. */
323 match_start = G_buffer + read_size;
325 match_start = past_end;
327 if (safe_read (input_fd, G_buffer, read_size) != read_size)
329 error (0, errno, "%s", file);
335 /* Found a match of `separator'. */
336 if (separator_ends_record)
338 char *match_end = match_start + match_length;
340 /* If this match of `separator' isn't at the end of the
341 file, print the record. */
342 if (first_time == 0 || match_end != past_end)
343 output (match_end, past_end);
344 past_end = match_end;
/* Separator belongs to the record that follows it: print from the
   start of the match. */
349 output (match_start, past_end);
350 past_end = match_start;
353 /* For non-regex matching, we can back up. */
354 if (sentinel_length > 0)
355 match_start -= match_length - 1;
360 /* Print FILE in reverse.
361 Return 0 if ok, 1 if an error occurs.
FILE is opened with fopen, its descriptor is passed through SET_BINARY,
and the reversal itself is done by tac_seekable on that descriptor. */
364 tac_file (const char *file)
369 in = fopen (file, "r");
/* Report the open failure using errno and the file name. */
372 error (0, errno, "%s", file);
375 SET_BINARY (fileno (in));
376 errors = tac_seekable (fileno (in), file);
/* A stream error or a failed fclose is also reported against FILE. */
377 if (ferror (in) || fclose (in) == EOF)
379 error (0, errno, "%s", file);
385 #if DONT_UNLINK_WHILE_OPEN
387 static const char *file_to_remove;
388 static FILE *fp_to_close;
/* Exit handler (registered with atexit by record_tempfile): close the
   recorded stream first, then unlink the recorded file name.  Only built
   when DONT_UNLINK_WHILE_OPEN is set.  Neither return value is checked. */
391 unlink_tempfile (void)
393 fclose (fp_to_close);
394 unlink (file_to_remove);
/* Arrange for the temporary file FN, open on stream FP, to be cleaned up
   at exit by registering unlink_tempfile with atexit.
   NOTE(review): presumably FN and FP are stored in `file_to_remove' and
   `fp_to_close' first; those lines are not visible here -- confirm. */
398 record_tempfile (const char *fn, FILE *fp)
404 atexit (unlink_tempfile);
410 /* Make a copy of the standard input in a temporary file created under
$TMPDIR (or DEFAULT_TMPDIR).  On return *G_TEMPFILE is the file's name;
*G_TMP is presumably the stream open on it (that assignment is not
visible here).  Any failure exits the program via error (EXIT_FAILURE). */
413 save_stdin (FILE **g_tmp, char **g_tempfile)
/* The template buffer is allocated once and reused across calls. */
415 static char *template = NULL;
416 static char *tempdir;
417 static char *tempfile;
422 if (template == NULL)
424 tempdir = getenv ("TMPDIR");
426 tempdir = DEFAULT_TMPDIR;
/* 11 = strlen ("/tacXXXXXX") + 1 for the terminating NUL. */
427 template = xmalloc (strlen (tempdir) + 11);
429 sprintf (template, "%s/tacXXXXXX", tempdir);
/* NOTE(review): mktemp only generates a name; there is a window between
   this call and the open below.  The O_EXCL open makes a collision fail
   rather than reuse an existing file, but mkstemp would close the window
   entirely -- consider it. */
430 tempfile = mktemp (template);
432 /* Open temporary file exclusively, to foil a common
433 denial-of-service attack. */
434 fd = open (tempfile, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
436 error (EXIT_FAILURE, errno, "%s", tempfile);
438 tmp = fdopen (fd, "w+");
440 error (EXIT_FAILURE, errno, "%s", tempfile);
442 #if DONT_UNLINK_WHILE_OPEN
443 record_tempfile (tempfile, tmp);
/* Copy stdin to the temporary stream, `read_size' bytes at a time,
   using `G_buffer' as the transfer buffer. */
450 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
454 error (EXIT_FAILURE, errno, _("stdin: read error"));
456 /* Don't bother checking for failure inside the loop -- check after. */
457 fwrite (G_buffer, 1, bytes_read, tmp);
/* Deferred write-error check for the whole copy. */
460 if (ferror (tmp) || fflush (tmp) == EOF)
461 error (EXIT_FAILURE, errno, "%s", tempfile);
465 SET_BINARY (fileno (tmp));
467 *g_tempfile = tempfile;
470 /* Print the standard input in reverse, saving it to temporary
471 file first if it is a pipe.
472 Return 0 if ok, 1 if an error occurs.
When fstat reports a regular file, tac_seekable runs directly on stdin's
descriptor; otherwise the input is first copied by save_stdin and
tac_seekable runs on the temporary stream's descriptor. */
480 /* No tempfile is needed for "tac < file".
481 Use fstat instead of checking for errno == ESPIPE because
482 lseek doesn't work on some special files but doesn't return an
484 if (fstat (STDIN_FILENO, &stats))
486 error (0, errno, _("standard input"));
490 if (S_ISREG (stats.st_mode))
492 errors = tac_seekable (fileno (stdin), _("standard input"));
498 save_stdin (&tmp_stream, &tmp_file);
499 errors = tac_seekable (fileno (tmp_stream), tmp_file);
506 /* Scan backward from BUF_END - 1 down to BUF_START for the byte C.
BUF_END points one byte past the end of the buffer to be searched.
Callers in this file compare the result against NULL, so NULL is
presumably the not-found result (the return lines are not visible here).
NOTE(review): this name is also a GNU libc extension with a different
signature; confirm the two cannot collide in this build. */
509 memrchr (const char *buf_start, const char *buf_end, int c)
511 const char *p = buf_end;
/* NOTE(review): when nothing matches, the final `--p' steps to one
   before BUF_START before the comparison ends the loop. */
512 while (buf_start <= --p)
/* Compare as unsigned char so bytes >= 0x80 match a non-negative C. */
514 if (*(const unsigned char *) p == c)
520 /* Write the N_BYTES bytes at BUF to OUT with the order of the
newline-terminated lines reversed.  The buffer is scanned backward with
memrchr and each line is written with fwrite. */
523 tac_mem (const char *buf, size_t n_bytes, FILE *out)
/* `nl' is the last newline in the buffer (or NULL); `bol' is the start
   of whatever follows it. */
531 nl = memrchr (buf, buf + n_bytes, '\n');
532 bol = (nl == NULL ? buf : nl + 1);
534 /* If the last line of the input file has no terminating newline,
535 treat it as a special case. */
536 if (bol < buf + n_bytes)
538 /* Print out the line from bol to end of input. */
539 fwrite (bol, 1, (buf + n_bytes) - bol, out);
541 /* Add a newline here. Otherwise, the first and second lines
542 of output would appear to have been joined. */
/* Walk backward one line at a time: find the newline that ends the
   previous line, then emit everything between it and `bol'. */
546 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
548 /* Output the line (which includes a trailing newline)
549 from NL+1 to BOL-1. */
550 fwrite (nl + 1, 1, bol - (nl + 1), out);
555 /* If there's anything left, output the last line: BUF .. BOL-1.
556 When the first byte of the input is a newline, there is nothing
559 fwrite (buf, 1, bol - buf, out);
561 /* FIXME: this is work in progress.... */
565 /* Read standard input into a heap buffer with safe_read, then pass the
accumulated bytes to tac_mem to print them reversed on stdout.
The buffer starts at 8 * BUFSIZ bytes; presumably it grows in steps of
`delta' (the line that enlarges `bufsiz' is not visible here). */
568 tac_stdin_to_mem (void)
571 size_t bufsiz = 8 * BUFSIZ;
572 size_t delta = 8 * BUFSIZ;
579 buf = (char *) malloc (bufsiz);
/* NOTE(review): the realloc result is assigned straight back to `buf';
   if it returns NULL the previous allocation is no longer reachable
   through this variable -- confirm the failure path frees it. */
581 buf = (char *) realloc (buf, bufsiz);
585 /* Free the buffer and fall back on the code that relies on a
591 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
595 error (EXIT_FAILURE, errno, _("stdin: read error"));
596 n_bytes += bytes_read;
601 tac_mem (buf, n_bytes, stdout);
608 main (int argc, char **argv)
610 const char *error_message; /* Return value from re_compile_pattern. */
/* Program entry point: parse the options, prepare the separator (either
   a compiled regex or a literal string with its length), allocate
   `G_buffer', then reverse standard input and/or each named FILE and
   exit with a status derived from the accumulated `errors'. */
612 int have_read_stdin = 0;
614 program_name = argv[0];
615 setlocale (LC_ALL, "");
616 bindtextdomain (PACKAGE, LOCALEDIR);
617 textdomain (PACKAGE);
619 atexit (close_stdout);
/* Default: the separator is attached to the end of the record before it. */
624 separator_ends_record = 1;
626 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
633 separator_ends_record = 0;
641 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
643 case_GETOPT_HELP_CHAR;
644 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
/* sentinel_length == 0 selects regex matching: compile the separator. */
650 if (sentinel_length == 0)
652 compiled_separator.allocated = 100;
653 compiled_separator.buffer = (unsigned char *)
654 xmalloc (compiled_separator.allocated);
655 compiled_separator.fastmap = xmalloc (256);
656 compiled_separator.translate = 0;
657 error_message = re_compile_pattern (separator, strlen (separator),
658 &compiled_separator);
660 error (EXIT_FAILURE, 0, "%s", error_message);
/* Literal separator: its length is both the match length and the
   sentinel length. */
663 match_length = sentinel_length = strlen (separator);
665 read_size = INITIAL_READSIZE;
666 /* A precaution that will probably never be needed. */
/* NOTE(review): this compares a signed int expression with a size_t. */
667 while (sentinel_length * 2 >= read_size)
669 G_buffer_size = read_size * 2 + sentinel_length + 2;
670 G_buffer = xmalloc (G_buffer_size);
/* Place a copy of the separator at the very front of the allocation and
   advance `G_buffer' past it, so the usable buffer starts after it. */
673 strcpy (G_buffer, separator);
674 G_buffer += sentinel_length;
684 /* We need binary I/O, since `tac' relies
685 on `lseek' and byte counts. */
686 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
687 errors = tac_stdin ();
/* Process each remaining argument; "-" means standard input. */
691 for (; optind < argc; ++optind)
693 if (STREQ (argv[optind], "-"))
696 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
697 errors |= tac_stdin ();
701 /* Binary output will leave the lines' ends (NL or
702 CR/LF) intact when the output is a disk file.
703 Writing a file with CR/LF pairs at end of lines in
704 text mode has no visible effect on console output,
705 since two CRs in a row are just like one CR. */
706 SET_BINARY (STDOUT_FILENO);
707 errors |= tac_file (argv[optind]);
712 /* Flush the output buffer. */
713 output ((char *) NULL, (char *) NULL);
/* Closing descriptor 0 is attempted only if standard input was read. */
715 if (have_read_stdin && close (0) < 0)
716 error (EXIT_FAILURE, errno, "-");
717 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);