1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
/* Copy each FILE, or the standard input if none are given or when a
   FILE name of "-" is encountered, to the standard output with the
   order of the records reversed.  The records are separated by
   instances of a string, or a newline if none is given.  By default, the
   separator string is attached to the end of the record that it
   follows in the file.

   Options:
   -b, --before                 The separator is attached to the beginning
                                of the record that it precedes in the file.
   -r, --regex                  The separator is a regular expression.
   -s, --separator=separator    Use SEPARATOR as the record separator.

   To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
42 #include <sys/types.h>
48 #include "long-options.h"
49 #include "safe-read.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "tac"

#if defined __MSDOS__ || defined _WIN32
/* Define this to non-zero on systems for which the regular mechanism
   (of unlinking an open file and expecting to be able to write, seek
   back to the beginning, then reread it) doesn't work.  E.g., on Windows
   and DOS systems.  */
# define DONT_UNLINK_WHILE_OPEN 1
#endif

/* Directory that holds the temporary copy of standard input when the
   TMPDIR environment variable is not set.  May be overridden at
   compile time.  */
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* The number of bytes per atomic read. */
#define INITIAL_READSIZE 8192

/* The number of bytes per atomic write. */
#define WRITESIZE 8192
/* The name this program was run with. */
char *program_name;

/* The string that separates the records of the file. */
static char *separator;

/* If nonzero, print `separator' along with the record preceding it
   in the file; otherwise with the record following it. */
static int separator_ends_record;

/* 0 if `separator' is to be matched as a regular expression;
   otherwise, the length of `separator', used as a sentinel to
   stop the search. */
static int sentinel_length;

/* The length of a match with `separator'.  If `sentinel_length' is 0,
   `match_length' is computed every time a match succeeds;
   otherwise, it is simply the length of `separator'. */
static int match_length;

/* The input buffer. */
static char *G_buffer;

/* The number of bytes to read at once into `G_buffer'. */
static size_t read_size;

/* The size of `G_buffer'.  This is read_size * 2 + sentinel_length + 2.
   The extra 2 bytes allow `past_end' to have a value beyond the
   end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
static unsigned G_buffer_size;

/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
/* Long options accepted by `getopt_long' in `main'.  Each entry returns
   the letter of the equivalent short option.  The table must end with an
   all-zero entry, which is how `getopt_long' finds its end.  */
static struct option const longopts[] =
{
  {"before", no_argument, NULL, 'b'},
  {"regex", no_argument, NULL, 'r'},
  {"separator", required_argument, NULL, 's'},
  {NULL, 0, NULL, 0}
};
121 fprintf (stderr, _("Try `%s --help' for more information.\n"),
126 Usage: %s [OPTION]... [FILE]...\n\
130 Write each FILE to standard output, last line first.\n\
131 With no FILE, or when FILE is -, read standard input.\n\
133 -b, --before attach the separator before instead of after\n\
134 -r, --regex interpret the separator as a regular expression\n\
135 -s, --separator=STRING use STRING as the separator instead of newline\n\
136 --help display this help and exit\n\
137 --version output version information and exit\n\
139 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
141 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
144 /* Print the characters from START to PAST_END - 1.
145 If START is NULL, just flush the buffer. */
148 output (const char *start, const char *past_end)
150 static char buffer[WRITESIZE];
151 static int bytes_in_buffer = 0;
152 int bytes_to_add = past_end - start;
153 int bytes_available = WRITESIZE - bytes_in_buffer;
157 fwrite (buffer, 1, bytes_in_buffer, stdout);
162 /* Write out as many full buffers as possible. */
163 while (bytes_to_add >= bytes_available)
165 memcpy (buffer + bytes_in_buffer, start, bytes_available);
166 bytes_to_add -= bytes_available;
167 start += bytes_available;
168 fwrite (buffer, 1, WRITESIZE, stdout);
170 bytes_available = WRITESIZE;
173 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
174 bytes_in_buffer += bytes_to_add;
177 /* Print in reverse the file open on descriptor FD for reading FILE.
178 Return 0 if ok, 1 if an error occurs. */
181 tac_seekable (int input_fd, const char *file)
183 /* Pointer to the location in `G_buffer' where the search for
184 the next separator will begin. */
187 /* Pointer to one past the rightmost character in `G_buffer' that
188 has not been printed yet. */
191 /* Length of the record growing in `G_buffer'. */
192 size_t saved_record_size;
194 /* Offset in the file of the next read. */
197 /* Nonzero if `output' has not been called yet for any file.
198 Only used when the separator is attached to the preceding record. */
200 char first_char = *separator; /* Speed optimization, non-regexp. */
201 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
202 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
203 struct re_registers regs;
205 /* Find the size of the input file. */
206 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
208 return 0; /* It's an empty file. */
210 /* Arrange for the first read to lop off enough to leave the rest of the
211 file a multiple of `read_size'. Since `read_size' can change, this may
212 not always hold during the program run, but since it usually will, leave
213 it here for i/o efficiency (page/sector boundaries and all that).
214 Note: the efficiency gain has not been verified. */
215 saved_record_size = file_pos % read_size;
216 if (saved_record_size == 0)
217 saved_record_size = read_size;
218 file_pos -= saved_record_size;
219 /* `file_pos' now points to the start of the last (probably partial) block
220 in the input file. */
222 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
223 error (0, errno, "%s: seek failed", file);
225 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
227 error (0, errno, "%s", file);
231 match_start = past_end = G_buffer + saved_record_size;
232 /* For non-regexp search, move past impossible positions for a match. */
234 match_start -= match_length1;
238 /* Search backward from `match_start' - 1 to `G_buffer' for a match
239 with `separator'; for speed, use strncmp if `separator' contains no
241 If the match succeeds, set `match_start' to point to the start of
242 the match and `match_length' to the length of the match.
243 Otherwise, make `match_start' < `G_buffer'. */
244 if (sentinel_length == 0)
246 int i = match_start - G_buffer;
249 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
251 match_start = G_buffer - 1;
254 error (EXIT_FAILURE, 0,
255 _("error in regular expression search"));
259 match_start = G_buffer + regs.start[0];
260 match_length = regs.end[0] - regs.start[0];
265 /* `match_length' is constant for non-regexp boundaries. */
266 while (*--match_start != first_char
267 || (match_length1 && strncmp (match_start + 1, separator1,
272 /* Check whether we backed off the front of `G_buffer' without finding
273 a match for `separator'. */
274 if (match_start < G_buffer)
278 /* Hit the beginning of the file; print the remaining record. */
279 output (G_buffer, past_end);
283 saved_record_size = past_end - G_buffer;
284 if (saved_record_size > read_size)
286 /* `G_buffer_size' is about twice `read_size', so since
287 we want to read in another `read_size' bytes before
288 the data already in `G_buffer', we need to increase
291 int offset = sentinel_length ? sentinel_length : 1;
294 G_buffer_size = read_size * 2 + sentinel_length + 2;
295 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
297 /* Adjust the pointers for the new buffer location. */
298 match_start += newbuffer - G_buffer;
299 past_end += newbuffer - G_buffer;
300 G_buffer = newbuffer;
303 /* Back up to the start of the next bufferfull of the file. */
304 if (file_pos >= read_size)
305 file_pos -= read_size;
308 read_size = file_pos;
311 lseek (input_fd, file_pos, SEEK_SET);
313 /* Shift the pending record data right to make room for the new.
314 The source and destination regions probably overlap. */
315 memmove (G_buffer + read_size, G_buffer, saved_record_size);
316 past_end = G_buffer + read_size + saved_record_size;
317 /* For non-regexp searches, avoid unneccessary scanning. */
319 match_start = G_buffer + read_size;
321 match_start = past_end;
323 if (safe_read (input_fd, G_buffer, read_size) != read_size)
325 error (0, errno, "%s", file);
331 /* Found a match of `separator'. */
332 if (separator_ends_record)
334 char *match_end = match_start + match_length;
336 /* If this match of `separator' isn't at the end of the
337 file, print the record. */
338 if (first_time == 0 || match_end != past_end)
339 output (match_end, past_end);
340 past_end = match_end;
345 output (match_start, past_end);
346 past_end = match_start;
349 /* For non-regex matching, we can back up. */
350 if (sentinel_length > 0)
351 match_start -= match_length - 1;
/* Print FILE in reverse.
   FILE is opened for reading, switched to binary mode where that
   matters, and handed to `tac_seekable'.
   Return 0 if ok, 1 if an error occurs. */

static int
tac_file (const char *file)
{
  int errors;
  int read_failed;
  FILE *in;

  in = fopen (file, "r");
  if (in == NULL)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  SET_BINARY (fileno (in));
  errors = tac_seekable (fileno (in), file);

  /* Sample the error flag first, then close unconditionally so the
     stream is released even when an error is pending.  */
  read_failed = ferror (in);
  if (fclose (in) == EOF || read_failed)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  return errors;
}
#if DONT_UNLINK_WHILE_OPEN

/* Name of the temporary file to delete at exit, and the stream that
   must be closed before it can be deleted.  Both are set once, by
   `record_tempfile'.  */
static const char *file_to_remove;
static FILE *fp_to_close;

/* Exit handler: close the recorded stream, then remove the recorded
   file.  Registered with `atexit' by `record_tempfile'.  */

static void
unlink_tempfile (void)
{
  fclose (fp_to_close);
  unlink (file_to_remove);
}

/* Remember that the file named FN, open on stream FP, must be removed
   when the program exits.  Only the first call has any effect; later
   calls are ignored.  FN is stored by reference, not copied.  */

static void
record_tempfile (const char *fn, FILE *fp)
{
  if (!file_to_remove)
    {
      file_to_remove = fn;
      fp_to_close = fp;
      atexit (unlink_tempfile);
    }
}

#endif
406 /* Make a copy of the standard input in `FIXME'. */
409 save_stdin (FILE **g_tmp, char **g_tempfile)
411 static char *template = NULL;
412 static char *tempdir;
413 static char *tempfile;
418 if (template == NULL)
420 tempdir = getenv ("TMPDIR");
422 tempdir = DEFAULT_TMPDIR;
423 template = xmalloc (strlen (tempdir) + 11);
425 sprintf (template, "%s/tacXXXXXX", tempdir);
426 tempfile = mktemp (template);
428 /* Open temporary file exclusively, to foil a common
429 denial-of-service attack. */
430 fd = open (tempfile, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
432 error (EXIT_FAILURE, errno, "%s", tempfile);
434 tmp = fdopen (fd, "w+");
436 error (EXIT_FAILURE, errno, "%s", tempfile);
438 #if DONT_UNLINK_WHILE_OPEN
439 record_tempfile (tempfile, tmp);
446 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
450 error (EXIT_FAILURE, errno, _("stdin: read error"));
452 /* Don't bother checking for failure inside the loop -- check after. */
453 fwrite (G_buffer, 1, bytes_read, tmp);
456 if (ferror (tmp) || fflush (tmp) == EOF)
457 error (EXIT_FAILURE, errno, "%s", tempfile);
461 SET_BINARY (fileno (tmp));
463 *g_tempfile = tempfile;
/* Print the standard input in reverse, saving it to temporary
   file first if it is a pipe.
   Return 0 if ok, 1 if an error occurs. */

static int
tac_stdin (void)
{
  int errors;
  struct stat stats;

  /* No tempfile is needed for "tac < file".
     Use fstat instead of checking for errno == ESPIPE because
     lseek doesn't work on some special files but doesn't return an
     error, either. */
  if (fstat (STDIN_FILENO, &stats))
    {
      error (0, errno, _("standard input"));
      return 1;
    }

  if (S_ISREG (stats.st_mode))
    {
      /* A regular file can be read backward in place.  */
      errors = tac_seekable (fileno (stdin), _("standard input"));
    }
  else
    {
      /* Anything else is first copied to a temporary file, which is
         then processed like any other file.  */
      FILE *tmp_stream;
      char *tmp_file;
      save_stdin (&tmp_stream, &tmp_file);
      errors = tac_seekable (fileno (tmp_stream), tmp_file);
    }

  return errors;
}
/* Return a pointer to the last byte equal to C (compared as an
   unsigned char) in the range [BUF_START, BUF_END), or NULL if there
   is none.
   BUF_END points one byte past the end of the buffer to be searched.
   An empty range (BUF_END <= BUF_START) yields NULL.  */

static const char *
memrchr (const char *buf_start, const char *buf_end, int c)
{
  const char *p = buf_end;

  /* Compare before decrementing, so that P never moves below
     BUF_START.  */
  while (buf_start < p)
    {
      --p;
      if (*(const unsigned char *) p == c)
        return p;
    }
  return NULL;
}
/* Write to OUT, last line first, the lines held in the N_BYTES bytes
   starting at BUF, then flush OUT.  Lines are delimited by '\n'.
   If the final line has no terminating newline, one is written after
   it so that it does not run into the line printed next.
   Nothing is written when N_BYTES is 0.  */

static void
tac_mem (const char *buf, size_t n_bytes, FILE *out)
{
  const char *nl;
  const char *bol;

  if (n_bytes == 0)
    return;

  nl = memrchr (buf, buf + n_bytes, '\n');
  bol = (nl == NULL ? buf : nl + 1);

  /* If the last line of the input file has no terminating newline,
     treat it as a special case.  */
  if (bol < buf + n_bytes)
    {
      /* Print out the line from bol to end of input.  */
      fwrite (bol, 1, (buf + n_bytes) - bol, out);

      /* Add a newline here.  Otherwise, the first and second lines
         of output would appear to have been joined.  */
      fputc ('\n', out);
    }

  /* BOL - 1 is the newline that ends the line still to be printed;
     search strictly before it.  The `buf < bol' guard keeps BOL - 1
     from being formed when BOL is already at the start of BUF.  */
  while (buf < bol && (nl = memrchr (buf, bol - 1, '\n')) != NULL)
    {
      /* Output the line (which includes a trailing newline)
         from NL+1 to BOL-1.  */
      fwrite (nl + 1, 1, bol - (nl + 1), out);

      bol = nl + 1;
    }

  /* If there's anything left, output the last line: BUF .. BOL-1.
     When the first byte of the input is a newline, there is nothing
     left to do here.  */
  if (buf < bol)
    fwrite (buf, 1, bol - buf, out);

  /* FIXME: this is work in progress.... */
  fflush (out);
}
/* Read all of standard input into a heap buffer, then print its lines
   in reverse on stdout via `tac_mem'.
   The buffer starts at 8 * BUFSIZ bytes and is enlarged by the same
   amount before every read, so each read has at least that much room.
   A read error is reported with `error' and exits with EXIT_FAILURE;
   running out of memory currently calls `abort'.  */

static void
tac_stdin_to_mem (void)
{
  char *buf = NULL;
  size_t bufsiz = 8 * BUFSIZ;
  size_t delta = 8 * BUFSIZ;
  size_t n_bytes = 0;

  while (1)
    {
      ssize_t bytes_read;
      /* `realloc' with a null pointer behaves like `malloc'.  Keep the
         result separate so the old block stays reachable on failure.  */
      char *new_buf = (char *) realloc (buf, bufsiz);

      if (new_buf == NULL)
        {
          /* Free the buffer and fall back on the code that relies on a
             temporary file.  */
          free (buf);
          /* FIXME */
          abort ();
        }
      buf = new_buf;

      bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
      if (bytes_read == 0)
        break;
      if (bytes_read < 0)
        error (EXIT_FAILURE, errno, _("stdin: read error"));
      n_bytes += bytes_read;

      bufsiz += delta;
    }

  tac_mem (buf, n_bytes, stdout);
  free (buf);
}
604 main (int argc, char **argv)
606 const char *error_message; /* Return value from re_compile_pattern. */
608 int have_read_stdin = 0;
610 program_name = argv[0];
611 setlocale (LC_ALL, "");
612 bindtextdomain (PACKAGE, LOCALEDIR);
613 textdomain (PACKAGE);
615 parse_long_options (argc, argv, "tac", GNU_PACKAGE, VERSION,
616 "Jay Lepreau and David MacKenzie", usage);
621 separator_ends_record = 1;
623 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
630 separator_ends_record = 0;
638 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
645 if (sentinel_length == 0)
647 compiled_separator.allocated = 100;
648 compiled_separator.buffer = (unsigned char *)
649 xmalloc (compiled_separator.allocated);
650 compiled_separator.fastmap = xmalloc (256);
651 compiled_separator.translate = 0;
652 error_message = re_compile_pattern (separator, strlen (separator),
653 &compiled_separator);
655 error (EXIT_FAILURE, 0, "%s", error_message);
658 match_length = sentinel_length = strlen (separator);
660 read_size = INITIAL_READSIZE;
661 /* A precaution that will probably never be needed. */
662 while (sentinel_length * 2 >= read_size)
664 G_buffer_size = read_size * 2 + sentinel_length + 2;
665 G_buffer = xmalloc (G_buffer_size);
668 strcpy (G_buffer, separator);
669 G_buffer += sentinel_length;
679 /* We need binary I/O, since `tac' relies
680 on `lseek' and byte counts. */
681 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
682 errors = tac_stdin ();
686 for (; optind < argc; ++optind)
688 if (STREQ (argv[optind], "-"))
691 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
692 errors |= tac_stdin ();
696 /* Binary output will leave the lines' ends (NL or
697 CR/LF) intact when the output is a disk file.
698 Writing a file with CR/LF pairs at end of lines in
699 text mode has no visible effect on console output,
700 since two CRs in a row are just like one CR. */
701 SET_BINARY (STDOUT_FILENO);
702 errors |= tac_file (argv[optind]);
707 /* Flush the output buffer. */
708 output ((char *) NULL, (char *) NULL);
710 if (have_read_stdin && close (0) < 0)
711 error (EXIT_FAILURE, errno, "-");
712 if (ferror (stdout) || fclose (stdout) == EOF)
713 error (EXIT_FAILURE, errno, _("write error"));
714 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);