1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
48 #include "safe-read.h"
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "tac"
53 #define AUTHORS "Jay Lepreau", "David MacKenzie"
55 #if defined __MSDOS__ || defined _WIN32
56 /* Define this to non-zero on systems for which the regular mechanism
57 (of unlinking an open file and expecting to be able to write, seek
58 back to the beginning, then reread it) doesn't work. E.g., on Windows
60 # define DONT_UNLINK_WHILE_OPEN 1
64 #ifndef DEFAULT_TMPDIR
65 # define DEFAULT_TMPDIR "/tmp"
68 /* The number of bytes per atomic read. */
69 #define INITIAL_READSIZE 8192
71 /* The number of bytes per atomic write. */
72 #define WRITESIZE 8192
74 /* The name this program was run with. */
77 /* The string that separates the records of the file. */
78 static char *separator;
80 /* If nonzero, print `separator' along with the record preceding it
81 in the file; otherwise with the record following it. */
82 static int separator_ends_record;
84 /* 0 if `separator' is to be matched as a regular expression;
85 otherwise, the length of `separator', used as a sentinel to
87 static size_t sentinel_length;
89 /* The length of a match with `separator'. If `sentinel_length' is 0,
90 `match_length' is computed every time a match succeeds;
91 otherwise, it is simply the length of `separator'. */
92 static int match_length;
94 /* The input buffer. */
95 static char *G_buffer;
97 /* The number of bytes to read at once into `buffer'. */
98 static size_t read_size;
100 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
101 The extra 2 bytes allow `past_end' to have a value beyond the
102 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
103 static unsigned G_buffer_size;
105 /* The compiled regular expression representing `separator'. */
106 static struct re_pattern_buffer compiled_separator;
108 static struct option const longopts[] =
110 {"before", no_argument, NULL, 'b'},
111 {"regex", no_argument, NULL, 'r'},
112 {"separator", required_argument, NULL, 's'},
113 {GETOPT_HELP_OPTION_DECL},
114 {GETOPT_VERSION_OPTION_DECL},
122 fprintf (stderr, _("Try `%s --help' for more information.\n"),
127 Usage: %s [OPTION]... [FILE]...\n\
131 Write each FILE to standard output, last line first.\n\
132 With no FILE, or when FILE is -, read standard input.\n\
136 Mandatory arguments to long options are mandatory for short options too.\n\
139 -b, --before attach the separator before instead of after\n\
140 -r, --regex interpret the separator as a regular expression\n\
141 -s, --separator=STRING use STRING as the separator instead of newline\n\
143 fputs (HELP_OPTION_DESCRIPTION, stdout);
144 fputs (VERSION_OPTION_DESCRIPTION, stdout);
145 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
147 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
150 /* Print the characters from START to PAST_END - 1.
151 If START is NULL, just flush the buffer. */
154 output (const char *start, const char *past_end)
156 static char buffer[WRITESIZE];
157 static size_t bytes_in_buffer = 0;
158 size_t bytes_to_add = past_end - start;
159 size_t bytes_available = WRITESIZE - bytes_in_buffer;
163 fwrite (buffer, 1, bytes_in_buffer, stdout);
168 /* Write out as many full buffers as possible. */
169 while (bytes_to_add >= bytes_available)
171 memcpy (buffer + bytes_in_buffer, start, bytes_available);
172 bytes_to_add -= bytes_available;
173 start += bytes_available;
174 fwrite (buffer, 1, WRITESIZE, stdout);
176 bytes_available = WRITESIZE;
179 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
180 bytes_in_buffer += bytes_to_add;
183 /* Print in reverse the file open on descriptor FD for reading FILE.
184 Return 0 if ok, 1 if an error occurs. */
187 tac_seekable (int input_fd, const char *file)
189 /* Pointer to the location in `G_buffer' where the search for
190 the next separator will begin. */
193 /* Pointer to one past the rightmost character in `G_buffer' that
194 has not been printed yet. */
197 /* Length of the record growing in `G_buffer'. */
198 size_t saved_record_size;
200 /* Offset in the file of the next read. */
203 /* Nonzero if `output' has not been called yet for any file.
204 Only used when the separator is attached to the preceding record. */
206 char first_char = *separator; /* Speed optimization, non-regexp. */
207 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
208 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
209 struct re_registers regs;
211 /* Find the size of the input file. */
212 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
214 return 0; /* It's an empty file. */
216 /* Arrange for the first read to lop off enough to leave the rest of the
217 file a multiple of `read_size'. Since `read_size' can change, this may
218 not always hold during the program run, but since it usually will, leave
219 it here for i/o efficiency (page/sector boundaries and all that).
220 Note: the efficiency gain has not been verified. */
221 saved_record_size = file_pos % read_size;
222 if (saved_record_size == 0)
223 saved_record_size = read_size;
224 file_pos -= saved_record_size;
225 /* `file_pos' now points to the start of the last (probably partial) block
226 in the input file. */
228 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
229 error (0, errno, _("%s: seek failed"), file);
231 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
233 error (0, errno, "%s", file);
237 match_start = past_end = G_buffer + saved_record_size;
238 /* For non-regexp search, move past impossible positions for a match. */
240 match_start -= match_length1;
244 /* Search backward from `match_start' - 1 to `G_buffer' for a match
245 with `separator'; for speed, use strncmp if `separator' contains no
247 If the match succeeds, set `match_start' to point to the start of
248 the match and `match_length' to the length of the match.
249 Otherwise, make `match_start' < `G_buffer'. */
250 if (sentinel_length == 0)
252 int i = match_start - G_buffer;
255 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
257 match_start = G_buffer - 1;
260 error (EXIT_FAILURE, 0,
261 _("error in regular expression search"));
265 match_start = G_buffer + regs.start[0];
266 match_length = regs.end[0] - regs.start[0];
271 /* `match_length' is constant for non-regexp boundaries. */
272 while (*--match_start != first_char
273 || (match_length1 && strncmp (match_start + 1, separator1,
278 /* Check whether we backed off the front of `G_buffer' without finding
279 a match for `separator'. */
280 if (match_start < G_buffer)
284 /* Hit the beginning of the file; print the remaining record. */
285 output (G_buffer, past_end);
289 saved_record_size = past_end - G_buffer;
290 if (saved_record_size > read_size)
292 /* `G_buffer_size' is about twice `read_size', so since
293 we want to read in another `read_size' bytes before
294 the data already in `G_buffer', we need to increase
297 int offset = sentinel_length ? sentinel_length : 1;
300 G_buffer_size = read_size * 2 + sentinel_length + 2;
301 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
303 /* Adjust the pointers for the new buffer location. */
304 match_start += newbuffer - G_buffer;
305 past_end += newbuffer - G_buffer;
306 G_buffer = newbuffer;
309 /* Back up to the start of the next bufferfull of the file. */
310 if (file_pos >= read_size)
311 file_pos -= read_size;
314 read_size = file_pos;
317 lseek (input_fd, file_pos, SEEK_SET);
319 /* Shift the pending record data right to make room for the new.
320 The source and destination regions probably overlap. */
321 memmove (G_buffer + read_size, G_buffer, saved_record_size);
322 past_end = G_buffer + read_size + saved_record_size;
323 /* For non-regexp searches, avoid unneccessary scanning. */
325 match_start = G_buffer + read_size;
327 match_start = past_end;
329 if (safe_read (input_fd, G_buffer, read_size) != read_size)
331 error (0, errno, "%s", file);
337 /* Found a match of `separator'. */
338 if (separator_ends_record)
340 char *match_end = match_start + match_length;
342 /* If this match of `separator' isn't at the end of the
343 file, print the record. */
344 if (first_time == 0 || match_end != past_end)
345 output (match_end, past_end);
346 past_end = match_end;
351 output (match_start, past_end);
352 past_end = match_start;
355 /* For non-regex matching, we can back up. */
356 if (sentinel_length > 0)
357 match_start -= match_length - 1;
362 /* Print FILE in reverse.
363 Return 0 if ok, 1 if an error occurs. */
366 tac_file (const char *file)
371 in = fopen (file, "r");
374 error (0, errno, "%s", file);
377 SET_BINARY (fileno (in));
378 errors = tac_seekable (fileno (in), file);
379 if (ferror (in) || fclose (in) == EOF)
381 error (0, errno, "%s", file);
387 #if DONT_UNLINK_WHILE_OPEN
389 static const char *file_to_remove;
390 static FILE *fp_to_close;
393 unlink_tempfile (void)
395 fclose (fp_to_close);
396 unlink (file_to_remove);
400 record_tempfile (const char *fn, FILE *fp)
406 atexit (unlink_tempfile);
412 /* Make a copy of the standard input in `FIXME'. */
415 save_stdin (FILE **g_tmp, char **g_tempfile)
417 static char *template = NULL;
418 static char *tempdir;
423 if (template == NULL)
425 tempdir = getenv ("TMPDIR");
427 tempdir = DEFAULT_TMPDIR;
428 template = xmalloc (strlen (tempdir) + 11);
430 sprintf (template, "%s/tacXXXXXX", tempdir);
432 fd = mkstemp (template);
434 error (EXIT_FAILURE, errno, "%s", tempfile);
436 tmp = fdopen (fd, "w+");
438 error (EXIT_FAILURE, errno, "%s", tempfile);
440 #if DONT_UNLINK_WHILE_OPEN
441 record_tempfile (tempfile, tmp);
448 size_t bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
451 if (bytes_read == SAFE_READ_ERROR)
452 error (EXIT_FAILURE, errno, _("stdin: read error"));
454 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
458 if (ferror (tmp) || fflush (tmp) == EOF)
459 error (EXIT_FAILURE, errno, "%s", tempfile);
461 SET_BINARY (fileno (tmp));
463 *g_tempfile = tempfile;
466 /* Print the standard input in reverse, saving it to temporary
467 file first if it is a pipe.
468 Return 0 if ok, 1 if an error occurs. */
476 /* No tempfile is needed for "tac < file".
477 Use fstat instead of checking for errno == ESPIPE because
478 lseek doesn't work on some special files but doesn't return an
480 if (fstat (STDIN_FILENO, &stats))
482 error (0, errno, _("standard input"));
486 if (S_ISREG (stats.st_mode))
488 errors = tac_seekable (fileno (stdin), _("standard input"));
494 save_stdin (&tmp_stream, &tmp_file);
495 errors = tac_seekable (fileno (tmp_stream), tmp_file);
502 /* BUF_END points one byte past the end of the buffer to be searched. */
505 memrchr (const char *buf_start, const char *buf_end, int c)
507 const char *p = buf_end;
508 while (buf_start <= --p)
510 if (*(const unsigned char *) p == c)
516 /* FIXME: describe */
519 tac_mem (const char *buf, size_t n_bytes, FILE *out)
527 nl = memrchr (buf, buf + n_bytes, '\n');
528 bol = (nl == NULL ? buf : nl + 1);
530 /* If the last line of the input file has no terminating newline,
531 treat it as a special case. */
532 if (bol < buf + n_bytes)
534 /* Print out the line from bol to end of input. */
535 fwrite (bol, 1, (buf + n_bytes) - bol, out);
537 /* Add a newline here. Otherwise, the first and second lines
538 of output would appear to have been joined. */
542 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
544 /* Output the line (which includes a trailing newline)
545 from NL+1 to BOL-1. */
546 fwrite (nl + 1, 1, bol - (nl + 1), out);
551 /* If there's anything left, output the last line: BUF .. BOL-1.
552 When the first byte of the input is a newline, there is nothing
555 fwrite (buf, 1, bol - buf, out);
557 /* FIXME: this is work in progress.... */
561 /* FIXME: describe */
564 tac_stdin_to_mem (void)
567 size_t bufsiz = 8 * BUFSIZ;
568 size_t delta = 8 * BUFSIZ;
575 buf = (char *) malloc (bufsiz);
577 buf = (char *) realloc (buf, bufsiz);
581 /* Free the buffer and fall back on the code that relies on a
587 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
590 if (bytes_read == SAFE_READ_ERROR)
591 error (EXIT_FAILURE, errno, _("stdin: read error"));
592 n_bytes += bytes_read;
597 tac_mem (buf, n_bytes, stdout);
604 main (int argc, char **argv)
606 const char *error_message; /* Return value from re_compile_pattern. */
608 int have_read_stdin = 0;
610 initialize_main (&argc, &argv);
611 program_name = argv[0];
612 setlocale (LC_ALL, "");
613 bindtextdomain (PACKAGE, LOCALEDIR);
614 textdomain (PACKAGE);
616 atexit (close_stdout);
621 separator_ends_record = 1;
623 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
630 separator_ends_record = 0;
638 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
640 case_GETOPT_HELP_CHAR;
641 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
643 usage (EXIT_FAILURE);
647 if (sentinel_length == 0)
649 compiled_separator.allocated = 100;
650 compiled_separator.buffer = (unsigned char *)
651 xmalloc (compiled_separator.allocated);
652 compiled_separator.fastmap = xmalloc (256);
653 compiled_separator.translate = 0;
654 error_message = re_compile_pattern (separator, strlen (separator),
655 &compiled_separator);
657 error (EXIT_FAILURE, 0, "%s", error_message);
660 match_length = sentinel_length = strlen (separator);
662 read_size = INITIAL_READSIZE;
663 /* A precaution that will probably never be needed. */
664 while (sentinel_length * 2 >= read_size)
666 G_buffer_size = read_size * 2 + sentinel_length + 2;
667 G_buffer = xmalloc (G_buffer_size);
670 strcpy (G_buffer, separator);
671 G_buffer += sentinel_length;
681 /* We need binary I/O, since `tac' relies
682 on `lseek' and byte counts. */
683 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
684 errors = tac_stdin ();
688 for (; optind < argc; ++optind)
690 if (STREQ (argv[optind], "-"))
693 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
694 errors |= tac_stdin ();
698 /* Binary output will leave the lines' ends (NL or
699 CR/LF) intact when the output is a disk file.
700 Writing a file with CR/LF pairs at end of lines in
701 text mode has no visible effect on console output,
702 since two CRs in a row are just like one CR. */
703 SET_BINARY (STDOUT_FILENO);
704 errors |= tac_file (argv[optind]);
709 /* Flush the output buffer. */
710 output ((char *) NULL, (char *) NULL);
712 if (have_read_stdin && close (STDIN_FILENO) < 0)
713 error (EXIT_FAILURE, errno, "-");
714 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);