1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
49 #include "safe-read.h"
51 /* The official name of this program (e.g., no `g' prefix). */
52 #define PROGRAM_NAME "tac"
54 #define AUTHORS N_ ("Jay Lepreau and David MacKenzie")
56 #if defined __MSDOS__ || defined _WIN32
57 /* Define this to non-zero on systems for which the regular mechanism
58 (of unlinking an open file and expecting to be able to write, seek
59 back to the beginning, then reread it) doesn't work. E.g., on Windows
61 # define DONT_UNLINK_WHILE_OPEN 1
65 #ifndef DEFAULT_TMPDIR
66 # define DEFAULT_TMPDIR "/tmp"
69 /* The number of bytes per atomic read. */
70 #define INITIAL_READSIZE 8192
72 /* The number of bytes per atomic write. */
73 #define WRITESIZE 8192
75 /* The name this program was run with. */
78 /* The string that separates the records of the file. */
79 static char *separator;
81 /* If nonzero, print `separator' along with the record preceding it
82 in the file; otherwise with the record following it. */
83 static int separator_ends_record;
85 /* 0 if `separator' is to be matched as a regular expression;
86 otherwise, the length of `separator', used as a sentinel to
88 static int sentinel_length;
90 /* The length of a match with `separator'. If `sentinel_length' is 0,
91 `match_length' is computed every time a match succeeds;
92 otherwise, it is simply the length of `separator'. */
93 static int match_length;
95 /* The input buffer. */
96 static char *G_buffer;
98 /* The number of bytes to read at once into `buffer'. */
99 static size_t read_size;
101 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
102 The extra 2 bytes allow `past_end' to have a value beyond the
103 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
104 static unsigned G_buffer_size;
106 /* The compiled regular expression representing `separator'. */
107 static struct re_pattern_buffer compiled_separator;
109 static struct option const longopts[] =
111 {"before", no_argument, NULL, 'b'},
112 {"regex", no_argument, NULL, 'r'},
113 {"separator", required_argument, NULL, 's'},
114 {GETOPT_HELP_OPTION_DECL},
115 {GETOPT_VERSION_OPTION_DECL},
123 fprintf (stderr, _("Try `%s --help' for more information.\n"),
128 Usage: %s [OPTION]... [FILE]...\n\
132 Write each FILE to standard output, last line first.\n\
133 With no FILE, or when FILE is -, read standard input.\n\
135 Mandatory arguments to long options are mandatory for short options too.\n\
136 -b, --before attach the separator before instead of after\n\
139 -r, --regex interpret the separator as a regular expression\n\
140 -s, --separator=STRING use STRING as the separator instead of newline\n\
141 --help display this help and exit\n\
142 --version output version information and exit\n\
144 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
146 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
149 /* Print the characters from START to PAST_END - 1.
150 If START is NULL, just flush the buffer. */
153 output (const char *start, const char *past_end)
155 static char buffer[WRITESIZE];
156 static int bytes_in_buffer = 0;
157 int bytes_to_add = past_end - start;
158 int bytes_available = WRITESIZE - bytes_in_buffer;
162 fwrite (buffer, 1, bytes_in_buffer, stdout);
167 /* Write out as many full buffers as possible. */
168 while (bytes_to_add >= bytes_available)
170 memcpy (buffer + bytes_in_buffer, start, bytes_available);
171 bytes_to_add -= bytes_available;
172 start += bytes_available;
173 fwrite (buffer, 1, WRITESIZE, stdout);
175 bytes_available = WRITESIZE;
178 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
179 bytes_in_buffer += bytes_to_add;
182 /* Print in reverse the file open on descriptor FD for reading FILE.
183 Return 0 if ok, 1 if an error occurs. */
186 tac_seekable (int input_fd, const char *file)
188 /* Pointer to the location in `G_buffer' where the search for
189 the next separator will begin. */
192 /* Pointer to one past the rightmost character in `G_buffer' that
193 has not been printed yet. */
196 /* Length of the record growing in `G_buffer'. */
197 size_t saved_record_size;
199 /* Offset in the file of the next read. */
202 /* Nonzero if `output' has not been called yet for any file.
203 Only used when the separator is attached to the preceding record. */
205 char first_char = *separator; /* Speed optimization, non-regexp. */
206 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
207 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
208 struct re_registers regs;
210 /* Find the size of the input file. */
211 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
213 return 0; /* It's an empty file. */
215 /* Arrange for the first read to lop off enough to leave the rest of the
216 file a multiple of `read_size'. Since `read_size' can change, this may
217 not always hold during the program run, but since it usually will, leave
218 it here for i/o efficiency (page/sector boundaries and all that).
219 Note: the efficiency gain has not been verified. */
220 saved_record_size = file_pos % read_size;
221 if (saved_record_size == 0)
222 saved_record_size = read_size;
223 file_pos -= saved_record_size;
224 /* `file_pos' now points to the start of the last (probably partial) block
225 in the input file. */
227 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
228 error (0, errno, "%s: seek failed", file);
230 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
232 error (0, errno, "%s", file);
236 match_start = past_end = G_buffer + saved_record_size;
237 /* For non-regexp search, move past impossible positions for a match. */
239 match_start -= match_length1;
243 /* Search backward from `match_start' - 1 to `G_buffer' for a match
244 with `separator'; for speed, use strncmp if `separator' contains no
246 If the match succeeds, set `match_start' to point to the start of
247 the match and `match_length' to the length of the match.
248 Otherwise, make `match_start' < `G_buffer'. */
249 if (sentinel_length == 0)
251 int i = match_start - G_buffer;
254 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
256 match_start = G_buffer - 1;
259 error (EXIT_FAILURE, 0,
260 _("error in regular expression search"));
264 match_start = G_buffer + regs.start[0];
265 match_length = regs.end[0] - regs.start[0];
270 /* `match_length' is constant for non-regexp boundaries. */
271 while (*--match_start != first_char
272 || (match_length1 && strncmp (match_start + 1, separator1,
277 /* Check whether we backed off the front of `G_buffer' without finding
278 a match for `separator'. */
279 if (match_start < G_buffer)
283 /* Hit the beginning of the file; print the remaining record. */
284 output (G_buffer, past_end);
288 saved_record_size = past_end - G_buffer;
289 if (saved_record_size > read_size)
291 /* `G_buffer_size' is about twice `read_size', so since
292 we want to read in another `read_size' bytes before
293 the data already in `G_buffer', we need to increase
296 int offset = sentinel_length ? sentinel_length : 1;
299 G_buffer_size = read_size * 2 + sentinel_length + 2;
300 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
302 /* Adjust the pointers for the new buffer location. */
303 match_start += newbuffer - G_buffer;
304 past_end += newbuffer - G_buffer;
305 G_buffer = newbuffer;
308 /* Back up to the start of the next bufferfull of the file. */
309 if (file_pos >= read_size)
310 file_pos -= read_size;
313 read_size = file_pos;
316 lseek (input_fd, file_pos, SEEK_SET);
318 /* Shift the pending record data right to make room for the new.
319 The source and destination regions probably overlap. */
320 memmove (G_buffer + read_size, G_buffer, saved_record_size);
321 past_end = G_buffer + read_size + saved_record_size;
322 /* For non-regexp searches, avoid unneccessary scanning. */
324 match_start = G_buffer + read_size;
326 match_start = past_end;
328 if (safe_read (input_fd, G_buffer, read_size) != read_size)
330 error (0, errno, "%s", file);
336 /* Found a match of `separator'. */
337 if (separator_ends_record)
339 char *match_end = match_start + match_length;
341 /* If this match of `separator' isn't at the end of the
342 file, print the record. */
343 if (first_time == 0 || match_end != past_end)
344 output (match_end, past_end);
345 past_end = match_end;
350 output (match_start, past_end);
351 past_end = match_start;
354 /* For non-regex matching, we can back up. */
355 if (sentinel_length > 0)
356 match_start -= match_length - 1;
361 /* Print FILE in reverse.
362 Return 0 if ok, 1 if an error occurs. */
365 tac_file (const char *file)
370 in = fopen (file, "r");
373 error (0, errno, "%s", file);
376 SET_BINARY (fileno (in));
377 errors = tac_seekable (fileno (in), file);
378 if (ferror (in) || fclose (in) == EOF)
380 error (0, errno, "%s", file);
386 #if DONT_UNLINK_WHILE_OPEN
388 static const char *file_to_remove;
389 static FILE *fp_to_close;
392 unlink_tempfile (void)
394 fclose (fp_to_close);
395 unlink (file_to_remove);
399 record_tempfile (const char *fn, FILE *fp)
405 atexit (unlink_tempfile);
411 /* Make a copy of the standard input in `FIXME'. */
414 save_stdin (FILE **g_tmp, char **g_tempfile)
416 static char *template = NULL;
417 static char *tempdir;
423 if (template == NULL)
425 tempdir = getenv ("TMPDIR");
427 tempdir = DEFAULT_TMPDIR;
428 template = xmalloc (strlen (tempdir) + 11);
430 sprintf (template, "%s/tacXXXXXX", tempdir);
432 fd = mkstemp (template);
434 error (EXIT_FAILURE, errno, "%s", tempfile);
436 tmp = fdopen (fd, "w+");
438 error (EXIT_FAILURE, errno, "%s", tempfile);
440 #if DONT_UNLINK_WHILE_OPEN
441 record_tempfile (tempfile, tmp);
448 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
452 error (EXIT_FAILURE, errno, _("stdin: read error"));
454 /* Don't bother checking for failure inside the loop -- check after. */
455 fwrite (G_buffer, 1, bytes_read, tmp);
458 if (ferror (tmp) || fflush (tmp) == EOF)
459 error (EXIT_FAILURE, errno, "%s", tempfile);
463 SET_BINARY (fileno (tmp));
465 *g_tempfile = tempfile;
468 /* Print the standard input in reverse, saving it to temporary
469 file first if it is a pipe.
470 Return 0 if ok, 1 if an error occurs. */
478 /* No tempfile is needed for "tac < file".
479 Use fstat instead of checking for errno == ESPIPE because
480 lseek doesn't work on some special files but doesn't return an
482 if (fstat (STDIN_FILENO, &stats))
484 error (0, errno, _("standard input"));
488 if (S_ISREG (stats.st_mode))
490 errors = tac_seekable (fileno (stdin), _("standard input"));
496 save_stdin (&tmp_stream, &tmp_file);
497 errors = tac_seekable (fileno (tmp_stream), tmp_file);
504 /* BUF_END points one byte past the end of the buffer to be searched. */
507 memrchr (const char *buf_start, const char *buf_end, int c)
509 const char *p = buf_end;
510 while (buf_start <= --p)
512 if (*(const unsigned char *) p == c)
518 /* FIXME: describe */
521 tac_mem (const char *buf, size_t n_bytes, FILE *out)
529 nl = memrchr (buf, buf + n_bytes, '\n');
530 bol = (nl == NULL ? buf : nl + 1);
532 /* If the last line of the input file has no terminating newline,
533 treat it as a special case. */
534 if (bol < buf + n_bytes)
536 /* Print out the line from bol to end of input. */
537 fwrite (bol, 1, (buf + n_bytes) - bol, out);
539 /* Add a newline here. Otherwise, the first and second lines
540 of output would appear to have been joined. */
544 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
546 /* Output the line (which includes a trailing newline)
547 from NL+1 to BOL-1. */
548 fwrite (nl + 1, 1, bol - (nl + 1), out);
553 /* If there's anything left, output the last line: BUF .. BOL-1.
554 When the first byte of the input is a newline, there is nothing
557 fwrite (buf, 1, bol - buf, out);
559 /* FIXME: this is work in progress.... */
563 /* FIXME: describe */
566 tac_stdin_to_mem (void)
569 size_t bufsiz = 8 * BUFSIZ;
570 size_t delta = 8 * BUFSIZ;
577 buf = (char *) malloc (bufsiz);
579 buf = (char *) realloc (buf, bufsiz);
583 /* Free the buffer and fall back on the code that relies on a
589 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
593 error (EXIT_FAILURE, errno, _("stdin: read error"));
594 n_bytes += bytes_read;
599 tac_mem (buf, n_bytes, stdout);
606 main (int argc, char **argv)
608 const char *error_message; /* Return value from re_compile_pattern. */
610 int have_read_stdin = 0;
612 program_name = argv[0];
613 setlocale (LC_ALL, "");
614 bindtextdomain (PACKAGE, LOCALEDIR);
615 textdomain (PACKAGE);
617 atexit (close_stdout);
622 separator_ends_record = 1;
624 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
631 separator_ends_record = 0;
639 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
641 case_GETOPT_HELP_CHAR;
642 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
648 if (sentinel_length == 0)
650 compiled_separator.allocated = 100;
651 compiled_separator.buffer = (unsigned char *)
652 xmalloc (compiled_separator.allocated);
653 compiled_separator.fastmap = xmalloc (256);
654 compiled_separator.translate = 0;
655 error_message = re_compile_pattern (separator, strlen (separator),
656 &compiled_separator);
658 error (EXIT_FAILURE, 0, "%s", error_message);
661 match_length = sentinel_length = strlen (separator);
663 read_size = INITIAL_READSIZE;
664 /* A precaution that will probably never be needed. */
665 while (sentinel_length * 2 >= read_size)
667 G_buffer_size = read_size * 2 + sentinel_length + 2;
668 G_buffer = xmalloc (G_buffer_size);
671 strcpy (G_buffer, separator);
672 G_buffer += sentinel_length;
682 /* We need binary I/O, since `tac' relies
683 on `lseek' and byte counts. */
684 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
685 errors = tac_stdin ();
689 for (; optind < argc; ++optind)
691 if (STREQ (argv[optind], "-"))
694 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
695 errors |= tac_stdin ();
699 /* Binary output will leave the lines' ends (NL or
700 CR/LF) intact when the output is a disk file.
701 Writing a file with CR/LF pairs at end of lines in
702 text mode has no visible effect on console output,
703 since two CRs in a row are just like one CR. */
704 SET_BINARY (STDOUT_FILENO);
705 errors |= tac_file (argv[optind]);
710 /* Flush the output buffer. */
711 output ((char *) NULL, (char *) NULL);
713 if (have_read_stdin && close (0) < 0)
714 error (EXIT_FAILURE, errno, "-");
715 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);