1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
49 #include "safe-read.h"
51 /* The official name of this program (e.g., no `g' prefix). */
52 #define PROGRAM_NAME "tac"
54 #define AUTHORS "Jay Lepreau and David MacKenzie"
56 #if defined __MSDOS__ || defined _WIN32
57 /* Define this to non-zero on systems for which the regular mechanism
58 (of unlinking an open file and expecting to be able to write, seek
59 back to the beginning, then reread it) doesn't work. E.g., on Windows
61 # define DONT_UNLINK_WHILE_OPEN 1
65 #ifndef DEFAULT_TMPDIR
66 # define DEFAULT_TMPDIR "/tmp"
69 /* The number of bytes per atomic read. */
70 #define INITIAL_READSIZE 8192
72 /* The number of bytes per atomic write. */
73 #define WRITESIZE 8192
75 /* The name this program was run with. */
78 /* The string that separates the records of the file. */
79 static char *separator;
81 /* If nonzero, print `separator' along with the record preceding it
82 in the file; otherwise with the record following it. */
83 static int separator_ends_record;
85 /* 0 if `separator' is to be matched as a regular expression;
86 otherwise, the length of `separator', used as a sentinel to
88 static int sentinel_length;
90 /* The length of a match with `separator'. If `sentinel_length' is 0,
91 `match_length' is computed every time a match succeeds;
92 otherwise, it is simply the length of `separator'. */
93 static int match_length;
95 /* The input buffer. */
96 static char *G_buffer;
98 /* The number of bytes to read at once into `buffer'. */
99 static size_t read_size;
101 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
102 The extra 2 bytes allow `past_end' to have a value beyond the
103 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
104 static unsigned G_buffer_size;
106 /* The compiled regular expression representing `separator'. */
107 static struct re_pattern_buffer compiled_separator;
109 static struct option const longopts[] =
111 {"before", no_argument, NULL, 'b'},
112 {"regex", no_argument, NULL, 'r'},
113 {"separator", required_argument, NULL, 's'},
114 {GETOPT_HELP_OPTION_DECL},
115 {GETOPT_VERSION_OPTION_DECL},
123 fprintf (stderr, _("Try `%s --help' for more information.\n"),
128 Usage: %s [OPTION]... [FILE]...\n\
132 Write each FILE to standard output, last line first.\n\
133 With no FILE, or when FILE is -, read standard input.\n\
135 -b, --before attach the separator before instead of after\n\
136 -r, --regex interpret the separator as a regular expression\n\
137 -s, --separator=STRING use STRING as the separator instead of newline\n\
138 --help display this help and exit\n\
139 --version output version information and exit\n\
141 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
143 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
146 /* Print the characters from START to PAST_END - 1.
147 If START is NULL, just flush the buffer. */
150 output (const char *start, const char *past_end)
152 static char buffer[WRITESIZE];
153 static int bytes_in_buffer = 0;
154 int bytes_to_add = past_end - start;
155 int bytes_available = WRITESIZE - bytes_in_buffer;
159 fwrite (buffer, 1, bytes_in_buffer, stdout);
164 /* Write out as many full buffers as possible. */
165 while (bytes_to_add >= bytes_available)
167 memcpy (buffer + bytes_in_buffer, start, bytes_available);
168 bytes_to_add -= bytes_available;
169 start += bytes_available;
170 fwrite (buffer, 1, WRITESIZE, stdout);
172 bytes_available = WRITESIZE;
175 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
176 bytes_in_buffer += bytes_to_add;
179 /* Print in reverse the file open on descriptor FD for reading FILE.
180 Return 0 if ok, 1 if an error occurs. */
183 tac_seekable (int input_fd, const char *file)
185 /* Pointer to the location in `G_buffer' where the search for
186 the next separator will begin. */
189 /* Pointer to one past the rightmost character in `G_buffer' that
190 has not been printed yet. */
193 /* Length of the record growing in `G_buffer'. */
194 size_t saved_record_size;
196 /* Offset in the file of the next read. */
199 /* Nonzero if `output' has not been called yet for any file.
200 Only used when the separator is attached to the preceding record. */
202 char first_char = *separator; /* Speed optimization, non-regexp. */
203 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
204 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
205 struct re_registers regs;
207 /* Find the size of the input file. */
208 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
210 return 0; /* It's an empty file. */
212 /* Arrange for the first read to lop off enough to leave the rest of the
213 file a multiple of `read_size'. Since `read_size' can change, this may
214 not always hold during the program run, but since it usually will, leave
215 it here for i/o efficiency (page/sector boundaries and all that).
216 Note: the efficiency gain has not been verified. */
217 saved_record_size = file_pos % read_size;
218 if (saved_record_size == 0)
219 saved_record_size = read_size;
220 file_pos -= saved_record_size;
221 /* `file_pos' now points to the start of the last (probably partial) block
222 in the input file. */
224 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
225 error (0, errno, "%s: seek failed", file);
227 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
229 error (0, errno, "%s", file);
233 match_start = past_end = G_buffer + saved_record_size;
234 /* For non-regexp search, move past impossible positions for a match. */
236 match_start -= match_length1;
240 /* Search backward from `match_start' - 1 to `G_buffer' for a match
241 with `separator'; for speed, use strncmp if `separator' contains no
243 If the match succeeds, set `match_start' to point to the start of
244 the match and `match_length' to the length of the match.
245 Otherwise, make `match_start' < `G_buffer'. */
246 if (sentinel_length == 0)
248 int i = match_start - G_buffer;
251 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
253 match_start = G_buffer - 1;
256 error (EXIT_FAILURE, 0,
257 _("error in regular expression search"));
261 match_start = G_buffer + regs.start[0];
262 match_length = regs.end[0] - regs.start[0];
267 /* `match_length' is constant for non-regexp boundaries. */
268 while (*--match_start != first_char
269 || (match_length1 && strncmp (match_start + 1, separator1,
274 /* Check whether we backed off the front of `G_buffer' without finding
275 a match for `separator'. */
276 if (match_start < G_buffer)
280 /* Hit the beginning of the file; print the remaining record. */
281 output (G_buffer, past_end);
285 saved_record_size = past_end - G_buffer;
286 if (saved_record_size > read_size)
288 /* `G_buffer_size' is about twice `read_size', so since
289 we want to read in another `read_size' bytes before
290 the data already in `G_buffer', we need to increase
293 int offset = sentinel_length ? sentinel_length : 1;
296 G_buffer_size = read_size * 2 + sentinel_length + 2;
297 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
299 /* Adjust the pointers for the new buffer location. */
300 match_start += newbuffer - G_buffer;
301 past_end += newbuffer - G_buffer;
302 G_buffer = newbuffer;
305 /* Back up to the start of the next bufferfull of the file. */
306 if (file_pos >= read_size)
307 file_pos -= read_size;
310 read_size = file_pos;
313 lseek (input_fd, file_pos, SEEK_SET);
315 /* Shift the pending record data right to make room for the new.
316 The source and destination regions probably overlap. */
317 memmove (G_buffer + read_size, G_buffer, saved_record_size);
318 past_end = G_buffer + read_size + saved_record_size;
319 /* For non-regexp searches, avoid unneccessary scanning. */
321 match_start = G_buffer + read_size;
323 match_start = past_end;
325 if (safe_read (input_fd, G_buffer, read_size) != read_size)
327 error (0, errno, "%s", file);
333 /* Found a match of `separator'. */
334 if (separator_ends_record)
336 char *match_end = match_start + match_length;
338 /* If this match of `separator' isn't at the end of the
339 file, print the record. */
340 if (first_time == 0 || match_end != past_end)
341 output (match_end, past_end);
342 past_end = match_end;
347 output (match_start, past_end);
348 past_end = match_start;
351 /* For non-regex matching, we can back up. */
352 if (sentinel_length > 0)
353 match_start -= match_length - 1;
358 /* Print FILE in reverse.
359 Return 0 if ok, 1 if an error occurs. */
362 tac_file (const char *file)
367 in = fopen (file, "r");
370 error (0, errno, "%s", file);
373 SET_BINARY (fileno (in));
374 errors = tac_seekable (fileno (in), file);
375 if (ferror (in) || fclose (in) == EOF)
377 error (0, errno, "%s", file);
383 #if DONT_UNLINK_WHILE_OPEN
385 static const char *file_to_remove;
386 static FILE *fp_to_close;
389 unlink_tempfile (void)
391 fclose (fp_to_close);
392 unlink (file_to_remove);
396 record_tempfile (const char *fn, FILE *fp)
402 atexit (unlink_tempfile);
408 /* Make a copy of the standard input in `FIXME'. */
411 save_stdin (FILE **g_tmp, char **g_tempfile)
413 static char *template = NULL;
414 static char *tempdir;
420 if (template == NULL)
422 tempdir = getenv ("TMPDIR");
424 tempdir = DEFAULT_TMPDIR;
425 template = xmalloc (strlen (tempdir) + 11);
427 sprintf (template, "%s/tacXXXXXX", tempdir);
429 fd = mkstemp (template);
431 error (EXIT_FAILURE, errno, "%s", tempfile);
433 tmp = fdopen (fd, "w+");
435 error (EXIT_FAILURE, errno, "%s", tempfile);
437 #if DONT_UNLINK_WHILE_OPEN
438 record_tempfile (tempfile, tmp);
445 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
449 error (EXIT_FAILURE, errno, _("stdin: read error"));
451 /* Don't bother checking for failure inside the loop -- check after. */
452 fwrite (G_buffer, 1, bytes_read, tmp);
455 if (ferror (tmp) || fflush (tmp) == EOF)
456 error (EXIT_FAILURE, errno, "%s", tempfile);
460 SET_BINARY (fileno (tmp));
462 *g_tempfile = tempfile;
465 /* Print the standard input in reverse, saving it to temporary
466 file first if it is a pipe.
467 Return 0 if ok, 1 if an error occurs. */
475 /* No tempfile is needed for "tac < file".
476 Use fstat instead of checking for errno == ESPIPE because
477 lseek doesn't work on some special files but doesn't return an
479 if (fstat (STDIN_FILENO, &stats))
481 error (0, errno, _("standard input"));
485 if (S_ISREG (stats.st_mode))
487 errors = tac_seekable (fileno (stdin), _("standard input"));
493 save_stdin (&tmp_stream, &tmp_file);
494 errors = tac_seekable (fileno (tmp_stream), tmp_file);
501 /* BUF_END points one byte past the end of the buffer to be searched. */
504 memrchr (const char *buf_start, const char *buf_end, int c)
506 const char *p = buf_end;
507 while (buf_start <= --p)
509 if (*(const unsigned char *) p == c)
515 /* FIXME: describe */
518 tac_mem (const char *buf, size_t n_bytes, FILE *out)
526 nl = memrchr (buf, buf + n_bytes, '\n');
527 bol = (nl == NULL ? buf : nl + 1);
529 /* If the last line of the input file has no terminating newline,
530 treat it as a special case. */
531 if (bol < buf + n_bytes)
533 /* Print out the line from bol to end of input. */
534 fwrite (bol, 1, (buf + n_bytes) - bol, out);
536 /* Add a newline here. Otherwise, the first and second lines
537 of output would appear to have been joined. */
541 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
543 /* Output the line (which includes a trailing newline)
544 from NL+1 to BOL-1. */
545 fwrite (nl + 1, 1, bol - (nl + 1), out);
550 /* If there's anything left, output the last line: BUF .. BOL-1.
551 When the first byte of the input is a newline, there is nothing
554 fwrite (buf, 1, bol - buf, out);
556 /* FIXME: this is work in progress.... */
560 /* FIXME: describe */
563 tac_stdin_to_mem (void)
566 size_t bufsiz = 8 * BUFSIZ;
567 size_t delta = 8 * BUFSIZ;
574 buf = (char *) malloc (bufsiz);
576 buf = (char *) realloc (buf, bufsiz);
580 /* Free the buffer and fall back on the code that relies on a
586 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
590 error (EXIT_FAILURE, errno, _("stdin: read error"));
591 n_bytes += bytes_read;
596 tac_mem (buf, n_bytes, stdout);
603 main (int argc, char **argv)
605 const char *error_message; /* Return value from re_compile_pattern. */
607 int have_read_stdin = 0;
609 program_name = argv[0];
610 setlocale (LC_ALL, "");
611 bindtextdomain (PACKAGE, LOCALEDIR);
612 textdomain (PACKAGE);
614 atexit (close_stdout);
619 separator_ends_record = 1;
621 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
628 separator_ends_record = 0;
636 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
638 case_GETOPT_HELP_CHAR;
639 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
645 if (sentinel_length == 0)
647 compiled_separator.allocated = 100;
648 compiled_separator.buffer = (unsigned char *)
649 xmalloc (compiled_separator.allocated);
650 compiled_separator.fastmap = xmalloc (256);
651 compiled_separator.translate = 0;
652 error_message = re_compile_pattern (separator, strlen (separator),
653 &compiled_separator);
655 error (EXIT_FAILURE, 0, "%s", error_message);
658 match_length = sentinel_length = strlen (separator);
660 read_size = INITIAL_READSIZE;
661 /* A precaution that will probably never be needed. */
662 while (sentinel_length * 2 >= read_size)
664 G_buffer_size = read_size * 2 + sentinel_length + 2;
665 G_buffer = xmalloc (G_buffer_size);
668 strcpy (G_buffer, separator);
669 G_buffer += sentinel_length;
679 /* We need binary I/O, since `tac' relies
680 on `lseek' and byte counts. */
681 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
682 errors = tac_stdin ();
686 for (; optind < argc; ++optind)
688 if (STREQ (argv[optind], "-"))
691 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
692 errors |= tac_stdin ();
696 /* Binary output will leave the lines' ends (NL or
697 CR/LF) intact when the output is a disk file.
698 Writing a file with CR/LF pairs at end of lines in
699 text mode has no visible effect on console output,
700 since two CRs in a row are just like one CR. */
701 SET_BINARY (STDOUT_FILENO);
702 errors |= tac_file (argv[optind]);
707 /* Flush the output buffer. */
708 output ((char *) NULL, (char *) NULL);
710 if (have_read_stdin && close (0) < 0)
711 error (EXIT_FAILURE, errno, "-");
712 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);