1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
49 #include "safe-read.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "tac"

#define AUTHORS N_ ("Jay Lepreau and David MacKenzie")

#if defined __MSDOS__ || defined _WIN32
/* Define this to non-zero on systems for which the regular mechanism
   (of unlinking an open file and expecting to be able to write, seek
   back to the beginning, then reread it) doesn't work.  E.g., on Windows
   and DOS systems.  */
# define DONT_UNLINK_WHILE_OPEN 1
#endif

#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* The number of bytes per atomic read. */
#define INITIAL_READSIZE 8192

/* The number of bytes per atomic write. */
#define WRITESIZE 8192

/* The name this program was run with. */
char *program_name;

/* The string that separates the records of the file. */
static char *separator;

/* If nonzero, print `separator' along with the record preceding it
   in the file; otherwise with the record following it. */
static int separator_ends_record;

/* 0 if `separator' is to be matched as a regular expression;
   otherwise, the length of `separator', a copy of which is stored just
   in front of `G_buffer' as a sentinel to stop the backward search. */
static int sentinel_length;

/* The length of a match with `separator'.  If `sentinel_length' is 0,
   `match_length' is computed every time a match succeeds;
   otherwise, it is simply the length of `separator'. */
static int match_length;

/* The input buffer. */
static char *G_buffer;

/* The number of bytes to read at once into `G_buffer'. */
static size_t read_size;

/* The size of `G_buffer'.  This is read_size * 2 + sentinel_length + 2.
   The extra 2 bytes allow `past_end' to have a value beyond the
   end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
static unsigned G_buffer_size;

/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
109 static struct option const longopts[] =
111 {"before", no_argument, NULL, 'b'},
112 {"regex", no_argument, NULL, 'r'},
113 {"separator", required_argument, NULL, 's'},
114 {GETOPT_HELP_OPTION_DECL},
115 {GETOPT_VERSION_OPTION_DECL},
123 fprintf (stderr, _("Try `%s --help' for more information.\n"),
128 Usage: %s [OPTION]... [FILE]...\n\
132 Write each FILE to standard output, last line first.\n\
133 With no FILE, or when FILE is -, read standard input.\n\
137 Mandatory arguments to long options are mandatory for short options too.\n\
140 -b, --before attach the separator before instead of after\n\
141 -r, --regex interpret the separator as a regular expression\n\
142 -s, --separator=STRING use STRING as the separator instead of newline\n\
145 --help display this help and exit\n\
146 --version output version information and exit\n\
148 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
150 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
153 /* Print the characters from START to PAST_END - 1.
154 If START is NULL, just flush the buffer. */
157 output (const char *start, const char *past_end)
159 static char buffer[WRITESIZE];
160 static int bytes_in_buffer = 0;
161 int bytes_to_add = past_end - start;
162 int bytes_available = WRITESIZE - bytes_in_buffer;
166 fwrite (buffer, 1, bytes_in_buffer, stdout);
171 /* Write out as many full buffers as possible. */
172 while (bytes_to_add >= bytes_available)
174 memcpy (buffer + bytes_in_buffer, start, bytes_available);
175 bytes_to_add -= bytes_available;
176 start += bytes_available;
177 fwrite (buffer, 1, WRITESIZE, stdout);
179 bytes_available = WRITESIZE;
182 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
183 bytes_in_buffer += bytes_to_add;
186 /* Print in reverse the file open on descriptor FD for reading FILE.
187 Return 0 if ok, 1 if an error occurs. */
190 tac_seekable (int input_fd, const char *file)
192 /* Pointer to the location in `G_buffer' where the search for
193 the next separator will begin. */
196 /* Pointer to one past the rightmost character in `G_buffer' that
197 has not been printed yet. */
200 /* Length of the record growing in `G_buffer'. */
201 size_t saved_record_size;
203 /* Offset in the file of the next read. */
206 /* Nonzero if `output' has not been called yet for any file.
207 Only used when the separator is attached to the preceding record. */
209 char first_char = *separator; /* Speed optimization, non-regexp. */
210 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
211 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
212 struct re_registers regs;
214 /* Find the size of the input file. */
215 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
217 return 0; /* It's an empty file. */
219 /* Arrange for the first read to lop off enough to leave the rest of the
220 file a multiple of `read_size'. Since `read_size' can change, this may
221 not always hold during the program run, but since it usually will, leave
222 it here for i/o efficiency (page/sector boundaries and all that).
223 Note: the efficiency gain has not been verified. */
224 saved_record_size = file_pos % read_size;
225 if (saved_record_size == 0)
226 saved_record_size = read_size;
227 file_pos -= saved_record_size;
228 /* `file_pos' now points to the start of the last (probably partial) block
229 in the input file. */
231 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
232 error (0, errno, "%s: seek failed", file);
234 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
236 error (0, errno, "%s", file);
240 match_start = past_end = G_buffer + saved_record_size;
241 /* For non-regexp search, move past impossible positions for a match. */
243 match_start -= match_length1;
247 /* Search backward from `match_start' - 1 to `G_buffer' for a match
248 with `separator'; for speed, use strncmp if `separator' contains no
250 If the match succeeds, set `match_start' to point to the start of
251 the match and `match_length' to the length of the match.
252 Otherwise, make `match_start' < `G_buffer'. */
253 if (sentinel_length == 0)
255 int i = match_start - G_buffer;
258 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
260 match_start = G_buffer - 1;
263 error (EXIT_FAILURE, 0,
264 _("error in regular expression search"));
268 match_start = G_buffer + regs.start[0];
269 match_length = regs.end[0] - regs.start[0];
274 /* `match_length' is constant for non-regexp boundaries. */
275 while (*--match_start != first_char
276 || (match_length1 && strncmp (match_start + 1, separator1,
281 /* Check whether we backed off the front of `G_buffer' without finding
282 a match for `separator'. */
283 if (match_start < G_buffer)
287 /* Hit the beginning of the file; print the remaining record. */
288 output (G_buffer, past_end);
292 saved_record_size = past_end - G_buffer;
293 if (saved_record_size > read_size)
295 /* `G_buffer_size' is about twice `read_size', so since
296 we want to read in another `read_size' bytes before
297 the data already in `G_buffer', we need to increase
300 int offset = sentinel_length ? sentinel_length : 1;
303 G_buffer_size = read_size * 2 + sentinel_length + 2;
304 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
306 /* Adjust the pointers for the new buffer location. */
307 match_start += newbuffer - G_buffer;
308 past_end += newbuffer - G_buffer;
309 G_buffer = newbuffer;
312 /* Back up to the start of the next bufferfull of the file. */
313 if (file_pos >= read_size)
314 file_pos -= read_size;
317 read_size = file_pos;
320 lseek (input_fd, file_pos, SEEK_SET);
322 /* Shift the pending record data right to make room for the new.
323 The source and destination regions probably overlap. */
324 memmove (G_buffer + read_size, G_buffer, saved_record_size);
325 past_end = G_buffer + read_size + saved_record_size;
326 /* For non-regexp searches, avoid unneccessary scanning. */
328 match_start = G_buffer + read_size;
330 match_start = past_end;
332 if (safe_read (input_fd, G_buffer, read_size) != read_size)
334 error (0, errno, "%s", file);
340 /* Found a match of `separator'. */
341 if (separator_ends_record)
343 char *match_end = match_start + match_length;
345 /* If this match of `separator' isn't at the end of the
346 file, print the record. */
347 if (first_time == 0 || match_end != past_end)
348 output (match_end, past_end);
349 past_end = match_end;
354 output (match_start, past_end);
355 past_end = match_start;
358 /* For non-regex matching, we can back up. */
359 if (sentinel_length > 0)
360 match_start -= match_length - 1;
/* Print FILE in reverse.
   FILE is opened with fopen, its descriptor is handed to tac_seekable,
   and the stream is closed again on every path.
   Return 0 if ok, 1 if an error occurs. */

static int
tac_file (const char *file)
{
  int errors;
  int stream_failed;
  FILE *in;

  in = fopen (file, "r");
  if (in == NULL)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  SET_BINARY (fileno (in));
  errors = tac_seekable (fileno (in), file);

  /* Sample the error flag first, then close unconditionally: testing
     `ferror (in) || fclose (in)' would skip the fclose, and so leak
     the stream, whenever the error flag is set. */
  stream_failed = ferror (in);
  if (fclose (in) == EOF)
    stream_failed = 1;
  if (stream_failed)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  return errors;
}
#if DONT_UNLINK_WHILE_OPEN

/* Name of, and stream open on, the temporary file that must be removed
   when the program exits.  Both are set once by record_tempfile. */
static const char *file_to_remove;
static FILE *fp_to_close;

/* atexit handler: close the recorded stream, then remove the recorded
   file.  The stream is closed first because this code is compiled only
   for systems on which an open file cannot be unlinked. */

static void
unlink_tempfile (void)
{
  fclose (fp_to_close);
  unlink (file_to_remove);
}

/* Remember that the file named FN, open on stream FP, must be closed
   and removed at exit.  Only the first call records anything and
   registers the exit handler; later calls are ignored. */

static void
record_tempfile (const char *fn, FILE *fp)
{
  if (!file_to_remove)
    {
      file_to_remove = fn;
      fp_to_close = fp;
      atexit (unlink_tempfile);
    }
}

#endif
415 /* Make a copy of the standard input in a newly created temporary file,
   so that the caller can read it back through a seekable descriptor.
   The file is created by mkstemp from the template "DIR/tacXXXXXX",
   where DIR comes from the TMPDIR environment variable or from
   DEFAULT_TMPDIR (presumably when TMPDIR is unset; the test is not
   visible in this excerpt).
   On return *G_TEMPFILE holds the name of the temporary file.
   NOTE(review): *G_TMP presumably receives the stream opened on that
   file; the assignment is not visible in this excerpt -- confirm.
   Every failure is reported through error with EXIT_FAILURE. */
418 save_stdin (FILE **g_tmp, char **g_tempfile)
/* The template is static: it is allocated at most once and kept
   for later calls. */
420 static char *template = NULL;
421 static char *tempdir;
427 if (template == NULL)
429 tempdir = getenv ("TMPDIR");
431 tempdir = DEFAULT_TMPDIR;
/* 11 == strlen ("/tacXXXXXX") + 1 for the terminating NUL. */
432 template = xmalloc (strlen (tempdir) + 11);
434 sprintf (template, "%s/tacXXXXXX", tempdir);
/* mkstemp replaces the XXXXXX suffix of the template in place and
   returns a descriptor open on the new file. */
436 fd = mkstemp (template);
438 error (EXIT_FAILURE, errno, "%s", tempfile);
/* Wrap the descriptor in a stdio stream opened for update ("w+"). */
440 tmp = fdopen (fd, "w+");
442 error (EXIT_FAILURE, errno, "%s", tempfile);
/* Where an open file cannot be unlinked, register it for removal at
   exit instead. */
444 #if DONT_UNLINK_WHILE_OPEN
445 record_tempfile (tempfile, tmp);
/* Copy standard input to the temporary stream, at most `read_size'
   bytes at a time, using `G_buffer' as the transfer buffer. */
452 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
456 error (EXIT_FAILURE, errno, _("stdin: read error"));
458 /* Don't bother checking for failure inside the loop -- check after. */
459 fwrite (G_buffer, 1, bytes_read, tmp);
/* One check covers every fwrite above, and the flush makes sure the
   data has reached the file before it is read back. */
462 if (ferror (tmp) || fflush (tmp) == EOF)
463 error (EXIT_FAILURE, errno, "%s", tempfile);
467 SET_BINARY (fileno (tmp));
469 *g_tempfile = tempfile;
/* Print the standard input in reverse, saving it to temporary
   file first if it is not a regular file (e.g. a pipe).
   Return 0 if ok, 1 if an error occurs. */

static int
tac_stdin (void)
{
  int errors;
  struct stat stats;

  /* No tempfile is needed for "tac < file".
     Use fstat instead of checking for errno == ESPIPE because
     lseek doesn't work on some special files but doesn't return an
     error, either. */
  if (fstat (STDIN_FILENO, &stats))
    {
      error (0, errno, _("standard input"));
      return 1;
    }

  if (S_ISREG (stats.st_mode))
    {
      errors = tac_seekable (fileno (stdin), _("standard input"));
    }
  else
    {
      FILE *tmp_stream;
      char *tmp_file;

      save_stdin (&tmp_stream, &tmp_file);
      errors = tac_seekable (fileno (tmp_stream), tmp_file);

#if !DONT_UNLINK_WHILE_OPEN
      /* Release the temporary stream now that it has been printed;
         otherwise every "-" operand leaks a stream and a descriptor.
         When DONT_UNLINK_WHILE_OPEN is set the stream is closed by the
         exit handler instead, so it must stay open here. */
      if (fclose (tmp_stream) == EOF)
        {
          error (0, errno, "%s", tmp_file);
          errors = 1;
        }
#endif
    }

  return errors;
}
/* Return a pointer to the last byte in [BUF_START, BUF_END) that is
   equal to C converted to unsigned char, or NULL if there is none.
   BUF_END points one byte past the end of the buffer to be searched.
   An empty or inverted range (BUF_END <= BUF_START) yields NULL. */

static void *
memrchr (const char *buf_start, const char *buf_end, int c)
{
  /* Compare as unsigned char so that a C obtained from a negative
     plain `char' still matches the corresponding byte. */
  const unsigned char target = (unsigned char) c;
  const char *p = buf_end;

  /* Test before decrementing, so that P never moves below BUF_START. */
  while (buf_start < p)
    {
      --p;
      if (*(const unsigned char *) p == target)
        return (void *) p;
    }
  return NULL;
}
/* Write to OUT the lines of the N_BYTES-byte buffer BUF in reverse
   order, last line first.  Lines are delimited by '\n'.  If the final
   line has no terminating newline, one is written after it so that it
   does not run into the line printed next.  OUT is flushed before
   returning.  Nothing is written when N_BYTES is 0. */

static void
tac_mem (const char *buf, size_t n_bytes, FILE *out)
{
  const char *buf_end;
  const char *nl;
  const char *bol;

  if (n_bytes == 0)
    return;

  buf_end = buf + n_bytes;
  nl = memrchr (buf, buf_end, '\n');
  bol = (nl == NULL ? buf : nl + 1);

  /* If the last line of the input file has no terminating newline,
     treat it as a special case. */
  if (bol < buf_end)
    {
      /* Print out the line from bol to end of input. */
      fwrite (bol, 1, buf_end - bol, out);

      /* Add a newline here.  Otherwise, the first and second lines
         of output would appear to have been joined. */
      fputc ('\n', out);
    }

  /* BOL - 1 is the newline that ends the line before BOL, so searching
     up to (but not including) it finds the newline before that line.
     The `buf < bol' test keeps BOL - 1 from pointing below BUF. */
  while (buf < bol && (nl = memrchr (buf, bol - 1, '\n')) != NULL)
    {
      /* Output the line (which includes a trailing newline)
         from NL+1 to BOL-1. */
      fwrite (nl + 1, 1, bol - (nl + 1), out);

      bol = nl + 1;
    }

  /* If there's anything left, output the last line: BUF .. BOL-1.
     When BOL is still BUF there is nothing left to do here. */
  if (buf < bol)
    fwrite (buf, 1, bol - buf, out);

  /* FIXME: this is work in progress.... */
  fflush (out);
}
/* Read all of standard input into one heap buffer, which grows by
   8 * BUFSIZ bytes per read, then print it in reverse with tac_mem on
   stdout.  A read error or an allocation failure ends the program via
   error with EXIT_FAILURE. */

static void
tac_stdin_to_mem (void)
{
  char *buf = NULL;
  size_t bufsiz = 8 * BUFSIZ;
  size_t delta = 8 * BUFSIZ;
  size_t n_bytes = 0;

  while (1)
    {
      int bytes_read;
      /* realloc (NULL, n) behaves like malloc (n), so one call covers
         both the first allocation and every later growth.  The result
         goes to a temporary so the old block is not lost on failure. */
      char *new_buf = realloc (buf, bufsiz);

      if (new_buf == NULL)
        {
          int saved_errno = errno;

          /* FIXME: fall back on the code that relies on a temporary
             file instead of giving up. */
          free (buf);
          error (EXIT_FAILURE, saved_errno, _("standard input"));
        }
      buf = new_buf;

      bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
      if (bytes_read == 0)
        break;
      if (bytes_read < 0)
        error (EXIT_FAILURE, errno, _("stdin: read error"));
      n_bytes += bytes_read;

      /* Guarantees at least DELTA free bytes for the next read. */
      bufsiz += delta;
    }

  tac_mem (buf, n_bytes, stdout);
  free (buf);
}
610 main (int argc, char **argv)
612 const char *error_message; /* Return value from re_compile_pattern. */
614 int have_read_stdin = 0;
616 program_name = argv[0];
617 setlocale (LC_ALL, "");
618 bindtextdomain (PACKAGE, LOCALEDIR);
619 textdomain (PACKAGE);
621 atexit (close_stdout);
626 separator_ends_record = 1;
628 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
635 separator_ends_record = 0;
643 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
645 case_GETOPT_HELP_CHAR;
646 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
652 if (sentinel_length == 0)
654 compiled_separator.allocated = 100;
655 compiled_separator.buffer = (unsigned char *)
656 xmalloc (compiled_separator.allocated);
657 compiled_separator.fastmap = xmalloc (256);
658 compiled_separator.translate = 0;
659 error_message = re_compile_pattern (separator, strlen (separator),
660 &compiled_separator);
662 error (EXIT_FAILURE, 0, "%s", error_message);
665 match_length = sentinel_length = strlen (separator);
667 read_size = INITIAL_READSIZE;
668 /* A precaution that will probably never be needed. */
669 while (sentinel_length * 2 >= read_size)
671 G_buffer_size = read_size * 2 + sentinel_length + 2;
672 G_buffer = xmalloc (G_buffer_size);
675 strcpy (G_buffer, separator);
676 G_buffer += sentinel_length;
686 /* We need binary I/O, since `tac' relies
687 on `lseek' and byte counts. */
688 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
689 errors = tac_stdin ();
693 for (; optind < argc; ++optind)
695 if (STREQ (argv[optind], "-"))
698 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
699 errors |= tac_stdin ();
703 /* Binary output will leave the lines' ends (NL or
704 CR/LF) intact when the output is a disk file.
705 Writing a file with CR/LF pairs at end of lines in
706 text mode has no visible effect on console output,
707 since two CRs in a row are just like one CR. */
708 SET_BINARY (STDOUT_FILENO);
709 errors |= tac_file (argv[optind]);
714 /* Flush the output buffer. */
715 output ((char *) NULL, (char *) NULL);
717 if (have_read_stdin && close (0) < 0)
718 error (EXIT_FAILURE, errno, "-");
719 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);