/* tac - concatenate and print files in reverse
   Copyright (C) 1988-1991, 1995-2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
/* Copy each FILE, or the standard input if none are given or when a
   FILE name of "-" is encountered, to the standard output with the
   order of the records reversed.  The records are separated by
   instances of a string, or a newline if none is given.  By default, the
   separator string is attached to the end of the record that it
   follows in the file.

   Options:
   -b, --before                 The separator is attached to the beginning
                                of the record that it precedes in the file.
   -r, --regex                  The separator is a regular expression.
   -s, --separator=separator    Use SEPARATOR as the record separator.

   To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
42 #include <sys/types.h>
49 #include "safe-read.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "tac"

#define AUTHORS "Jay Lepreau", "David MacKenzie"

#if defined __MSDOS__ || defined _WIN32
/* Define this to non-zero on systems for which the regular mechanism
   (of unlinking an open file and expecting to be able to write, seek
   back to the beginning, then reread it) doesn't work.  E.g., on Windows
   and DOS systems.  */
# define DONT_UNLINK_WHILE_OPEN 1
#endif

/* Directory used for temporary files when TMPDIR is not set in the
   environment.  May be overridden at build time.  */
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* The number of bytes per atomic read.  This is only the starting value
   of `read_size', which is doubled whenever a record outgrows it.  */
#define INITIAL_READSIZE 8192

/* The number of bytes per atomic write. */
#define WRITESIZE 8192
/* The name this program was run with. */
char *program_name;

/* The string that separates the records of the file. */
static char *separator;

/* True if we have ever read standard input. */
static bool have_read_stdin = false;

/* If true, print `separator' along with the record preceding it
   in the file; otherwise with the record following it. */
static bool separator_ends_record;

/* 0 if `separator' is to be matched as a regular expression;
   otherwise, the length of `separator', used as a sentinel to
   stop the search.  In the latter case a copy of `separator' is kept
   in the `sentinel_length' bytes immediately before `G_buffer'.  */
static size_t sentinel_length;

/* The length of a match with `separator'.  If `sentinel_length' is 0,
   `match_length' is computed every time a match succeeds;
   otherwise, it is simply the length of `separator'. */
static size_t match_length;

/* The input buffer. */
static char *G_buffer;

/* The number of bytes to read at once into `G_buffer'. */
static size_t read_size;

/* The size of `G_buffer'.  This is read_size * 2 + sentinel_length + 2.
   The extra 2 bytes allow `past_end' to have a value beyond the
   end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
static size_t G_buffer_size;

/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
112 static struct option const longopts[] =
114 {"before", no_argument, NULL, 'b'},
115 {"regex", no_argument, NULL, 'r'},
116 {"separator", required_argument, NULL, 's'},
117 {GETOPT_HELP_OPTION_DECL},
118 {GETOPT_VERSION_OPTION_DECL},
125 if (status != EXIT_SUCCESS)
126 fprintf (stderr, _("Try `%s --help' for more information.\n"),
131 Usage: %s [OPTION]... [FILE]...\n\
135 Write each FILE to standard output, last line first.\n\
136 With no FILE, or when FILE is -, read standard input.\n\
140 Mandatory arguments to long options are mandatory for short options too.\n\
143 -b, --before attach the separator before instead of after\n\
144 -r, --regex interpret the separator as a regular expression\n\
145 -s, --separator=STRING use STRING as the separator instead of newline\n\
147 fputs (HELP_OPTION_DESCRIPTION, stdout);
148 fputs (VERSION_OPTION_DESCRIPTION, stdout);
149 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
154 /* Print the characters from START to PAST_END - 1.
155 If START is NULL, just flush the buffer. */
158 output (const char *start, const char *past_end)
160 static char buffer[WRITESIZE];
161 static size_t bytes_in_buffer = 0;
162 size_t bytes_to_add = past_end - start;
163 size_t bytes_available = WRITESIZE - bytes_in_buffer;
167 fwrite (buffer, 1, bytes_in_buffer, stdout);
172 /* Write out as many full buffers as possible. */
173 while (bytes_to_add >= bytes_available)
175 memcpy (buffer + bytes_in_buffer, start, bytes_available);
176 bytes_to_add -= bytes_available;
177 start += bytes_available;
178 fwrite (buffer, 1, WRITESIZE, stdout);
180 bytes_available = WRITESIZE;
183 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
184 bytes_in_buffer += bytes_to_add;
187 /* Print in reverse the file open on descriptor FD for reading FILE.
188 Return true if successful. */
191 tac_seekable (int input_fd, const char *file)
193 /* Pointer to the location in `G_buffer' where the search for
194 the next separator will begin. */
197 /* Pointer to one past the rightmost character in `G_buffer' that
198 has not been printed yet. */
201 /* Length of the record growing in `G_buffer'. */
202 size_t saved_record_size;
204 /* Offset in the file of the next read. */
207 /* True if `output' has not been called yet for any file.
208 Only used when the separator is attached to the preceding record. */
209 bool first_time = true;
210 char first_char = *separator; /* Speed optimization, non-regexp. */
211 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
212 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
213 struct re_registers regs;
215 /* Find the size of the input file. */
216 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
218 return true; /* It's an empty file. */
220 /* Arrange for the first read to lop off enough to leave the rest of the
221 file a multiple of `read_size'. Since `read_size' can change, this may
222 not always hold during the program run, but since it usually will, leave
223 it here for i/o efficiency (page/sector boundaries and all that).
224 Note: the efficiency gain has not been verified. */
225 saved_record_size = file_pos % read_size;
226 if (saved_record_size == 0)
227 saved_record_size = read_size;
228 file_pos -= saved_record_size;
229 /* `file_pos' now points to the start of the last (probably partial) block
230 in the input file. */
232 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
233 error (0, errno, _("%s: seek failed"), quote (file));
235 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
237 error (0, errno, "%s", quote (file));
241 match_start = past_end = G_buffer + saved_record_size;
242 /* For non-regexp search, move past impossible positions for a match. */
244 match_start -= match_length1;
248 /* Search backward from `match_start' - 1 to `G_buffer' for a match
249 with `separator'; for speed, use strncmp if `separator' contains no
251 If the match succeeds, set `match_start' to point to the start of
252 the match and `match_length' to the length of the match.
253 Otherwise, make `match_start' < `G_buffer'. */
254 if (sentinel_length == 0)
256 ptrdiff_t i = match_start - G_buffer;
259 if (! (INT_MIN < i && i <= INT_MAX))
260 error (EXIT_FAILURE, 0, _("record too large"));
262 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
264 match_start = G_buffer - 1;
267 error (EXIT_FAILURE, 0,
268 _("error in regular expression search"));
272 match_start = G_buffer + regs.start[0];
273 match_length = regs.end[0] - regs.start[0];
278 /* `match_length' is constant for non-regexp boundaries. */
279 while (*--match_start != first_char
280 || (match_length1 && strncmp (match_start + 1, separator1,
285 /* Check whether we backed off the front of `G_buffer' without finding
286 a match for `separator'. */
287 if (match_start < G_buffer)
291 /* Hit the beginning of the file; print the remaining record. */
292 output (G_buffer, past_end);
296 saved_record_size = past_end - G_buffer;
297 if (saved_record_size > read_size)
299 /* `G_buffer_size' is about twice `read_size', so since
300 we want to read in another `read_size' bytes before
301 the data already in `G_buffer', we need to increase
304 size_t offset = sentinel_length ? sentinel_length : 1;
305 ptrdiff_t match_start_offset = match_start - G_buffer;
306 ptrdiff_t past_end_offset = past_end - G_buffer;
307 size_t old_G_buffer_size = G_buffer_size;
310 G_buffer_size = read_size * 2 + sentinel_length + 2;
311 if (G_buffer_size < old_G_buffer_size)
313 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
315 /* Adjust the pointers for the new buffer location. */
316 match_start = newbuffer + match_start_offset;
317 past_end = newbuffer + past_end_offset;
318 G_buffer = newbuffer;
321 /* Back up to the start of the next bufferfull of the file. */
322 if (file_pos >= read_size)
323 file_pos -= read_size;
326 read_size = file_pos;
329 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
330 error (0, errno, _("%s: seek failed"), quote (file));
332 /* Shift the pending record data right to make room for the new.
333 The source and destination regions probably overlap. */
334 memmove (G_buffer + read_size, G_buffer, saved_record_size);
335 past_end = G_buffer + read_size + saved_record_size;
336 /* For non-regexp searches, avoid unneccessary scanning. */
338 match_start = G_buffer + read_size;
340 match_start = past_end;
342 if (safe_read (input_fd, G_buffer, read_size) != read_size)
344 error (0, errno, "%s", quote (file));
350 /* Found a match of `separator'. */
351 if (separator_ends_record)
353 char *match_end = match_start + match_length;
355 /* If this match of `separator' isn't at the end of the
356 file, print the record. */
357 if (!first_time || match_end != past_end)
358 output (match_end, past_end);
359 past_end = match_end;
364 output (match_start, past_end);
365 past_end = match_start;
368 /* For non-regex matching, we can back up. */
369 if (sentinel_length > 0)
370 match_start -= match_length - 1;
#if DONT_UNLINK_WHILE_OPEN

/* Name and stream of the temporary file that must be cleaned up when
   the program exits.  Only the first file recorded is remembered.  */
static const char *file_to_remove;
static FILE *fp_to_close;

/* atexit handler: close the recorded stream, then remove the file.
   The stream is closed first because this code is used on systems
   where an open file cannot be unlinked.  */
static void
unlink_tempfile (void)
{
  fclose (fp_to_close);
  unlink (file_to_remove);
}

/* Remember that file FN, open on stream FP, must be closed and removed
   at exit.  The handler is registered on the first call only; later
   calls change nothing.  */
static void
record_tempfile (const char *fn, FILE *fp)
{
  if (!file_to_remove)
    {
      file_to_remove = fn;
      fp_to_close = fp;
      atexit (unlink_tempfile);
    }
}

#endif
400 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
401 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
402 and file name. Exit upon any failure. */
403 /* FIXME: don't exit upon failure!!! */
406 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
408 static char *template = NULL;
409 static char *tempdir;
414 if (template == NULL)
416 tempdir = getenv ("TMPDIR");
418 tempdir = DEFAULT_TMPDIR;
419 template = xmalloc (strlen (tempdir) + 11);
421 sprintf (template, "%s/tacXXXXXX", tempdir);
423 fd = mkstemp (template);
425 error (EXIT_FAILURE, errno, "%s", quote (tempfile));
427 tmp = fdopen (fd, "w+");
429 error (EXIT_FAILURE, errno, "%s", quote (tempfile));
431 #if DONT_UNLINK_WHILE_OPEN
432 record_tempfile (tempfile, tmp);
439 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
442 if (bytes_read == SAFE_READ_ERROR)
443 error (EXIT_FAILURE, errno, _("%s: read error"), quote (file));
445 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
446 error (EXIT_FAILURE, errno, "%s", quote (tempfile));
449 if (fflush (tmp) != 0)
450 error (EXIT_FAILURE, errno, "%s", quote (tempfile));
452 SET_BINARY (fileno (tmp));
454 *g_tempfile = tempfile;
/* Copy INPUT_FD to a temporary, then tac that file.
   FILE names the original input and is used in read diagnostics while
   copying; diagnostics from the reversing pass name the temporary file.
   Return true if successful.  */

static bool
tac_nonseekable (int input_fd, const char *file)
{
  FILE *tmp_stream;
  char *tmp_file;
  copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
  return tac_seekable (fileno (tmp_stream), tmp_file);
}
469 /* Print FILE in reverse, copying it to a temporary
470 file first if it is not seekable.
471 Return true if successful. */
474 tac_file (const char *filename)
480 if (STREQ (filename, "-"))
482 have_read_stdin = true;
484 filename = _("standard input");
488 fd = open (filename, O_RDONLY);
491 error (0, errno, _("cannot open %s for reading"), quote (filename));
496 /* We need binary I/O, since `tac' relies
497 on `lseek' and byte counts.
499 Binary output will leave the lines' ends (NL or
500 CR/LF) intact when the output is a disk file.
501 Writing a file with CR/LF pairs at end of lines in
502 text mode has no visible effect on console output,
503 since two CRs in a row are just like one CR. */
504 SET_BINARY2 (fd, STDOUT_FILENO);
506 file_size = lseek (fd, (off_t) 0, SEEK_END);
509 ? tac_seekable (fd, filename)
510 : tac_nonseekable (fd, filename));
512 if (fd != STDIN_FILENO && close (fd) == -1)
514 error (0, errno, _("closing %s"), quote (filename));
/* Return a pointer to the last newline in the half-open range
   [BUF, BUF_END), or NULL if the range is empty or holds no newline.
   BUF_END points one byte past the end of the buffer to be searched.  */

static const char *
tac_mem_last_newline (const char *buf, const char *buf_end)
{
  while (buf < buf_end)
    if (*--buf_end == '\n')
      return buf_end;
  return NULL;
}

/* Write to OUT the lines of the N_BYTES-byte buffer BUF, last line first.
   Each line is written with its terminating newline.  If the final line
   of BUF lacks a newline, one is added after it so that it is not joined
   to the line printed next.  Nothing is written when N_BYTES is 0.  */

static void
tac_mem (const char *buf, size_t n_bytes, FILE *out)
{
  const char *buf_end;
  const char *nl;
  const char *bol;

  if (n_bytes == 0)
    return;

  buf_end = buf + n_bytes;
  nl = tac_mem_last_newline (buf, buf_end);
  bol = (nl == NULL ? buf : nl + 1);

  /* If the last line of the input file has no terminating newline,
     treat it as a special case.  */
  if (bol < buf_end)
    {
      /* Print out the line from bol to end of input.  */
      fwrite (bol, 1, buf_end - bol, out);

      /* Add a newline here.  Otherwise, the first and second lines
         of output would appear to have been joined.  */
      fputc ('\n', out);
    }

  /* BOL - 1 is the newline that ends the line still to be printed, so
     the search for that line's start covers [BUF, BOL - 1).  When BOL is
     BUF (input without any newline) there is nothing before it and
     BOL - 1 must not even be formed.  */
  while (buf < bol && (nl = tac_mem_last_newline (buf, bol - 1)) != NULL)
    {
      /* Output the line (which includes a trailing newline)
         from NL+1 to BOL-1.  */
      fwrite (nl + 1, 1, bol - (nl + 1), out);

      bol = nl + 1;
    }

  /* If there's anything left, output the first line of the input:
     BUF .. BOL-1.  */
  if (buf < bol)
    fwrite (buf, 1, bol - buf, out);

  /* FIXME: this is work in progress.... */
  fflush (out);
}
567 /* FIXME: describe */
570 tac_stdin_to_mem (void)
573 size_t bufsiz = 8 * BUFSIZ;
574 size_t delta = 8 * BUFSIZ;
580 char *new_buf = realloc (buf, bufsiz);
584 /* Write contents of buf to a temporary file, ... */
587 /* Free the buffer and fall back on the code that relies on a
595 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
598 if (bytes_read == SAFE_READ_ERROR)
599 error (EXIT_FAILURE, errno, _("stdin: read error"));
600 n_bytes += bytes_read;
605 tac_mem (buf, n_bytes, stdout);
612 main (int argc, char **argv)
614 const char *error_message; /* Return value from re_compile_pattern. */
617 size_t half_buffer_size;
619 /* Initializer for file_list if no file-arguments
620 were specified on the command line. */
621 static char const *const default_file_list[] = {"-", NULL};
622 char const *const *file;
624 initialize_main (&argc, &argv);
625 program_name = argv[0];
626 setlocale (LC_ALL, "");
627 bindtextdomain (PACKAGE, LOCALEDIR);
628 textdomain (PACKAGE);
630 atexit (close_stdout);
634 separator_ends_record = true;
636 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
641 separator_ends_record = false;
649 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
651 case_GETOPT_HELP_CHAR;
652 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
654 usage (EXIT_FAILURE);
658 if (sentinel_length == 0)
660 compiled_separator.allocated = 100;
661 compiled_separator.buffer = xmalloc (compiled_separator.allocated);
662 compiled_separator.fastmap = xmalloc (256);
663 compiled_separator.translate = 0;
664 error_message = re_compile_pattern (separator, strlen (separator),
665 &compiled_separator);
667 error (EXIT_FAILURE, 0, "%s", error_message);
670 match_length = sentinel_length = strlen (separator);
672 read_size = INITIAL_READSIZE;
673 while (sentinel_length >= read_size / 2)
675 if (SIZE_MAX / 2 < read_size)
679 half_buffer_size = read_size + sentinel_length + 1;
680 G_buffer_size = 2 * half_buffer_size;
681 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
683 G_buffer = xmalloc (G_buffer_size);
686 strcpy (G_buffer, separator);
687 G_buffer += sentinel_length;
694 file = (optind < argc
695 ? (char const *const *) &argv[optind]
696 : default_file_list);
701 for (i = 0; file[i]; ++i)
702 ok &= tac_file (file[i]);
705 /* Flush the output buffer. */
706 output ((char *) NULL, (char *) NULL);
708 if (have_read_stdin && close (STDIN_FILENO) < 0)
709 error (EXIT_FAILURE, errno, "-");
710 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);