1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88,89,90,91,95,96,97, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
48 #include "safe-read.h"
50 #ifndef DEFAULT_TMPDIR
51 # define DEFAULT_TMPDIR "/tmp"
54 /* The number of bytes per atomic read. */
55 #define INITIAL_READSIZE 8192
57 /* The number of bytes per atomic write. */
58 #define WRITESIZE 8192
62 /* The name this program was run with. */
65 /* The string that separates the records of the file. */
66 static char *separator;
68 /* If nonzero, print `separator' along with the record preceding it
69 in the file; otherwise with the record following it. */
70 static int separator_ends_record;
72 /* 0 if `separator' is to be matched as a regular expression;
73 otherwise, the length of `separator', used as a sentinel to
75 static int sentinel_length;
77 /* The length of a match with `separator'. If `sentinel_length' is 0,
78 `match_length' is computed every time a match succeeds;
79 otherwise, it is simply the length of `separator'. */
80 static int match_length;
82 /* The input buffer. */
83 static char *G_buffer;
85 /* The number of bytes to read at once into `buffer'. */
86 static size_t read_size;
88 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
89 The extra 2 bytes allow `past_end' to have a value beyond the
90 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
91 static unsigned G_buffer_size;
93 /* The compiled regular expression representing `separator'. */
94 static struct re_pattern_buffer compiled_separator;
96 /* If nonzero, display usage information and exit. */
99 /* If nonzero, print the version on standard output then exit. */
100 static int show_version;
102 static struct option const longopts[] =
104 {"before", no_argument, NULL, 'b'},
105 {"regex", no_argument, NULL, 'r'},
106 {"separator", required_argument, NULL, 's'},
107 {"help", no_argument, &show_help, 1},
108 {"version", no_argument, &show_version, 1},
116 fprintf (stderr, _("Try `%s --help' for more information.\n"),
121 Usage: %s [OPTION]... [FILE]...\n\
125 Write each FILE to standard output, last line first.\n\
126 With no FILE, or when FILE is -, read standard input.\n\
128 -b, --before attach the separator before instead of after\n\
129 -r, --regex interpret the separator as a regular expression\n\
130 -s, --separator=STRING use STRING as the separator instead of newline\n\
131 --help display this help and exit\n\
132 --version output version information and exit\n\
134 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
136 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
139 /* Print the characters from START to PAST_END - 1.
140 If START is NULL, just flush the buffer. */
143 output (const char *start, const char *past_end)
145 static char buffer[WRITESIZE];
146 static int bytes_in_buffer = 0;
147 int bytes_to_add = past_end - start;
148 int bytes_available = WRITESIZE - bytes_in_buffer;
152 fwrite (buffer, 1, bytes_in_buffer, stdout);
157 /* Write out as many full buffers as possible. */
158 while (bytes_to_add >= bytes_available)
160 memcpy (buffer + bytes_in_buffer, start, bytes_available);
161 bytes_to_add -= bytes_available;
162 start += bytes_available;
163 fwrite (buffer, 1, WRITESIZE, stdout);
165 bytes_available = WRITESIZE;
168 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
169 bytes_in_buffer += bytes_to_add;
172 /* Print in reverse the file open on descriptor FD for reading FILE.
173 Return 0 if ok, 1 if an error occurs. */
176 tac_seekable (int input_fd, const char *file)
178 /* Pointer to the location in `G_buffer' where the search for
179 the next separator will begin. */
182 /* Pointer to one past the rightmost character in `G_buffer' that
183 has not been printed yet. */
186 /* Length of the record growing in `G_buffer'. */
187 size_t saved_record_size;
189 /* Offset in the file of the next read. */
192 /* Nonzero if `output' has not been called yet for any file.
193 Only used when the separator is attached to the preceding record. */
195 char first_char = *separator; /* Speed optimization, non-regexp. */
196 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
197 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
198 struct re_registers regs;
200 /* Find the size of the input file. */
201 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
203 return 0; /* It's an empty file. */
205 /* Arrange for the first read to lop off enough to leave the rest of the
206 file a multiple of `read_size'. Since `read_size' can change, this may
207 not always hold during the program run, but since it usually will, leave
208 it here for i/o efficiency (page/sector boundaries and all that).
209 Note: the efficiency gain has not been verified. */
210 saved_record_size = file_pos % read_size;
211 if (saved_record_size == 0)
212 saved_record_size = read_size;
213 file_pos -= saved_record_size;
214 /* `file_pos' now points to the start of the last (probably partial) block
215 in the input file. */
217 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
218 error (0, errno, "%s: seek failed", file);
220 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
222 error (0, errno, "%s", file);
226 match_start = past_end = G_buffer + saved_record_size;
227 /* For non-regexp search, move past impossible positions for a match. */
229 match_start -= match_length1;
233 /* Search backward from `match_start' - 1 to `G_buffer' for a match
234 with `separator'; for speed, use strncmp if `separator' contains no
236 If the match succeeds, set `match_start' to point to the start of
237 the match and `match_length' to the length of the match.
238 Otherwise, make `match_start' < `G_buffer'. */
239 if (sentinel_length == 0)
241 int i = match_start - G_buffer;
244 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
246 match_start = G_buffer - 1;
249 error (EXIT_FAILURE, 0,
250 _("error in regular expression search"));
254 match_start = G_buffer + regs.start[0];
255 match_length = regs.end[0] - regs.start[0];
260 /* `match_length' is constant for non-regexp boundaries. */
261 while (*--match_start != first_char
262 || (match_length1 && strncmp (match_start + 1, separator1,
267 /* Check whether we backed off the front of `G_buffer' without finding
268 a match for `separator'. */
269 if (match_start < G_buffer)
273 /* Hit the beginning of the file; print the remaining record. */
274 output (G_buffer, past_end);
278 saved_record_size = past_end - G_buffer;
279 if (saved_record_size > read_size)
281 /* `G_buffer_size' is about twice `read_size', so since
282 we want to read in another `read_size' bytes before
283 the data already in `G_buffer', we need to increase
286 int offset = sentinel_length ? sentinel_length : 1;
289 G_buffer_size = read_size * 2 + sentinel_length + 2;
290 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
292 /* Adjust the pointers for the new buffer location. */
293 match_start += newbuffer - G_buffer;
294 past_end += newbuffer - G_buffer;
295 G_buffer = newbuffer;
298 /* Back up to the start of the next bufferfull of the file. */
299 if (file_pos >= read_size)
300 file_pos -= read_size;
303 read_size = file_pos;
306 lseek (input_fd, file_pos, SEEK_SET);
308 /* Shift the pending record data right to make room for the new.
309 The source and destination regions probably overlap. */
310 memmove (G_buffer + read_size, G_buffer, saved_record_size);
311 past_end = G_buffer + read_size + saved_record_size;
312 /* For non-regexp searches, avoid unneccessary scanning. */
314 match_start = G_buffer + read_size;
316 match_start = past_end;
318 if (safe_read (input_fd, G_buffer, read_size) != read_size)
320 error (0, errno, "%s", file);
326 /* Found a match of `separator'. */
327 if (separator_ends_record)
329 char *match_end = match_start + match_length;
331 /* If this match of `separator' isn't at the end of the
332 file, print the record. */
333 if (first_time == 0 || match_end != past_end)
334 output (match_end, past_end);
335 past_end = match_end;
340 output (match_start, past_end);
341 past_end = match_start;
344 /* For non-regex matching, we can back up. */
345 if (sentinel_length > 0)
346 match_start -= match_length - 1;
351 /* Print FILE in reverse.
352 Return 0 if ok, 1 if an error occurs. */
355 tac_file (const char *file)
360 in = fopen (file, "r");
363 error (0, errno, "%s", file);
366 errors = tac_seekable (fileno (in), file);
367 if (ferror (in) || fclose (in) == EOF)
369 error (0, errno, "%s", file);
375 /* Make a copy of the standard input in `FIXME'. */
378 save_stdin (FILE **g_tmp, char **g_tempfile)
380 static char *template = NULL;
381 static char *tempdir;
382 static char *tempfile;
387 if (template == NULL)
389 tempdir = getenv ("TMPDIR");
391 tempdir = DEFAULT_TMPDIR;
392 template = xmalloc (strlen (tempdir) + 11);
394 sprintf (template, "%s/tacXXXXXX", tempdir);
395 tempfile = mktemp (template);
397 fd = open (tempfile, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
399 error (EXIT_FAILURE, errno, "%s", tempfile);
401 tmp = fdopen (fd, "w+");
403 error (EXIT_FAILURE, errno, "%s", tempfile);
409 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
413 error (EXIT_FAILURE, errno, _("stdin: read error"));
415 /* Don't bother checking for failure inside the loop -- check after. */
416 fwrite (G_buffer, 1, bytes_read, tmp);
419 if (ferror (tmp) || fflush (tmp) == EOF)
420 error (EXIT_FAILURE, errno, "%s", tempfile);
425 *g_tempfile = tempfile;
428 /* Print the standard input in reverse, saving it to temporary
429 file first if it is a pipe.
430 Return 0 if ok, 1 if an error occurs. */
438 /* No tempfile is needed for "tac < file".
439 Use fstat instead of checking for errno == ESPIPE because
440 lseek doesn't work on some special files but doesn't return an
442 if (fstat (STDIN_FILENO, &stats))
444 error (0, errno, _("standard input"));
448 if (S_ISREG (stats.st_mode))
450 errors = tac_seekable (fileno (stdin), _("standard input"));
456 save_stdin (&tmp_stream, &tmp_file);
457 errors = tac_seekable (fileno (tmp_stream), tmp_file);
463 /* BUF_END points one byte past the end of the buffer to be searched. */
466 memrchr (const char *buf_start, const char *buf_end, int c)
468 const char *p = buf_end;
469 while (buf_start <= --p)
471 if (*(const unsigned char *) p == c)
477 /* FIXME: describe */
480 tac_mem (const char *buf, size_t n_bytes, FILE *out)
488 nl = memrchr (buf, buf + n_bytes, '\n');
489 bol = (nl == NULL ? buf : nl + 1);
491 /* If the last line of the input file has no terminating newline,
492 treat it as a special case. */
493 if (bol < buf + n_bytes)
495 /* Print out the line from bol to end of input. */
496 fwrite (bol, 1, (buf + n_bytes) - bol, out);
498 /* Add a newline here. Otherwise, the first and second lines
499 of output would appear to have been joined. */
503 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
505 /* Output the line (which includes a trailing newline)
506 from NL+1 to BOL-1. */
507 fwrite (nl + 1, 1, bol - (nl + 1), out);
512 /* If there's anything left, output the last line: BUF .. BOL-1.
513 When the first byte of the input is a newline, there is nothing
516 fwrite (buf, 1, bol - buf, out);
518 /* FIXME: this is work in progress.... */
522 /* FIXME: describe */
525 tac_stdin_to_mem (void)
528 size_t bufsiz = 8 * BUFSIZ;
529 size_t delta = 8 * BUFSIZ;
536 buf = (char *) malloc (bufsiz);
538 buf = (char *) realloc (buf, bufsiz);
542 /* Free the buffer and fall back on the code that relies on a
548 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
552 error (EXIT_FAILURE, errno, _("stdin: read error"));
553 n_bytes += bytes_read;
558 tac_mem (buf, n_bytes, stdout);
564 main (int argc, char **argv)
566 const char *error_message; /* Return value from re_compile_pattern. */
568 int have_read_stdin = 0;
570 program_name = argv[0];
571 setlocale (LC_ALL, "");
572 bindtextdomain (PACKAGE, LOCALEDIR);
573 textdomain (PACKAGE);
578 separator_ends_record = 1;
580 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
587 separator_ends_record = 0;
595 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
604 printf ("tac (%s) %s\n", GNU_PACKAGE, VERSION);
611 if (sentinel_length == 0)
613 compiled_separator.allocated = 100;
614 compiled_separator.buffer = (unsigned char *)
615 xmalloc (compiled_separator.allocated);
616 compiled_separator.fastmap = xmalloc (256);
617 compiled_separator.translate = 0;
618 error_message = re_compile_pattern (separator, strlen (separator),
619 &compiled_separator);
621 error (EXIT_FAILURE, 0, "%s", error_message);
624 match_length = sentinel_length = strlen (separator);
626 read_size = INITIAL_READSIZE;
627 /* A precaution that will probably never be needed. */
628 while (sentinel_length * 2 >= read_size)
630 G_buffer_size = read_size * 2 + sentinel_length + 2;
631 G_buffer = xmalloc (G_buffer_size);
634 strcpy (G_buffer, separator);
635 G_buffer += sentinel_length;
645 errors = tac_stdin ();
649 for (; optind < argc; ++optind)
651 if (STREQ (argv[optind], "-"))
654 errors |= tac_stdin ();
658 errors |= tac_file (argv[optind]);
663 /* Flush the output buffer. */
664 output ((char *) NULL, (char *) NULL);
666 if (have_read_stdin && close (0) < 0)
667 error (EXIT_FAILURE, errno, "-");
668 if (ferror (stdout) || fclose (stdout) == EOF)
669 error (EXIT_FAILURE, errno, _("write error"));
670 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);