1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88,89,90,91,95,96,97, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
follows in the file.
Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
42 #include <sys/types.h>
52 #include "safe-read.h"
54 #ifndef DEFAULT_TMPDIR
55 # define DEFAULT_TMPDIR "/tmp"
58 /* The initial number of bytes per atomic read; main copies this into `read_size', which may change afterwards. */
59 #define INITIAL_READSIZE 8192
61 /* The number of bytes per atomic write; also the size of the static staging buffer inside `output'. */
62 #define WRITESIZE 8192
66 /* The name this program was run with (main stores argv[0] in `program_name'). */
69 /* The string that separates the records of the file. */
70 static char *separator;
72 /* If nonzero, print `separator' along with the record preceding it
73 in the file; otherwise with the record following it.
main sets this to 1 before parsing options; a later assignment in main clears it. */
74 static int separator_ends_record;
76 /* 0 if `separator' is to be matched as a regular expression;
77 otherwise, the length of `separator', used as a sentinel to stop the search. */
79 static int sentinel_length;
81 /* The length of a match with `separator'. If `sentinel_length' is 0,
82 `match_length' is computed every time a match succeeds;
83 otherwise, it is simply the length of `separator'. */
84 static int match_length;
86 /* The input buffer. main copies `separator' to the start of the
allocation and then advances this pointer by `sentinel_length', so it
may point past the true start of the allocated block. */
87 static char *G_buffer;
89 /* The number of bytes to read at once into `G_buffer'. */
90 static size_t read_size;
92 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
93 The extra 2 bytes allow `past_end' to have a value beyond the
94 end of `G_buffer' and `match_start' to run off the front of `G_buffer'.
NOTE(review): this is `unsigned' while `read_size' is `size_t'. */
95 static unsigned G_buffer_size;
97 /* The compiled regular expression representing `separator'. */
98 static struct re_pattern_buffer compiled_separator;
100 /* If nonzero, display usage information and exit. */
101 static int show_help;
103 /* If nonzero, print the version on standard output then exit. */
104 static int show_version;
/* Long options handed to getopt_long in main. The first three map to
the short options b, r and s; --help and --version have no short form
and instead store 1 into `show_help' / `show_version'. */
106 static struct option const longopts[] =
108 {"before", no_argument, NULL, 'b'},
109 {"regex", no_argument, NULL, 'r'},
110 {"separator", required_argument, NULL, 's'},
111 {"help", no_argument, &show_help, 1},
112 {"version", no_argument, &show_version, 1},
/* Usage/help reporter (its signature is not part of this listing).
Either prints a short "Try --help" hint on stderr, or prints the option
summary followed by the bug-report address, and then exits with
EXIT_SUCCESS when `status' is 0 and EXIT_FAILURE otherwise.
NOTE(review): the test that chooses between the hint and the full text
is not visible here -- presumably it is on `status'; confirm. */
120 fprintf (stderr, _("Try `%s --help' for more information.\n"),
125 Usage: %s [OPTION]... [FILE]...\n\
129 Write each FILE to standard output, last line first.\n\
130 With no FILE, or when FILE is -, read standard input.\n\
132 -b, --before attach the separator before instead of after\n\
133 -r, --regex interpret the separator as a regular expression\n\
134 -s, --separator=STRING use STRING as the separator instead of newline\n\
135 --help display this help and exit\n\
136 --version output version information and exit\n\
138 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
140 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
143 /* Queue the characters from START to PAST_END - 1 for standard output.
Data is staged in a static WRITESIZE-byte buffer and written with fwrite
each time that buffer fills. The fwrite results are not checked here;
main tests ferror (stdout) after the final flush.
144 If START is NULL, just flush the buffer. */
147 output (const char *start, const char *past_end)
/* The staging buffer and its fill count are static, so they persist
across calls. */
149 static char buffer[WRITESIZE];
150 static int bytes_in_buffer = 0;
/* NOTE(review): both counts are plain `int', so a single call spanning
more than INT_MAX bytes would overflow -- confirm callers cannot do that. */
151 int bytes_to_add = past_end - start;
152 int bytes_available = WRITESIZE - bytes_in_buffer;
/* Flush request: write out whatever is staged. (The test on START that
guards this call is not part of this listing.) */
156 fwrite (buffer, 1, bytes_in_buffer, stdout);
161 /* Write out as many full buffers as possible. */
162 while (bytes_to_add >= bytes_available)
/* Top the staging buffer up to exactly WRITESIZE bytes, then write it. */
164 memcpy (buffer + bytes_in_buffer, start, bytes_available);
165 bytes_to_add -= bytes_available;
166 start += bytes_available;
167 fwrite (buffer, 1, WRITESIZE, stdout);
/* NOTE(review): the listing jumps from 167 to 169; presumably the missing
line resets `bytes_in_buffer' to 0 -- confirm against the full source. */
169 bytes_available = WRITESIZE;
/* Fewer than `bytes_available' bytes remain; keep them for a later call. */
172 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
173 bytes_in_buffer += bytes_to_add;
176 /* Print in reverse the file FILE, which is open for reading on
descriptor INPUT_FD. The descriptor must support lseek: the file is
read from its end toward its start, one chunk of `read_size' bytes at a
time, and each record is passed to `output' once the separator that
bounds it has been found. In the visible code FILE is used only in
diagnostics.
177 Return 0 if ok, 1 if an error occurs. */
180 tac_seekable (int input_fd, const char *file)
182 /* Pointer to the location in `G_buffer' where the search for
183 the next separator will begin. */
186 /* Pointer to one past the rightmost character in `G_buffer' that
187 has not been printed yet. */
190 /* Length of the record growing in `G_buffer'. */
191 size_t saved_record_size;
193 /* Offset in the file of the next read. */
196 /* Nonzero if `output' has not been called yet for any file.
197 Only used when the separator is attached to the preceding record. */
199 char first_char = *separator; /* Speed optimization, non-regexp. */
200 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
201 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
202 struct re_registers regs;
204 /* Find the size of the input file. */
205 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
207 return 0; /* It's an empty file. */
209 /* Arrange for the first read to lop off enough to leave the rest of the
210 file a multiple of `read_size'. Since `read_size' can change, this may
211 not always hold during the program run, but since it usually will, leave
212 it here for i/o efficiency (page/sector boundaries and all that).
213 Note: the efficiency gain has not been verified. */
214 saved_record_size = file_pos % read_size;
215 if (saved_record_size == 0)
216 saved_record_size = read_size;
217 file_pos -= saved_record_size;
218 /* `file_pos' now points to the start of the last (probably partial) block
219 in the input file. */
221 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
222 error (0, errno, "%s: seek failed", file);
/* A short read counts as an error: exactly `saved_record_size' bytes
are expected from this position. */
224 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
226 error (0, errno, "%s", file);
/* Nothing has been printed yet, so both cursors start at the end of the
data just read. */
230 match_start = past_end = G_buffer + saved_record_size;
231 /* For non-regexp search, move past impossible positions for a match. */
233 match_start -= match_length1;
237 /* Search backward from `match_start' - 1 to `G_buffer' for a match
238 with `separator'; for speed, use strncmp when `separator' is not being
matched as a regular expression (sentinel_length != 0).
240 If the match succeeds, set `match_start' to point to the start of
241 the match and `match_length' to the length of the match.
242 Otherwise, make `match_start' < `G_buffer'. */
243 if (sentinel_length == 0)
245 int i = match_start - G_buffer;
/* Negative range (-i): re_search scans backward from offset i - 1.
NOTE(review): the last argument reads `®s' in this listing, which looks
like a mis-encoded `&regs' (the `struct re_registers' declared above) --
confirm against the full source. */
248 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
250 match_start = G_buffer - 1;
253 error (EXIT_FAILURE, 0,
254 _("error in regular expression search"));
258 match_start = G_buffer + regs.start[0];
259 match_length = regs.end[0] - regs.start[0];
264 /* `match_length' is constant for non-regexp boundaries. */
/* This backward scan has no explicit lower bound.
NOTE(review): termination presumably relies on the copy of `separator'
that main stores immediately before `G_buffer' -- confirm. */
265 while (*--match_start != first_char
266 || (match_length1 && strncmp (match_start + 1, separator1,
271 /* Check whether we backed off the front of `G_buffer' without finding
272 a match for `separator'. */
273 if (match_start < G_buffer)
277 /* Hit the beginning of the file; print the remaining record. */
278 output (G_buffer, past_end);
/* Everything from the buffer start up to `past_end' is an unfinished
record that must be kept while more of the file is read in front of it. */
282 saved_record_size = past_end - G_buffer;
283 if (saved_record_size > read_size)
285 /* `G_buffer_size' is about twice `read_size', so since
286 we want to read in another `read_size' bytes before
287 the data already in `G_buffer', we need to increase
290 int offset = sentinel_length ? sentinel_length : 1;
293 G_buffer_size = read_size * 2 + sentinel_length + 2;
294 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
296 /* Adjust the pointers for the new buffer location. */
/* NOTE(review): the listing jumps from 294 to 296. xrealloc was given
`G_buffer - offset', so presumably the missing line adds `offset' back to
`newbuffer' before the deltas below are taken -- confirm. */
297 match_start += newbuffer - G_buffer;
298 past_end += newbuffer - G_buffer;
299 G_buffer = newbuffer;
302 /* Back up to the start of the next bufferful of the file. */
303 if (file_pos >= read_size)
304 file_pos -= read_size;
/* Fewer than `read_size' bytes precede `file_pos': shrink the next read
to cover just those bytes. */
307 read_size = file_pos;
/* NOTE(review): unlike the first seek above, this lseek result is not
checked. */
310 lseek (input_fd, file_pos, SEEK_SET);
312 /* Shift the pending record data right to make room for the new.
313 The source and destination regions probably overlap. */
314 memmove (G_buffer + read_size, G_buffer, saved_record_size);
315 past_end = G_buffer + read_size + saved_record_size;
316 /* For non-regexp searches, avoid unnecessary scanning. */
318 match_start = G_buffer + read_size;
320 match_start = past_end;
322 if (safe_read (input_fd, G_buffer, read_size) != read_size)
324 error (0, errno, "%s", file);
330 /* Found a match of `separator'. */
331 if (separator_ends_record)
333 char *match_end = match_start + match_length;
335 /* If this match of `separator' isn't at the end of the
336 file, print the record. */
337 if (first_time == 0 || match_end != past_end)
338 output (match_end, past_end);
/* The separator itself stays unprinted for now; it will be emitted at
the end of the record that precedes it. */
339 past_end = match_end;
/* Separator-before mode: the separator is printed at the front of the
record that follows it. */
344 output (match_start, past_end);
345 past_end = match_start;
348 /* For non-regex matching, we can back up. */
349 if (sentinel_length > 0)
350 match_start -= match_length - 1;
355 /* Print FILE in reverse: open it with fopen, pass its descriptor to
tac_seekable, then check and close the stream.
356 Return 0 if ok, 1 if an error occurs. */
359 tac_file (const char *file)
364 in = fopen (file, "r");
/* Report the failure with errno. (The test of `in' that guards this is
not part of this listing.) */
367 error (0, errno, "%s", file);
/* Only the descriptor is passed on; tac_seekable reads it with lseek and
safe_read, not through the stdio stream. */
370 errors = tac_seekable (fileno (in), file);
371 if (ferror (in) || fclose (in) == EOF)
373 error (0, errno, "%s", file);
379 /* Copy the standard input into a newly created temporary file whose
name is built from the template "tacXXXXXX" under the directory given by
the TMPDIR environment variable, with DEFAULT_TMPDIR as the fallback.
Failures to create, read or write are reported with
error (EXIT_FAILURE, ...). The temporary file's name is stored through
G_TEMPFILE; G_TMP presumably receives the open stream, but that
assignment is not part of this listing. */
382 save_stdin (FILE **g_tmp, char **g_tempfile)
/* Static, so values computed on the first call are kept for later calls. */
384 static char *template = NULL;
385 static char *tempdir;
386 static char *tempfile;
391 if (template == NULL)
393 tempdir = getenv ("TMPDIR");
395 tempdir = DEFAULT_TMPDIR;
/* 11 extra bytes: 10 for "/tacXXXXXX" plus the terminating NUL. */
396 template = xmalloc (strlen (tempdir) + 11);
398 sprintf (template, "%s/tacXXXXXX", tempdir);
/* NOTE(review): mktemp only produces a name; the O_EXCL create on the
next line is what rejects a name that was taken in the meantime.
mkstemp would generate and open the file in one step. */
399 tempfile = mktemp (template);
401 fd = open (tempfile, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
403 error (EXIT_FAILURE, errno, "%s", tempfile);
405 tmp = fdopen (fd, "w+");
407 error (EXIT_FAILURE, errno, "%s", tempfile);
/* The global `G_buffer' is reused as scratch space for the copy, up to
`read_size' bytes per read. */
413 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
417 error (EXIT_FAILURE, errno, _("stdin: read error"));
419 /* Don't bother checking for failure inside the loop -- check after. */
420 fwrite (G_buffer, 1, bytes_read, tmp);
/* One deferred check covers every fwrite above plus the flush. */
423 if (ferror (tmp) || fflush (tmp) == EOF)
424 error (EXIT_FAILURE, errno, "%s", tempfile);
429 *g_tempfile = tempfile;
432 /* Print the standard input in reverse. The visible code passes stdin
straight to tac_seekable when fstat reports a regular file (S_ISREG);
433 otherwise (for example a pipe) it appears to copy stdin to a temporary
file with save_stdin and run tac_seekable on that copy.
434 Return 0 if ok, 1 if an error occurs. */
442 /* No tempfile is needed for "tac < file".
443 Use fstat instead of checking for errno == ESPIPE because
444 lseek doesn't work on some special files but doesn't return an
446 if (fstat (STDIN_FILENO, &stats))
448 error (0, errno, _("standard input"));
452 if (S_ISREG (stats.st_mode))
454 errors = tac_seekable (fileno (stdin), _("standard input"));
460 save_stdin (&tmp_stream, &tmp_file);
461 errors = tac_seekable (fileno (tmp_stream), tmp_file);
467 /* Search backward from BUF_END - 1 down to BUF_START for a byte equal to C. BUF_END points one byte past the end of the buffer to be searched. NOTE(review): the return statements are not part of this listing; tac_mem compares the result against NULL. */
/* NOTE(review): glibc declares its own memrchr with a different
signature (s, c, n); confirm <string.h> does not expose it in this build. */
470 memrchr (const char *buf_start, const char *buf_end, int c)
472 const char *p = buf_end;
/* NOTE(review): when no byte matches, the final --p yields BUF_START - 1
before the comparison; forming a pointer in front of an array is
undefined behavior in ISO C even though it is never dereferenced. */
473 while (buf_start <= --p)
/* Read the byte as unsigned char so values >= 0x80 compare equal to a
non-negative C. */
475 if (*(const unsigned char *) p == c)
481 /* Write the N_BYTES bytes at BUF to OUT with the order of the newline-terminated lines reversed. A final line that lacks a newline is written first. NOTE(review): the return statement is not part of this listing. */
484 tac_mem (const char *buf, size_t n_bytes, FILE *out)
/* Find the last newline; BOL then marks the start of whatever follows it
(or the start of the buffer when there is no newline at all). */
492 nl = memrchr (buf, buf + n_bytes, '\n');
493 bol = (nl == NULL ? buf : nl + 1);
495 /* If the last line of the input file has no terminating newline,
496 treat it as a special case. */
497 if (bol < buf + n_bytes)
499 /* Print out the line from bol to end of input. */
500 fwrite (bol, 1, (buf + n_bytes) - bol, out);
502 /* Add a newline here. Otherwise, the first and second lines
503 of output would appear to have been joined. */
/* Walk backward one line per pass. The search ends at BOL - 1 (exclusive),
which skips the newline that terminates the line being delimited.
NOTE(review): when BOL == BUF this passes BUF - 1 as the end pointer.
The update of BOL inside the loop is not part of this listing. */
507 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
509 /* Output the line (which includes a trailing newline)
510 from NL+1 to BOL-1. */
511 fwrite (nl + 1, 1, bol - (nl + 1), out);
516 /* If there's anything left, output the last line: BUF .. BOL-1.
517 When the first byte of the input is a newline, there is nothing
520 fwrite (buf, 1, bol - buf, out);
522 /* FIXME: this is work in progress.... */
526 /* Read standard input into a heap buffer that starts at 8 * BUFSIZ bytes and is resized with realloc, then pass the accumulated bytes to tac_mem, which writes them to stdout. NOTE(review): no call to this function appears in this listing. */
529 tac_stdin_to_mem (void)
/* Initial capacity; `delta' has the same value and is presumably the
growth step -- the line that uses it is not part of this listing. */
532 size_t bufsiz = 8 * BUFSIZ;
533 size_t delta = 8 * BUFSIZ;
540 buf = (char *) malloc (bufsiz);
/* NOTE(review): assigning realloc's result straight back to `buf' drops
the only pointer to the old block if realloc fails; the recovery path is
cut off in this listing -- confirm it still has something to free. */
542 buf = (char *) realloc (buf, bufsiz);
546 /* Free the buffer and fall back on the code that relies on a
552 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
556 error (EXIT_FAILURE, errno, _("stdin: read error"));
557 n_bytes += bytes_read;
562 tac_mem (buf, n_bytes, stdout);
568 main (int argc, char **argv)
570 const char *error_message; /* Return value from re_compile_pattern. */
/* Program entry: parse the options accepted by "brs:" and `longopts',
prepare either a compiled regex or a literal separator, allocate the
shared input buffer, reverse each operand ("-" means standard input),
flush the output, and exit with EXIT_FAILURE if `errors' is nonzero. */
/* Gates the close (0) near the end; the assignment that sets it is not
part of this listing. */
572 int have_read_stdin = 0;
574 program_name = argv[0];
/* Locale and message-catalog setup for the strings wrapped in _(). */
575 setlocale (LC_ALL, "");
576 bindtextdomain (PACKAGE, LOCALEDIR);
577 textdomain (PACKAGE);
/* Default: the separator is attached to the record that precedes it. */
582 separator_ends_record = 1;
584 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
591 separator_ends_record = 0;
599 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
608 printf ("tac (%s) %s\n", GNU_PACKAGE, VERSION);
/* sentinel_length == 0 means regex mode: compile `separator' into
`compiled_separator', with a 100-byte initial pattern buffer and a
256-entry fastmap. */
615 if (sentinel_length == 0)
617 compiled_separator.allocated = 100;
618 compiled_separator.buffer = (unsigned char *)
619 xmalloc (compiled_separator.allocated);
620 compiled_separator.fastmap = xmalloc (256);
621 compiled_separator.translate = 0;
622 error_message = re_compile_pattern (separator, strlen (separator),
623 &compiled_separator);
625 error (EXIT_FAILURE, 0, "%s", error_message);
/* Literal mode (presumably the else branch): both lengths are simply the
length of the separator string. */
628 match_length = sentinel_length = strlen (separator);
630 read_size = INITIAL_READSIZE;
631 /* A precaution that will probably never be needed. */
/* The loop body is not part of this listing; the condition keeps running
while read_size is no more than twice the separator length. */
632 while (sentinel_length * 2 >= read_size)
634 G_buffer_size = read_size * 2 + sentinel_length + 2;
635 G_buffer = xmalloc (G_buffer_size);
/* Put a copy of the separator in front of the data area and advance
G_buffer past it, so the unbounded backward scan in tac_seekable hits it
as a sentinel. NOTE(review): presumably done only when sentinel_length
is nonzero -- the guarding test is not part of this listing. */
638 strcpy (G_buffer, separator);
639 G_buffer += sentinel_length;
/* Presumably the no-operand case: reverse standard input. */
649 errors = tac_stdin ();
/* Process each remaining operand in order, OR-ing the error results. */
653 for (; optind < argc; ++optind)
655 if (STREQ (argv[optind], "-"))
658 errors |= tac_stdin ();
662 errors |= tac_file (argv[optind]);
667 /* Flush the output buffer. */
668 output ((char *) NULL, (char *) NULL);
670 if (have_read_stdin && close (0) < 0)
671 error (EXIT_FAILURE, errno, "-");
/* Deferred check for output errors, including the unchecked fwrite calls
made by `output'. */
672 if (ferror (stdout) || fclose (stdout) == EOF)
673 error (EXIT_FAILURE, errno, _("write error"));
674 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);