src/tac.c

   1 /* tac - concatenate and print files in reverse
   2    Copyright (C) 1988, 1989, 1990, 1991 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  17
  18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
  19    GNU enhancements by David MacKenzie (djm@ai.mit.edu). */
  20
  21 /* Copy each FILE, or the standard input if none are given or when a
  22    FILE name of "-" is encountered, to the standard output with the
  23    order of the records reversed.  The records are separated by
  24    instances of a string, or a newline if none is given.  By default, the
  25    separator string is attached to the end of the record that it
  26    follows in the file.
  27
  28    Options:
  29    -b, --before                 The separator is attached to the beginning
  30                                 of the record that it precedes in the file.
  31    -r, --regex                  The separator is a regular expression.
  32    -s, --separator=separator    Use SEPARATOR as the record separator.
  33
  34    To reverse a file byte by byte, use (in bash, ksh, or sh):
  35 tac -r -s '.\|
  36 ' file */
  37
  38 #include <stdio.h>
  39 #include <getopt.h>
  40 #include <sys/types.h>
  41 #include <signal.h>
  42 #include <regex.h>
  43 #include "system.h"
  44
  45 #ifndef STDC_HEADERS
  46 char *malloc ();
  47 char *realloc ();
  48 #endif
  49
  50 /* The number of bytes per atomic read. */
  51 #define INITIAL_READSIZE 8192
  52
  53 /* The number of bytes per atomic write. */
  54 #define WRITESIZE 8192
  55
  56 char *mktemp ();
  57
  58 RETSIGTYPE cleanup ();
  59 int tac ();
  60 int tac_file ();
  61 int tac_stdin ();
  62 char *xmalloc ();
  63 char *xrealloc ();
  64 void output ();
  65 void error ();
  66 void save_stdin ();
  67 void xwrite ();
  68
  69 /* The name this program was run with. */
  70 char *program_name;
  71
  72 /* The string that separates the records of the file. */
  73 char *separator;
  74
  75 /* If nonzero, print `separator' along with the record preceding it
  76    in the file; otherwise with the record following it. */
  77 int separator_ends_record;
  78
  79 /* 0 if `separator' is to be matched as a regular expression;
  80    otherwise, the length of `separator', used as a sentinel to
  81    stop the search. */
  82 int sentinel_length;
  83
  84 /* The length of a match with `separator'.  If `sentinel_length' is 0,
  85    `match_length' is computed every time a match succeeds;
  86    otherwise, it is simply the length of `separator'. */
  87 int match_length;
  88
  89 /* The input buffer. */
  90 char *buffer;
  91
  92 /* The number of bytes to read at once into `buffer'. */
  93 unsigned read_size;
  94
  95 /* The size of `buffer'.  This is read_size * 2 + sentinel_length + 2.
  96    The extra 2 bytes allow `past_end' to have a value beyond the
  97    end of `buffer' and `match_start' to run off the front of `buffer'. */
  98 unsigned buffer_size;
  99
 100 /* The compiled regular expression representing `separator'. */
 101 static struct re_pattern_buffer compiled_separator;
 102
 103 struct option longopts[] =
 104 {
 105   {"before", 0, &separator_ends_record, 0},
 106   {"regex", 0, &sentinel_length, 0},
 107   {"separator", 1, NULL, 's'},
 108   {NULL, 0, NULL, 0}
 109 };
 110
 111 void
 112 main (argc, argv)
 113      int argc;
 114      char **argv;
 115 {
 116   char *error_message;          /* Return value from re_compile_pattern. */
 117   int optc, errors;
 118   int have_read_stdin = 0;
 119
 120   program_name = argv[0];
 121   errors = 0;
 122   separator = "\n";
 123   sentinel_length = 1;
 124   separator_ends_record = 1;
 125
 126   while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
 127          != EOF)
 128     {
 129       switch (optc)
 130         {
 131         case 0:
 132           break;
 133         case 'b':
 134           separator_ends_record = 0;
 135           break;
 136         case 'r':
 137           sentinel_length = 0;
 138           break;
 139         case 's':
 140           separator = optarg;
 141           if (*separator == 0)
 142             error (1, 0, "separator cannot be empty");
 143           break;
 144         default:
 145           fprintf (stderr, "\
 146 Usage: %s [-br] [-s separator] [--before] [--regex] [--separator=separator]\n\
 147        [file...]\n",
 148                    program_name);
 149           exit (1);
 150         }
 151     }
 152
 153   if (sentinel_length == 0)
 154     {
 155       compiled_separator.allocated = 100;
 156       compiled_separator.buffer = (unsigned char *)
 157         xmalloc (compiled_separator.allocated);
 158       compiled_separator.fastmap = xmalloc (256);
 159       compiled_separator.translate = 0;
 160       error_message = re_compile_pattern (separator, strlen (separator),
 161                                           &compiled_separator);
 162       if (error_message)
 163         error (1, 0, "%s", error_message);
 164     }
 165   else
 166     match_length = sentinel_length = strlen (separator);
 167
 168   read_size = INITIAL_READSIZE;
 169   /* A precaution that will probably never be needed. */
 170   while (sentinel_length * 2 >= read_size)
 171     read_size *= 2;
 172   buffer_size = read_size * 2 + sentinel_length + 2;
 173   buffer = xmalloc (buffer_size);
 174   if (sentinel_length)
 175     {
 176       strcpy (buffer, separator);
 177       buffer += sentinel_length;
 178     }
 179   else
 180     ++buffer;
 181
 182   if (optind == argc)
 183     {
 184       have_read_stdin = 1;
 185       errors = tac_stdin ();
 186     }
 187   else
 188     for (; optind < argc; ++optind)
 189       {
 190         if (strcmp (argv[optind], "-") == 0)
 191           {
 192             have_read_stdin = 1;
 193             errors |= tac_stdin ();
 194           }
 195         else
 196           errors |= tac_file (argv[optind]);
 197       }
 198
 199   /* Flush the output buffer. */
 200   output ((char *) NULL, (char *) NULL);
 201
 202   if (have_read_stdin && close (0) < 0)
 203     error (1, errno, "-");
 204   if (close (1) < 0)
 205     error (1, errno, "write error");
 206   exit (errors);
 207 }
 208
 209 /* The name of a temporary file containing a copy of pipe input. */
 210 char *tempfile;
 211
 212 /* Print the standard input in reverse, saving it to temporary
 213    file `tempfile' first if it is a pipe.
 214    Return 0 if ok, 1 if an error occurs. */
 215
 216 int
 217 tac_stdin ()
 218 {
 219   /* Previous values of signal handlers. */
 220   RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
 221   int errors;
 222   struct stat stats;
 223 #ifdef _POSIX_VERSION
 224     struct sigaction oldact, newact;
 225 #endif                          /* _POSIX_VERSION */
 226
 227   /* No tempfile is needed for "tac < file".
 228      Use fstat instead of checking for errno == ESPIPE because
 229      lseek doesn't work on some special files but doesn't return an
 230      error, either. */
 231   if (fstat (0, &stats))
 232     {
 233       error (0, errno, "standard input");
 234       return 1;
 235     }
 236   if (S_ISREG (stats.st_mode))
 237     return tac (0, "standard input");
 238
 239 #ifdef _POSIX_VERSION
 240   newact.sa_handler = cleanup;
 241   sigemptyset (&newact.sa_mask);
 242   newact.sa_flags = 0;
 243
 244   sigaction (SIGINT, NULL, &oldact);
 245   sigint = oldact.sa_handler;
 246   if (sigint != SIG_IGN)
 247     sigaction (SIGINT, &newact, NULL);
 248
 249   sigaction (SIGHUP, NULL, &oldact);
 250   sighup = oldact.sa_handler;
 251   if (sighup != SIG_IGN)
 252     sigaction (SIGHUP, &newact, NULL);
 253
 254   sigaction (SIGPIPE, NULL, &oldact);
 255   sigpipe = oldact.sa_handler;
 256   if (sigpipe != SIG_IGN)
 257     sigaction (SIGPIPE, &newact, NULL);
 258
 259   sigaction (SIGTERM, NULL, &oldact);
 260   sigterm = oldact.sa_handler;
 261   if (sigterm != SIG_IGN)
 262     sigaction (SIGTERM, &newact, NULL);
 263 #else                           /* !_POSIX_VERSION */
 264   sigint = signal (SIGINT, SIG_IGN);
 265   if (sigint != SIG_IGN)
 266     signal (SIGINT, cleanup);
 267
 268   sighup = signal (SIGHUP, SIG_IGN);
 269   if (sighup != SIG_IGN)
 270     signal (SIGHUP, cleanup);
 271
 272   sigpipe = signal (SIGPIPE, SIG_IGN);
 273   if (sigpipe != SIG_IGN)
 274     signal (SIGPIPE, cleanup);
 275
 276   sigterm = signal (SIGTERM, SIG_IGN);
 277   if (sigterm != SIG_IGN)
 278     signal (SIGTERM, cleanup);
 279 #endif                          /* _POSIX_VERSION */
 280
 281   save_stdin ();
 282
 283   errors = tac_file (tempfile);
 284
 285   unlink (tempfile);
 286
 287 #ifdef _POSIX_VERSION
 288   newact.sa_handler = sigint;
 289   sigaction (SIGINT, &newact, NULL);
 290   newact.sa_handler = sighup;
 291   sigaction (SIGHUP, &newact, NULL);
 292   newact.sa_handler = sigterm;
 293   sigaction (SIGTERM, &newact, NULL);
 294   newact.sa_handler = sigpipe;
 295   sigaction (SIGPIPE, &newact, NULL);
 296 #else                           /* !_POSIX_VERSION */
 297   signal (SIGINT, sigint);
 298   signal (SIGHUP, sighup);
 299   signal (SIGTERM, sigterm);
 300   signal (SIGPIPE, sigpipe);
 301 #endif                          /* _POSIX_VERSION */
 302
 303   return errors;
 304 }
 305
 306 /* Make a copy of the standard input in `tempfile'. */
 307
 308 void
 309 save_stdin ()
 310 {
 311   static char *template = NULL;
 312   static char *tempdir;
 313   int fd;
 314   int bytes_read;
 315
 316   if (template == NULL)
 317     {
 318       tempdir = getenv ("TMPDIR");
 319       if (tempdir == NULL)
 320         tempdir = "/tmp";
 321       template = xmalloc (strlen (tempdir) + 11);
 322     }
 323   sprintf (template, "%s/tacXXXXXX", tempdir);
 324   tempfile = mktemp (template);
 325
 326   fd = creat (tempfile, 0600);
 327   if (fd == -1)
 328     {
 329       error (0, errno, "%s", tempfile);
 330       cleanup ();
 331     }
 332   while ((bytes_read = read (0, buffer, read_size)) > 0)
 333     if (write (fd, buffer, bytes_read) != bytes_read)
 334       {
 335         error (0, errno, "%s", tempfile);
 336         cleanup ();
 337       }
 338   if (close (fd) < 0)
 339     {
 340       error (0, errno, "%s", tempfile);
 341       cleanup ();
 342     }
 343   if (bytes_read == -1)
 344     {
 345       error (0, errno, "read error");
 346       cleanup ();
 347     }
 348 }
 349
 350 /* Print FILE in reverse.
 351    Return 0 if ok, 1 if an error occurs. */
 352
 353 int
 354 tac_file (file)
 355      char *file;
 356 {
 357   int fd, errors;
 358
 359   fd = open (file, 0);
 360   if (fd == -1)
 361     {
 362       error (0, errno, "%s", file);
 363       return 1;
 364     }
 365   errors = tac (fd, file);
 366   if (close (fd) < 0)
 367     {
 368       error (0, errno, "%s", file);
 369       return 1;
 370     }
 371   return errors;
 372 }
 373
 374 /* Print in reverse the file open on descriptor FD for reading FILE.
 375    Return 0 if ok, 1 if an error occurs. */
 376
 377 int
 378 tac (fd, file)
 379      int fd;
 380      char *file;
 381 {
 382   /* Pointer to the location in `buffer' where the search for
 383      the next separator will begin. */
 384   char *match_start;
 385   /* Pointer to one past the rightmost character in `buffer' that
 386      has not been printed yet. */
 387   char *past_end;
 388   unsigned saved_record_size;   /* Length of the record growing in `buffer'. */
 389   off_t file_pos;               /* Offset in the file of the next read. */
 390   /* Nonzero if `output' has not been called yet for any file.
 391      Only used when the separator is attached to the preceding record. */
 392   int first_time = 1;
 393   char first_char = *separator; /* Speed optimization, non-regexp. */
 394   char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
 395   int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
 396   struct re_registers regs;
 397
 398   /* Find the size of the input file. */
 399   file_pos = lseek (fd, (off_t) 0, SEEK_END);
 400   if (file_pos < 1)
 401     return 0;                   /* It's an empty file. */
 402
 403   /* Arrange for the first read to lop off enough to leave the rest of the
 404      file a multiple of `read_size'.  Since `read_size' can change, this may
 405      not always hold during the program run, but since it usually will, leave
 406      it here for i/o efficiency (page/sector boundaries and all that).
 407      Note: the efficiency gain has not been verified. */
 408   saved_record_size = file_pos % read_size;
 409   if (saved_record_size == 0)
 410     saved_record_size = read_size;
 411   file_pos -= saved_record_size;
 412   /* `file_pos' now points to the start of the last (probably partial) block
 413      in the input file. */
 414
 415   lseek (fd, file_pos, SEEK_SET);
 416   if (read (fd, buffer, saved_record_size) != saved_record_size)
 417     {
 418       error (0, 1, "%s", file);
 419       return 1;
 420     }
 421
 422   match_start = past_end = buffer + saved_record_size;
 423   /* For non-regexp search, move past impossible positions for a match. */
 424   if (sentinel_length)
 425     match_start -= match_length1;
 426
 427   for (;;)
 428     {
 429       /* Search backward from `match_start' - 1 to `buffer' for a match
 430          with `separator'; for speed, use strncmp if `separator' contains no
 431          metacharacters.
 432          If the match succeeds, set `match_start' to point to the start of
 433          the match and `match_length' to the length of the match.
 434          Otherwise, make `match_start' < `buffer'. */
 435       if (sentinel_length == 0)
 436         {
 437           int i = match_start - buffer;
 438           int ret;
 439
 440           ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
 441           if (ret == -1)
 442             match_start = buffer - 1;
 443           else if (ret == -2)
 444             {
 445               error (0, 0, "error in regular expression search");
 446               cleanup ();
 447             }
 448           else
 449             {
 450               match_start = buffer + regs.start[0];
 451               match_length = regs.end[0] - regs.start[0];
 452             }
 453         }
 454       else
 455         {
 456           /* `match_length' is constant for non-regexp boundaries. */
 457           while (*--match_start != first_char
 458                  || (match_length1 && strncmp (match_start + 1, separator1,
 459                                                match_length1)))
 460             /* Do nothing. */ ;
 461         }
 462
 463       /* Check whether we backed off the front of `buffer' without finding
 464          a match for `separator'. */
 465       if (match_start < buffer)
 466         {
 467           if (file_pos == 0)
 468             {
 469               /* Hit the beginning of the file; print the remaining record. */
 470               output (buffer, past_end);
 471               return 0;
 472             }
 473
 474           saved_record_size = past_end - buffer;
 475           if (saved_record_size > read_size)
 476             {
 477               /* `buffer_size' is about twice `read_size', so since
 478                  we want to read in another `read_size' bytes before
 479                  the data already in `buffer', we need to increase
 480                  `buffer_size'. */
 481               char *newbuffer;
 482               int offset = sentinel_length ? sentinel_length : 1;
 483
 484               read_size *= 2;
 485               buffer_size = read_size * 2 + sentinel_length + 2;
 486               newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
 487               /* Adjust the pointers for the new buffer location.  */
 488               match_start += newbuffer - buffer;
 489               past_end += newbuffer - buffer;
 490               buffer = newbuffer;
 491             }
 492
 493           /* Back up to the start of the next bufferfull of the file.  */
 494           if (file_pos >= read_size)
 495             file_pos -= read_size;
 496           else
 497             {
 498               read_size = file_pos;
 499               file_pos = 0;
 500             }
 501           lseek (fd, file_pos, SEEK_SET);
 502
 503           /* Shift the pending record data right to make room for the new. */
 504           bcopy (buffer, buffer + read_size, saved_record_size);
 505           past_end = buffer + read_size + saved_record_size;
 506           /* For non-regexp searches, avoid unneccessary scanning. */
 507           if (sentinel_length)
 508             match_start = buffer + read_size;
 509           else
 510             match_start = past_end;
 511
 512           if (read (fd, buffer, read_size) != read_size)
 513             {
 514               error (0, errno, "%s", file);
 515               return 1;
 516             }
 517         }
 518       else
 519         {
 520           /* Found a match of `separator'. */
 521           if (separator_ends_record)
 522             {
 523               char *match_end = match_start + match_length;
 524
 525               /* If this match of `separator' isn't at the end of the
 526                  file, print the record. */
 527               if (first_time == 0 || match_end != past_end)
 528                 output (match_end, past_end);
 529               past_end = match_end;
 530               first_time = 0;
 531             }
 532           else
 533             {
 534               output (match_start, past_end);
 535               past_end = match_start;
 536             }
 537           match_start -= match_length - 1;
 538         }
 539     }
 540 }
 541
 542 /* Print the characters from START to PAST_END - 1.
 543    If START is NULL, just flush the buffer. */
 544
 545 void
 546 output (start, past_end)
 547      char *start;
 548      char *past_end;
 549 {
 550   static char buffer[WRITESIZE];
 551   static int bytes_in_buffer = 0;
 552   int bytes_to_add = past_end - start;
 553   int bytes_available = WRITESIZE - bytes_in_buffer;
 554
 555   if (start == 0)
 556     {
 557       xwrite (1, buffer, bytes_in_buffer);
 558       bytes_in_buffer = 0;
 559       return;
 560     }
 561
 562   /* Write out as many full buffers as possible. */
 563   while (bytes_to_add >= bytes_available)
 564     {
 565       bcopy (start, buffer + bytes_in_buffer, bytes_available);
 566       bytes_to_add -= bytes_available;
 567       start += bytes_available;
 568       xwrite (1, buffer, WRITESIZE);
 569       bytes_in_buffer = 0;
 570       bytes_available = WRITESIZE;
 571     }
 572
 573   bcopy (start, buffer + bytes_in_buffer, bytes_to_add);
 574   bytes_in_buffer += bytes_to_add;
 575 }
 576
 577 RETSIGTYPE
 578 cleanup ()
 579 {
 580   unlink (tempfile);
 581   exit (1);
 582 }
 583
 584 void
 585 xwrite (desc, buffer, size)
 586      int desc;
 587      char *buffer;
 588      int size;
 589 {
 590   if (write (desc, buffer, size) != size)
 591     {
 592       error (0, errno, "write error");
 593       cleanup ();
 594     }
 595 }
 596
 597 /* Allocate N bytes of memory dynamically, with error checking.  */
 598
 599 char *
 600 xmalloc (n)
 601      unsigned n;
 602 {
 603   char *p;
 604
 605   p = malloc (n);
 606   if (p == 0)
 607     {
 608       error (0, 0, "virtual memory exhausted");
 609       cleanup ();
 610     }
 611   return p;
 612 }
 613
 614 /* Change the size of memory area P to N bytes, with error checking. */
 615
 616 char *
 617 xrealloc (p, n)
 618      char *p;
 619      unsigned n;
 620 {
 621   p = realloc (p, n);
 622   if (p == 0)
 623     {
 624       error (0, 0, "virtual memory exhausted");
 625       cleanup ();
 626     }
 627   return p;
 628 }