src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2007 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <sys/wait.h>
  29 #include <signal.h>
  30 #include "system.h"
  31 #include "argmatch.h"
  32 #include "error.h"
  33 #include "hard-locale.h"
  34 #include "hash.h"
  35 #include "inttostr.h"
  36 #include "md5.h"
  37 #include "physmem.h"
  38 #include "posixver.h"
  39 #include "quote.h"
  40 #include "randread.h"
  41 #include "stdio--.h"
  42 #include "stdlib--.h"
  43 #include "strnumcmp.h"
  44 #include "xmemcoll.h"
  45 #include "xmemxfrm.h"
  46 #include "xstrtol.h"
  47
  48 #if HAVE_SYS_RESOURCE_H
  49 # include <sys/resource.h>
  50 #endif
  51 #ifndef RLIMIT_DATA
  52 struct rlimit { size_t rlim_cur; };
  53 # define getrlimit(Resource, Rlp) (-1)
  54 #endif
  55
  56 /* The official name of this program (e.g., no `g' prefix).  */
  57 #define PROGRAM_NAME "sort"
  58
  59 #define AUTHORS "Mike Haertel", "Paul Eggert"
  60
  61 #if HAVE_LANGINFO_CODESET
  62 # include <langinfo.h>
  63 #endif
  64
  65 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  66    present.  */
  67 #ifndef SA_NOCLDSTOP
  68 # define SA_NOCLDSTOP 0
  69 /* No sigprocmask.  Always 'return' zero. */
  70 # define sigprocmask(How, Set, Oset) (0)
  71 # define sigset_t int
  72 # if ! HAVE_SIGINTERRUPT
  73 #  define siginterrupt(sig, flag) /* empty */
  74 # endif
  75 #endif
  76
  77 #ifndef STDC_HEADERS
  78 double strtod ();
  79 #endif
  80
  81 #define UCHAR_LIM (UCHAR_MAX + 1)
  82
  83 #ifndef DEFAULT_TMPDIR
  84 # define DEFAULT_TMPDIR "/tmp"
  85 #endif
  86
  87 /* Exit statuses.  */
  88 enum
  89   {
  90     /* POSIX says to exit with status 1 if invoked with -c and the
  91        input is not properly sorted.  */
  92     SORT_OUT_OF_ORDER = 1,
  93
  94     /* POSIX says any other irregular exit must exit with a status
  95        code greater than 1.  */
  96     SORT_FAILURE = 2
  97   };
  98
  99 enum
 100   {
 101     /* The number of times we should try to fork a compression process
 102        (we retry if the fork call fails).  We don't _need_ to compress
 103        temp files, this is just to reduce disk access, so this number
 104        can be small.  */
 105     MAX_FORK_TRIES_COMPRESS = 2,
 106
 107     /* The number of times we should try to fork a decompression process.
 108        If we can't fork a decompression process, we can't sort, so this
 109        number should be big.  */
 110     MAX_FORK_TRIES_DECOMPRESS = 8
 111   };
 112
 113 /* The representation of the decimal point in the current locale.  */
 114 static int decimal_point;
 115
 116 /* Thousands separator; if -1, then there isn't one.  */
 117 static int thousands_sep;
 118
 119 /* Nonzero if the corresponding locales are hard.  */
 120 static bool hard_LC_COLLATE;
 121 #if HAVE_NL_LANGINFO
 122 static bool hard_LC_TIME;
 123 #endif
 124
 125 #define NONZERO(x) ((x) != 0)
 126
 127 /* The kind of blanks for '-b' to skip in various options. */
 128 enum blanktype { bl_start, bl_end, bl_both };
 129
 130 /* The character marking end of line. Default to \n. */
 131 static char eolchar = '\n';
 132
 133 /* Lines are held in core as counted strings. */
 134 struct line
 135 {
 136   char *text;                   /* Text of the line. */
 137   size_t length;                /* Length including final newline. */
 138   char *keybeg;                 /* Start of first key. */
 139   char *keylim;                 /* Limit of first key. */
 140 };
 141
 142 /* Input buffers. */
 143 struct buffer
 144 {
 145   char *buf;                    /* Dynamically allocated buffer,
 146                                    partitioned into 3 regions:
 147                                    - input data;
 148                                    - unused area;
 149                                    - an array of lines, in reverse order.  */
 150   size_t used;                  /* Number of bytes used for input data.  */
 151   size_t nlines;                /* Number of lines in the line array.  */
 152   size_t alloc;                 /* Number of bytes allocated. */
 153   size_t left;                  /* Number of bytes left from previous reads. */
 154   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 155   bool eof;                     /* An EOF has been read.  */
 156 };
 157
 158 struct keyfield
 159 {
 160   size_t sword;                 /* Zero-origin 'word' to start at. */
 161   size_t schar;                 /* Additional characters to skip. */
 162   size_t eword;                 /* Zero-origin first word after field. */
 163   size_t echar;                 /* Additional characters in field. */
 164   bool const *ignore;           /* Boolean array of characters to ignore. */
 165   char const *translate;        /* Translation applied to characters. */
 166   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 167   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 168   bool numeric;                 /* Flag for numeric comparison.  Handle
 169                                    strings of digits with optional decimal
 170                                    point, but no exponential notation. */
 171   bool random;                  /* Sort by random hash of key.  */
 172   bool general_numeric;         /* Flag for general, numeric comparison.
 173                                    Handle numbers in exponential notation. */
 174   bool month;                   /* Flag for comparison by month name. */
 175   bool reverse;                 /* Reverse the sense of comparison. */
 176   struct keyfield *next;        /* Next keyfield to try. */
 177 };
 178
 179 struct month
 180 {
 181   char const *name;
 182   int val;
 183 };
 184
 185 /* The name this program was run with. */
 186 char *program_name;
 187
 188 /* FIXME: None of these tables work with multibyte character sets.
 189    Also, there are many other bugs when handling multibyte characters.
 190    One way to fix this is to rewrite `sort' to use wide characters
 191    internally, but doing this with good performance is a bit
 192    tricky.  */
 193
 194 /* Table of blanks.  */
 195 static bool blanks[UCHAR_LIM];
 196
 197 /* Table of non-printing characters. */
 198 static bool nonprinting[UCHAR_LIM];
 199
 200 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 201 static bool nondictionary[UCHAR_LIM];
 202
 203 /* Translation table folding lower case to upper.  */
 204 static char fold_toupper[UCHAR_LIM];
 205
 206 #define MONTHS_PER_YEAR 12
 207
 208 /* Table mapping month names to integers.
 209    Alphabetic order allows binary search. */
 210 static struct month monthtab[] =
 211 {
 212   {"APR", 4},
 213   {"AUG", 8},
 214   {"DEC", 12},
 215   {"FEB", 2},
 216   {"JAN", 1},
 217   {"JUL", 7},
 218   {"JUN", 6},
 219   {"MAR", 3},
 220   {"MAY", 5},
 221   {"NOV", 11},
 222   {"OCT", 10},
 223   {"SEP", 9}
 224 };
 225
 226 /* During the merge phase, the number of files to merge at once. */
 227 #define NMERGE 16
 228
 229 /* Minimum size for a merge or check buffer.  */
 230 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 231
 232 /* Minimum sort size; the code might not work with smaller sizes.  */
 233 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 234
 235 /* The number of bytes needed for a merge or check buffer, which can
 236    function relatively efficiently even if it holds only one line.  If
 237    a longer line is seen, this value is increased.  */
 238 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 239
 240 /* The approximate maximum number of bytes of main memory to use, as
 241    specified by the user.  Zero if the user has not specified a size.  */
 242 static size_t sort_size;
 243
 244 /* The guessed size for non-regular files.  */
 245 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 246
 247 /* Array of directory names in which any temporary files are to be created. */
 248 static char const **temp_dirs;
 249
 250 /* Number of temporary directory names used.  */
 251 static size_t temp_dir_count;
 252
 253 /* Number of allocated slots in temp_dirs.  */
 254 static size_t temp_dir_alloc;
 255
 256 /* Flag to reverse the order of all comparisons. */
 257 static bool reverse;
 258
 259 /* Flag for stable sort.  This turns off the last ditch bytewise
 260    comparison of lines, and instead leaves lines in the same order
 261    they were read if all keys compare equal.  */
 262 static bool stable;
 263
 264 /* If TAB has this value, blanks separate fields.  */
 265 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 266
 267 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 268    separated by the empty string between a non-blank character and a blank
 269    character. */
 270 static int tab = TAB_DEFAULT;
 271
 272 /* Flag to remove consecutive duplicate lines from the output.
 273    Only the last of a sequence of equal lines will be output. */
 274 static bool unique;
 275
 276 /* Nonzero if any of the input files are the standard input. */
 277 static bool have_read_stdin;
 278
 279 /* List of key field comparisons to be tried.  */
 280 static struct keyfield *keylist;
 281
 282 /* Program used to (de)compress temp files.  Must accept -d.  */
 283 static char const *compress_program;
 284
 285 static void sortlines_temp (struct line *, size_t, struct line *);
 286
 287 /* Report MESSAGE for FILE, then clean up and exit.
 288    If FILE is null, it represents standard output.  */
 289
 290 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 291 static void
 292 die (char const *message, char const *file)
 293 {
 294   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 295   exit (SORT_FAILURE);
 296 }
 297
 298 void
 299 usage (int status)
 300 {
 301   if (status != EXIT_SUCCESS)
 302     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 303              program_name);
 304   else
 305     {
 306       printf (_("\
 307 Usage: %s [OPTION]... [FILE]...\n\
 308 "),
 309               program_name);
 310       fputs (_("\
 311 Write sorted concatenation of all FILE(s) to standard output.\n\
 312 \n\
 313 "), stdout);
 314       fputs (_("\
 315 Mandatory arguments to long options are mandatory for short options too.\n\
 316 "), stdout);
 317       fputs (_("\
 318 Ordering options:\n\
 319 \n\
 320 "), stdout);
 321       fputs (_("\
 322   -b, --ignore-leading-blanks  ignore leading blanks\n\
 323   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 324   -f, --ignore-case           fold lower case to upper case characters\n\
 325 "), stdout);
 326       fputs (_("\
 327   -g, --general-numeric-sort  compare according to general numerical value\n\
 328   -i, --ignore-nonprinting    consider only printable characters\n\
 329   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 330   -n, --numeric-sort          compare according to string numerical value\n\
 331   -R, --random-sort           sort by random hash of keys\n\
 332       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
 333   -r, --reverse               reverse the result of comparisons\n\
 334 \n\
 335 "), stdout);
 336       fputs (_("\
 337 Other options:\n\
 338 \n\
 339   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 340   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 341       --compress-program=PROG  compress temporaries with PROG;\n\
 342                               decompress them with PROG -d\n\
 343   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
 344   -m, --merge               merge already sorted files; do not sort\n\
 345 "), stdout);
 346       fputs (_("\
 347   -o, --output=FILE         write result to FILE instead of standard output\n\
 348   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 349   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 350 "), stdout);
 351       printf (_("\
 352   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 353   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 354                               multiple options specify multiple directories\n\
 355   -u, --unique              with -c, check for strict ordering;\n\
 356                               without -c, output only the first of an equal run\n\
 357 "), DEFAULT_TMPDIR);
 358       fputs (_("\
 359   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 360 "), stdout);
 361       fputs (HELP_OPTION_DESCRIPTION, stdout);
 362       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 363       fputs (_("\
 364 \n\
 365 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 366 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 367 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 368 one or more single-letter ordering options, which override global ordering\n\
 369 options for that key.  If no key is given, use the entire line as the key.\n\
 370 \n\
 371 SIZE may be followed by the following multiplicative suffixes:\n\
 372 "), stdout);
 373       fputs (_("\
 374 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 375 \n\
 376 With no FILE, or when FILE is -, read standard input.\n\
 377 \n\
 378 *** WARNING ***\n\
 379 The locale specified by the environment affects sort order.\n\
 380 Set LC_ALL=C to get the traditional sort order that uses\n\
 381 native byte values.\n\
 382 "), stdout );
 383       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 384     }
 385
 386   exit (status);
 387 }
 388
 389 /* For long options that have no equivalent short option, use a
 390    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 391 enum
 392 {
 393   CHECK_OPTION = CHAR_MAX + 1,
 394   COMPRESS_PROGRAM_OPTION,
 395   RANDOM_SOURCE_OPTION
 396 };
 397
 398 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
 399
 400 static struct option const long_options[] =
 401 {
 402   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 403   {"check", optional_argument, NULL, CHECK_OPTION},
 404   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
 405   {"dictionary-order", no_argument, NULL, 'd'},
 406   {"ignore-case", no_argument, NULL, 'f'},
 407   {"general-numeric-sort", no_argument, NULL, 'g'},
 408   {"ignore-nonprinting", no_argument, NULL, 'i'},
 409   {"key", required_argument, NULL, 'k'},
 410   {"merge", no_argument, NULL, 'm'},
 411   {"month-sort", no_argument, NULL, 'M'},
 412   {"numeric-sort", no_argument, NULL, 'n'},
 413   {"random-sort", no_argument, NULL, 'R'},
 414   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 415   {"output", required_argument, NULL, 'o'},
 416   {"reverse", no_argument, NULL, 'r'},
 417   {"stable", no_argument, NULL, 's'},
 418   {"buffer-size", required_argument, NULL, 'S'},
 419   {"field-separator", required_argument, NULL, 't'},
 420   {"temporary-directory", required_argument, NULL, 'T'},
 421   {"unique", no_argument, NULL, 'u'},
 422   {"zero-terminated", no_argument, NULL, 'z'},
 423   {GETOPT_HELP_OPTION_DECL},
 424   {GETOPT_VERSION_OPTION_DECL},
 425   {NULL, 0, NULL, 0},
 426 };
 427
 428 static char const *const check_args[] =
 429 {
 430   "quiet", "silent", "diagnose-first", NULL
 431 };
 432 static char const check_types[] =
 433 {
 434   'C', 'C', 'c'
 435 };
 436 ARGMATCH_VERIFY (check_args, check_types);
 437
 438 /* The set of signals that are caught.  */
 439 static sigset_t caught_signals;
 440
 441 /* Critical section status.  */
 442 struct cs_status
 443 {
 444   bool valid;
 445   sigset_t sigs;
 446 };
 447
 448 /* Enter a critical section.  */
 449 static struct cs_status
 450 cs_enter (void)
 451 {
 452   struct cs_status status;
 453   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 454   return status;
 455 }
 456
 457 /* Leave a critical section.  */
 458 static void
 459 cs_leave (struct cs_status status)
 460 {
 461   if (status.valid)
 462     {
 463       /* Ignore failure when restoring the signal mask. */
 464       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 465     }
 466 }
 467
 468 /* The list of temporary files. */
 469 struct tempnode
 470 {
 471   struct tempnode *volatile next;
 472   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 473   char name[1];  /* Actual size is 1 + file name length.  */
 474 };
 475 static struct tempnode *volatile temphead;
 476 static struct tempnode *volatile *temptail = &temphead;
 477
 478 struct sortfile
 479 {
 480   char const *name;
 481   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 482 };
 483
 484 /* A table where we store compression process states.  We clean up all
 485    processes in a timely manner so as not to exhaust system resources,
 486    so we store the info on whether the process is still running, or has
 487    been reaped here.  */
 488 static Hash_table *proctab;
 489
 490 enum { INIT_PROCTAB_SIZE = 47 };
 491
 492 enum procstate { ALIVE, ZOMBIE };
 493
 494 /* A proctab entry.  The COUNT field is there in case we fork a new
 495    compression process that has the same PID as an old zombie process
 496    that is still in the table (because the process to decompress the
 497    temp file it was associated with hasn't started yet).  */
 498 struct procnode
 499 {
 500   pid_t pid;
 501   enum procstate state;
 502   size_t count;
 503 };
 504
 505 static size_t
 506 proctab_hasher (const void *entry, size_t tabsize)
 507 {
 508   const struct procnode *node = entry;
 509   return node->pid % tabsize;
 510 }
 511
 512 static bool
 513 proctab_comparator (const void *e1, const void *e2)
 514 {
 515   const struct procnode *n1 = e1, *n2 = e2;
 516   return n1->pid == n2->pid;
 517 }
 518
 519 /* The total number of forked processes (compressors and decompressors)
 520    that have not been reaped yet. */
 521 static size_t nprocs;
 522
 523 /* The number of child processes we'll allow before we try to reap some. */
 524 enum { MAX_PROCS_BEFORE_REAP = 2 };
 525
 526 /* If 0 < PID, wait for the child process with that PID to exit.
 527    If PID is -1, clean up a random child process which has finished and
 528    return the process ID of that child.  If PID is -1 and no processes
 529    have quit yet, return 0 without waiting.  */
 530
 531 static pid_t
 532 reap (pid_t pid)
 533 {
 534   int status;
 535   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 536
 537   if (cpid < 0)
 538     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 539            compress_program);
 540   else if (0 < cpid)
 541     {
 542       if (! WIFEXITED (status) || WEXITSTATUS (status))
 543         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 544                compress_program);
 545       --nprocs;
 546     }
 547
 548   return cpid;
 549 }
 550
 551 /* Add the PID of a running compression process to proctab, or update
 552    the entry COUNT and STATE fields if it's already there.  This also
 553    creates the table for us the first time it's called.  */
 554
 555 static void
 556 register_proc (pid_t pid)
 557 {
 558   struct procnode test, *node;
 559
 560   if (! proctab)
 561     {
 562       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 563                                  proctab_hasher,
 564                                  proctab_comparator,
 565                                  free);
 566       if (! proctab)
 567         xalloc_die ();
 568     }
 569
 570   test.pid = pid;
 571   node = hash_lookup (proctab, &test);
 572   if (node)
 573     {
 574       node->state = ALIVE;
 575       ++node->count;
 576     }
 577   else
 578     {
 579       node = xmalloc (sizeof *node);
 580       node->pid = pid;
 581       node->state = ALIVE;
 582       node->count = 1;
 583       hash_insert (proctab, node);
 584     }
 585 }
 586
 587 /* This is called when we reap a random process.  We don't know
 588    whether we have reaped a compression process or a decompression
 589    process until we look in the table.  If there's an ALIVE entry for
 590    it, then we have reaped a compression process, so change the state
 591    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 592
 593 static void
 594 update_proc (pid_t pid)
 595 {
 596   struct procnode test, *node;
 597
 598   test.pid = pid;
 599   node = hash_lookup (proctab, &test);
 600   if (node)
 601     node->state = ZOMBIE;
 602 }
 603
 604 /* This is for when we need to wait for a compression process to exit.
 605    If it has a ZOMBIE entry in the table then it's already dead and has
 606    been reaped.  Note that if there's an ALIVE entry for it, it still may
 607    already have died and been reaped if a second process was created with
 608    the same PID.  This is probably exceedingly rare, but to be on the safe
 609    side we will have to wait for any compression process with this PID.  */
 610
 611 static void
 612 wait_proc (pid_t pid)
 613 {
 614   struct procnode test, *node;
 615
 616   test.pid = pid;
 617   node = hash_lookup (proctab, &test);
 618   if (node->state == ALIVE)
 619     reap (pid);
 620
 621   node->state = ZOMBIE;
 622   if (! --node->count)
 623     {
 624       hash_delete (proctab, node);
 625       free (node);
 626     }
 627 }
 628
 629 /* Keep reaping finished children as long as there are more to reap.
 630    This doesn't block waiting for any of them, it only reaps those
 631    that are already dead.  */
 632
 633 static void
 634 reap_some (void)
 635 {
 636   pid_t pid;
 637
 638   while (0 < nprocs && (pid = reap (-1)))
 639     update_proc (pid);
 640 }
 641
 642 /* Clean up any remaining temporary files.  */
 643
 644 static void
 645 cleanup (void)
 646 {
 647   struct tempnode const *node;
 648
 649   for (node = temphead; node; node = node->next)
 650     unlink (node->name);
 651   temphead = NULL;
 652 }
 653
 654 /* Cleanup actions to take when exiting.  */
 655
 656 static void
 657 exit_cleanup (void)
 658 {
 659   if (temphead)
 660     {
 661       /* Clean up any remaining temporary files in a critical section so
 662          that a signal handler does not try to clean them too.  */
 663       struct cs_status cs = cs_enter ();
 664       cleanup ();
 665       cs_leave (cs);
 666     }
 667
 668   close_stdout ();
 669 }
 670
 671 /* Create a new temporary file, returning its newly allocated tempnode.
 672    Store into *PFD the file descriptor open for writing.  */
 673
 674 static struct tempnode *
 675 create_temp_file (int *pfd)
 676 {
 677   static char const slashbase[] = "/sortXXXXXX";
 678   static size_t temp_dir_index;
 679   int fd;
 680   int saved_errno;
 681   char const *temp_dir = temp_dirs[temp_dir_index];
 682   size_t len = strlen (temp_dir);
 683   struct tempnode *node =
 684     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 685   char *file = node->name;
 686   struct cs_status cs;
 687
 688   memcpy (file, temp_dir, len);
 689   memcpy (file + len, slashbase, sizeof slashbase);
 690   node->next = NULL;
 691   node->pid = 0;
 692   if (++temp_dir_index == temp_dir_count)
 693     temp_dir_index = 0;
 694
 695   /* Create the temporary file in a critical section, to avoid races.  */
 696   cs = cs_enter ();
 697   fd = mkstemp (file);
 698   if (0 <= fd)
 699     {
 700       *temptail = node;
 701       temptail = &node->next;
 702     }
 703   saved_errno = errno;
 704   cs_leave (cs);
 705   errno = saved_errno;
 706
 707   if (fd < 0)
 708     die (_("cannot create temporary file"), file);
 709
 710   *pfd = fd;
 711   return node;
 712 }
 713
 714 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 715    standard output; HOW should be "w".  When opening for input, "-"
 716    means standard input.  To avoid confusion, do not return file
 717    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 718    opening an ordinary FILE.  */
 719
 720 static FILE *
 721 xfopen (const char *file, const char *how)
 722 {
 723   FILE *fp;
 724
 725   if (!file)
 726     fp = stdout;
 727   else if (STREQ (file, "-") && *how == 'r')
 728     {
 729       have_read_stdin = true;
 730       fp = stdin;
 731     }
 732   else
 733     {
 734       fp = fopen (file, how);
 735       if (! fp)
 736         die (_("open failed"), file);
 737     }
 738
 739   return fp;
 740 }
 741
 742 /* Close FP, whose name is FILE, and report any errors.  */
 743
 744 static void
 745 xfclose (FILE *fp, char const *file)
 746 {
 747   switch (fileno (fp))
 748     {
 749     case STDIN_FILENO:
 750       /* Allow reading stdin from tty more than once.  */
 751       if (feof (fp))
 752         clearerr (fp);
 753       break;
 754
 755     case STDOUT_FILENO:
 756       /* Don't close stdout just yet.  close_stdout does that.  */
 757       if (fflush (fp) != 0)
 758         die (_("fflush failed"), file);
 759       break;
 760
 761     default:
 762       if (fclose (fp) != 0)
 763         die (_("close failed"), file);
 764       break;
 765     }
 766 }
 767
 768 static void
 769 dup2_or_die (int oldfd, int newfd)
 770 {
 771   if (dup2 (oldfd, newfd) < 0)
 772     error (SORT_FAILURE, errno, _("dup2 failed"));
 773 }
 774
 775 /* Fork a child process for piping to and do common cleanup.  The
 776    TRIES parameter tells us how many times to try to fork before
 777    giving up.  Return the PID of the child or -1 if fork failed.  */
 778
 779 static pid_t
 780 pipe_fork (int pipefds[2], size_t tries)
 781 {
 782 #if HAVE_WORKING_FORK
 783   struct tempnode *saved_temphead;
 784   int saved_errno;
 785   unsigned int wait_retry = 1;
 786   pid_t pid IF_LINT (= -1);
 787   struct cs_status cs;
 788
 789   if (pipe (pipefds) < 0)
 790     return -1;
 791
 792   while (tries--)
 793     {
 794       /* This is so the child process won't delete our temp files
 795          if it receives a signal before exec-ing.  */
 796       cs = cs_enter ();
 797       saved_temphead = temphead;
 798       temphead = NULL;
 799
 800       pid = fork ();
 801       saved_errno = errno;
 802       if (pid)
 803         temphead = saved_temphead;
 804
 805       cs_leave (cs);
 806       errno = saved_errno;
 807
 808       if (0 <= pid || errno != EAGAIN)
 809         break;
 810       else
 811         {
 812           sleep (wait_retry);
 813           wait_retry *= 2;
 814           reap_some ();
 815         }
 816     }
 817
 818   if (pid < 0)
 819     {
 820       close (pipefds[0]);
 821       close (pipefds[1]);
 822     }
 823   else if (pid == 0)
 824     {
 825       close (STDIN_FILENO);
 826       close (STDOUT_FILENO);
 827     }
 828   else
 829     ++nprocs;
 830
 831   return pid;
 832
 833 #else  /* ! HAVE_WORKING_FORK */
 834   return -1;
 835 #endif
 836 }
 837
 838 /* Create a temporary file and start a compression program to filter output
 839    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 840    set it to the PID of the newly-created process.  */
 841
 842 static char *
 843 create_temp (FILE **pfp, pid_t *ppid)
 844 {
 845   int tempfd;
 846   struct tempnode *node = create_temp_file (&tempfd);
 847   char *name = node->name;
 848
 849   if (compress_program)
 850     {
 851       int pipefds[2];
 852
 853       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 854       if (0 < node->pid)
 855         {
 856           close (tempfd);
 857           close (pipefds[0]);
 858           tempfd = pipefds[1];
 859
 860           register_proc (node->pid);
 861         }
 862       else if (node->pid == 0)
 863         {
 864           close (pipefds[1]);
 865           dup2_or_die (tempfd, STDOUT_FILENO);
 866           close (tempfd);
 867           dup2_or_die (pipefds[0], STDIN_FILENO);
 868           close (pipefds[0]);
 869
 870           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 871             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 872                    compress_program);
 873         }
 874       else
 875         node->pid = 0;
 876     }
 877
 878   *pfp = fdopen (tempfd, "w");
 879   if (! *pfp)
 880     die (_("couldn't create temporary file"), name);
 881
 882   if (ppid)
 883     *ppid = node->pid;
 884
 885   return name;
 886 }
 887
 888 /* Open a compressed temp file and start a decompression process through
 889    which to filter the input.  PID must be the valid processes ID of the
 890    process used to compress the file.  */
 891
 892 static FILE *
 893 open_temp (const char *name, pid_t pid)
 894 {
 895   int tempfd, pipefds[2];
 896   pid_t child_pid;
 897   FILE *fp;
 898
 899   wait_proc (pid);
 900
 901   tempfd = open (name, O_RDONLY);
 902   if (tempfd < 0)
 903     die (_("couldn't open temporary file"), name);
 904
 905   child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
 906   if (0 < child_pid)
 907     {
 908       close (tempfd);
 909       close (pipefds[1]);
 910     }
 911   else if (child_pid == 0)
 912     {
 913       close (pipefds[0]);
 914       dup2_or_die (tempfd, STDIN_FILENO);
 915       close (tempfd);
 916       dup2_or_die (pipefds[1], STDOUT_FILENO);
 917       close (pipefds[1]);
 918
 919       if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
 920         error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
 921                compress_program);
 922     }
 923   else
 924     error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
 925            compress_program);
 926
 927   fp = fdopen (pipefds[0], "r");
 928   if (! fp)
 929     die (_("couldn't create temporary file"), name);
 930
 931   return fp;
 932 }
 933
 934 static void
 935 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 936 {
 937   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 938     die (_("write failed"), output_file);
 939 }
 940
 941 /* Append DIR to the array of temporary directory names.  */
 942 static void
 943 add_temp_dir (char const *dir)
 944 {
 945   if (temp_dir_count == temp_dir_alloc)
 946     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
 947
 948   temp_dirs[temp_dir_count++] = dir;
 949 }
 950
 951 /* Remove NAME from the list of temporary files.  */
 952
 953 static void
 954 zaptemp (const char *name)
 955 {
 956   struct tempnode *volatile *pnode;
 957   struct tempnode *node;
 958   struct tempnode *next;
 959   int unlink_status;
 960   int unlink_errno = 0;
 961   struct cs_status cs;
 962
 963   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
 964     continue;
 965
 966   /* Unlink the temporary file in a critical section to avoid races.  */
 967   next = node->next;
 968   cs = cs_enter ();
 969   unlink_status = unlink (name);
 970   unlink_errno = errno;
 971   *pnode = next;
 972   cs_leave (cs);
 973
 974   if (unlink_status != 0)
 975     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
 976   if (! next)
 977     temptail = pnode;
 978   free (node);
 979 }
 980
 981 #if HAVE_NL_LANGINFO
 982
 983 static int
 984 struct_month_cmp (const void *m1, const void *m2)
 985 {
 986   struct month const *month1 = m1;
 987   struct month const *month2 = m2;
 988   return strcmp (month1->name, month2->name);
 989 }
 990
 991 #endif
 992
 993 /* Initialize the character class tables. */
 994
 995 static void
 996 inittables (void)
 997 {
 998   size_t i;
 999
1000   for (i = 0; i < UCHAR_LIM; ++i)
1001     {
1002       blanks[i] = !! isblank (i);
1003       nonprinting[i] = ! isprint (i);
1004       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1005       fold_toupper[i] = toupper (i);
1006     }
1007
1008 #if HAVE_NL_LANGINFO
1009   /* If we're not in the "C" locale, read different names for months.  */
1010   if (hard_LC_TIME)
1011     {
1012       for (i = 0; i < MONTHS_PER_YEAR; i++)
1013         {
1014           char const *s;
1015           size_t s_len;
1016           size_t j;
1017           char *name;
1018
1019           s = (char *) nl_langinfo (ABMON_1 + i);
1020           s_len = strlen (s);
1021           monthtab[i].name = name = xmalloc (s_len + 1);
1022           monthtab[i].val = i + 1;
1023
1024           for (j = 0; j < s_len; j++)
1025             name[j] = fold_toupper[to_uchar (s[j])];
1026           name[j] = '\0';
1027         }
1028       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1029              sizeof *monthtab, struct_month_cmp);
1030     }
1031 #endif
1032 }
1033
1034 /* Specify the amount of main memory to use when sorting.  */
1035 static void
1036 specify_sort_size (char const *s)
1037 {
1038   uintmax_t n;
1039   char *suffix;
1040   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1041
1042   /* The default unit is KiB.  */
1043   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1044     {
1045       if (n <= UINTMAX_MAX / 1024)
1046         n *= 1024;
1047       else
1048         e = LONGINT_OVERFLOW;
1049     }
1050
1051   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1052   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1053     switch (suffix[0])
1054       {
1055       case 'b':
1056         e = LONGINT_OK;
1057         break;
1058
1059       case '%':
1060         {
1061           double mem = physmem_total () * n / 100;
1062
1063           /* Use "<", not "<=", to avoid problems with rounding.  */
1064           if (mem < UINTMAX_MAX)
1065             {
1066               n = mem;
1067               e = LONGINT_OK;
1068             }
1069           else
1070             e = LONGINT_OVERFLOW;
1071         }
1072         break;
1073       }
1074
1075   if (e == LONGINT_OK)
1076     {
1077       /* If multiple sort sizes are specified, take the maximum, so
1078          that option order does not matter.  */
1079       if (n < sort_size)
1080         return;
1081
1082       sort_size = n;
1083       if (sort_size == n)
1084         {
1085           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1086           return;
1087         }
1088
1089       e = LONGINT_OVERFLOW;
1090     }
1091
1092   STRTOL_FATAL_ERROR (s, _("sort size"), e);
1093 }
1094
1095 /* Return the default sort size.  */
1096 static size_t
1097 default_sort_size (void)
1098 {
1099   /* Let MEM be available memory or 1/8 of total memory, whichever
1100      is greater.  */
1101   double avail = physmem_available ();
1102   double total = physmem_total ();
1103   double mem = MAX (avail, total / 8);
1104   struct rlimit rlimit;
1105
1106   /* Let SIZE be MEM, but no more than the maximum object size or
1107      system resource limits.  Avoid the MIN macro here, as it is not
1108      quite right when only one argument is floating point.  Don't
1109      bother to check for values like RLIM_INFINITY since in practice
1110      they are not much less than SIZE_MAX.  */
1111   size_t size = SIZE_MAX;
1112   if (mem < size)
1113     size = mem;
1114   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1115     size = rlimit.rlim_cur;
1116 #ifdef RLIMIT_AS
1117   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1118     size = rlimit.rlim_cur;
1119 #endif
1120
1121   /* Leave a large safety margin for the above limits, as failure can
1122      occur when they are exceeded.  */
1123   size /= 2;
1124
1125 #ifdef RLIMIT_RSS
1126   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1127      Exceeding RSS is not fatal, but can be quite slow.  */
1128   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1129     size = rlimit.rlim_cur / 16 * 15;
1130 #endif
1131
1132   /* Use no less than the minimum.  */
1133   return MAX (size, MIN_SORT_SIZE);
1134 }
1135
1136 /* Return the sort buffer size to use with the input files identified
1137    by FPS and FILES, which are alternate names of the same files.
1138    NFILES gives the number of input files; NFPS may be less.  Assume
1139    that each input line requires LINE_BYTES extra bytes' worth of line
1140    information.  Do not exceed the size bound specified by the user
1141    (or a default size bound, if the user does not specify one).  */
1142
1143 static size_t
1144 sort_buffer_size (FILE *const *fps, size_t nfps,
1145                   char *const *files, size_t nfiles,
1146                   size_t line_bytes)
1147 {
1148   /* A bound on the input size.  If zero, the bound hasn't been
1149      determined yet.  */
1150   static size_t size_bound;
1151
1152   /* In the worst case, each input byte is a newline.  */
1153   size_t worst_case_per_input_byte = line_bytes + 1;
1154
1155   /* Keep enough room for one extra input line and an extra byte.
1156      This extra room might be needed when preparing to read EOF.  */
1157   size_t size = worst_case_per_input_byte + 1;
1158
1159   size_t i;
1160
1161   for (i = 0; i < nfiles; i++)
1162     {
1163       struct stat st;
1164       off_t file_size;
1165       size_t worst_case;
1166
1167       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1168            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1169            : stat (files[i], &st))
1170           != 0)
1171         die (_("stat failed"), files[i]);
1172
1173       if (S_ISREG (st.st_mode))
1174         file_size = st.st_size;
1175       else
1176         {
1177           /* The file has unknown size.  If the user specified a sort
1178              buffer size, use that; otherwise, guess the size.  */
1179           if (sort_size)
1180             return sort_size;
1181           file_size = INPUT_FILE_SIZE_GUESS;
1182         }
1183
1184       if (! size_bound)
1185         {
1186           size_bound = sort_size;
1187           if (! size_bound)
1188             size_bound = default_sort_size ();
1189         }
1190
1191       /* Add the amount of memory needed to represent the worst case
1192          where the input consists entirely of newlines followed by a
1193          single non-newline.  Check for overflow.  */
1194       worst_case = file_size * worst_case_per_input_byte + 1;
1195       if (file_size != worst_case / worst_case_per_input_byte
1196           || size_bound - size <= worst_case)
1197         return size_bound;
1198       size += worst_case;
1199     }
1200
1201   return size;
1202 }
1203
1204 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1205    must be at least sizeof (struct line).  Allocate ALLOC bytes
1206    initially.  */
1207
1208 static void
1209 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1210 {
1211   /* Ensure that the line array is properly aligned.  If the desired
1212      size cannot be allocated, repeatedly halve it until allocation
1213      succeeds.  The smaller allocation may hurt overall performance,
1214      but that's better than failing.  */
1215   for (;;)
1216     {
1217       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1218       buf->buf = malloc (alloc);
1219       if (buf->buf)
1220         break;
1221       alloc /= 2;
1222       if (alloc <= line_bytes + 1)
1223         xalloc_die ();
1224     }
1225
1226   buf->line_bytes = line_bytes;
1227   buf->alloc = alloc;
1228   buf->used = buf->left = buf->nlines = 0;
1229   buf->eof = false;
1230 }
1231
1232 /* Return one past the limit of the line array.  */
1233
1234 static inline struct line *
1235 buffer_linelim (struct buffer const *buf)
1236 {
1237   return (struct line *) (buf->buf + buf->alloc);
1238 }
1239
1240 /* Return a pointer to the first character of the field specified
1241    by KEY in LINE. */
1242
1243 static char *
1244 begfield (const struct line *line, const struct keyfield *key)
1245 {
1246   char *ptr = line->text, *lim = ptr + line->length - 1;
1247   size_t sword = key->sword;
1248   size_t schar = key->schar;
1249   size_t remaining_bytes;
1250
1251   /* The leading field separator itself is included in a field when -t
1252      is absent.  */
1253
1254   if (tab != TAB_DEFAULT)
1255     while (ptr < lim && sword--)
1256       {
1257         while (ptr < lim && *ptr != tab)
1258           ++ptr;
1259         if (ptr < lim)
1260           ++ptr;
1261       }
1262   else
1263     while (ptr < lim && sword--)
1264       {
1265         while (ptr < lim && blanks[to_uchar (*ptr)])
1266           ++ptr;
1267         while (ptr < lim && !blanks[to_uchar (*ptr)])
1268           ++ptr;
1269       }
1270
1271   if (key->skipsblanks)
1272     while (ptr < lim && blanks[to_uchar (*ptr)])
1273       ++ptr;
1274
1275   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1276   remaining_bytes = lim - ptr;
1277   if (schar < remaining_bytes)
1278     ptr += schar;
1279   else
1280     ptr = lim;
1281
1282   return ptr;
1283 }
1284
1285 /* Return the limit of (a pointer to the first character after) the field
1286    in LINE specified by KEY. */
1287
1288 static char *
1289 limfield (const struct line *line, const struct keyfield *key)
1290 {
1291   char *ptr = line->text, *lim = ptr + line->length - 1;
1292   size_t eword = key->eword, echar = key->echar;
1293   size_t remaining_bytes;
1294
1295   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1296      whichever comes first.  If there are more than EWORD fields, leave
1297      PTR pointing at the beginning of the field having zero-based index,
1298      EWORD.  If a delimiter character was specified (via -t), then that
1299      `beginning' is the first character following the delimiting TAB.
1300      Otherwise, leave PTR pointing at the first `blank' character after
1301      the preceding field.  */
1302   if (tab != TAB_DEFAULT)
1303     while (ptr < lim && eword--)
1304       {
1305         while (ptr < lim && *ptr != tab)
1306           ++ptr;
1307         if (ptr < lim && (eword | echar))
1308           ++ptr;
1309       }
1310   else
1311     while (ptr < lim && eword--)
1312       {
1313         while (ptr < lim && blanks[to_uchar (*ptr)])
1314           ++ptr;
1315         while (ptr < lim && !blanks[to_uchar (*ptr)])
1316           ++ptr;
1317       }
1318
1319 #ifdef POSIX_UNSPECIFIED
1320   /* The following block of code makes GNU sort incompatible with
1321      standard Unix sort, so it's ifdef'd out for now.
1322      The POSIX spec isn't clear on how to interpret this.
1323      FIXME: request clarification.
1324
1325      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1326      Date: Thu, 30 May 96 12:20:41 -0400
1327      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1328
1329      [...]I believe I've found another bug in `sort'.
1330
1331      $ cat /tmp/sort.in
1332      a b c 2 d
1333      pq rs 1 t
1334      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1335      a b c 2 d
1336      pq rs 1 t
1337      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1338      pq rs 1 t
1339      a b c 2 d
1340
1341      Unix sort produced the answer I expected: sort on the single character
1342      in column 7.  GNU sort produced different results, because it disagrees
1343      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1344      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1345      "skip M-1 fields, then either N-1 characters or the rest of the current
1346      field, whichever comes first".  This extra clause applies only to
1347      key-ends, not key-starts.
1348      */
1349
1350   /* Make LIM point to the end of (one byte past) the current field.  */
1351   if (tab != TAB_DEFAULT)
1352     {
1353       char *newlim;
1354       newlim = memchr (ptr, tab, lim - ptr);
1355       if (newlim)
1356         lim = newlim;
1357     }
1358   else
1359     {
1360       char *newlim;
1361       newlim = ptr;
1362       while (newlim < lim && blanks[to_uchar (*newlim)])
1363         ++newlim;
1364       while (newlim < lim && !blanks[to_uchar (*newlim)])
1365         ++newlim;
1366       lim = newlim;
1367     }
1368 #endif
1369
1370   /* If we're ignoring leading blanks when computing the End
1371      of the field, don't start counting bytes until after skipping
1372      past any leading blanks. */
1373   if (key->skipeblanks)
1374     while (ptr < lim && blanks[to_uchar (*ptr)])
1375       ++ptr;
1376
1377   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1378   remaining_bytes = lim - ptr;
1379   if (echar < remaining_bytes)
1380     ptr += echar;
1381   else
1382     ptr = lim;
1383
1384   return ptr;
1385 }
1386
1387 /* Fill BUF reading from FP, moving buf->left bytes from the end
1388    of buf->buf to the beginning first.  If EOF is reached and the
1389    file wasn't terminated by a newline, supply one.  Set up BUF's line
1390    table too.  FILE is the name of the file corresponding to FP.
1391    Return true if some input was read.  */
1392
1393 static bool
1394 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1395 {
1396   struct keyfield const *key = keylist;
1397   char eol = eolchar;
1398   size_t line_bytes = buf->line_bytes;
1399   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1400
1401   if (buf->eof)
1402     return false;
1403
1404   if (buf->used != buf->left)
1405     {
1406       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1407       buf->used = buf->left;
1408       buf->nlines = 0;
1409     }
1410
1411   for (;;)
1412     {
1413       char *ptr = buf->buf + buf->used;
1414       struct line *linelim = buffer_linelim (buf);
1415       struct line *line = linelim - buf->nlines;
1416       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1417       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1418
1419       while (line_bytes + 1 < avail)
1420         {
1421           /* Read as many bytes as possible, but do not read so many
1422              bytes that there might not be enough room for the
1423              corresponding line array.  The worst case is when the
1424              rest of the input file consists entirely of newlines,
1425              except that the last byte is not a newline.  */
1426           size_t readsize = (avail - 1) / (line_bytes + 1);
1427           size_t bytes_read = fread (ptr, 1, readsize, fp);
1428           char *ptrlim = ptr + bytes_read;
1429           char *p;
1430           avail -= bytes_read;
1431
1432           if (bytes_read != readsize)
1433             {
1434               if (ferror (fp))
1435                 die (_("read failed"), file);
1436               if (feof (fp))
1437                 {
1438                   buf->eof = true;
1439                   if (buf->buf == ptrlim)
1440                     return false;
1441                   if (ptrlim[-1] != eol)
1442                     *ptrlim++ = eol;
1443                 }
1444             }
1445
1446           /* Find and record each line in the just-read input.  */
1447           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1448             {
1449               ptr = p + 1;
1450               line--;
1451               line->text = line_start;
1452               line->length = ptr - line_start;
1453               mergesize = MAX (mergesize, line->length);
1454               avail -= line_bytes;
1455
1456               if (key)
1457                 {
1458                   /* Precompute the position of the first key for
1459                      efficiency.  */
1460                   line->keylim = (key->eword == SIZE_MAX
1461                                   ? p
1462                                   : limfield (line, key));
1463
1464                   if (key->sword != SIZE_MAX)
1465                     line->keybeg = begfield (line, key);
1466                   else
1467                     {
1468                       if (key->skipsblanks)
1469                         while (blanks[to_uchar (*line_start)])
1470                           line_start++;
1471                       line->keybeg = line_start;
1472                     }
1473                 }
1474
1475               line_start = ptr;
1476             }
1477
1478           ptr = ptrlim;
1479           if (buf->eof)
1480             break;
1481         }
1482
1483       buf->used = ptr - buf->buf;
1484       buf->nlines = buffer_linelim (buf) - line;
1485       if (buf->nlines != 0)
1486         {
1487           buf->left = ptr - line_start;
1488           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1489           return true;
1490         }
1491
1492       /* The current input line is too long to fit in the buffer.
1493          Double the buffer size and try again.  */
1494       buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1495     }
1496 }
1497
1498 /* Compare strings A and B as numbers without explicitly converting them to
1499    machine numbers.  Comparatively slow for short strings, but asymptotically
1500    hideously fast. */
1501
1502 static int
1503 numcompare (const char *a, const char *b)
1504 {
1505   while (blanks[to_uchar (*a)])
1506     a++;
1507   while (blanks[to_uchar (*b)])
1508     b++;
1509
1510   return strnumcmp (a, b, decimal_point, thousands_sep);
1511 }
1512
1513 static int
1514 general_numcompare (const char *sa, const char *sb)
1515 {
1516   /* FIXME: add option to warn about failed conversions.  */
1517   /* FIXME: maybe add option to try expensive FP conversion
1518      only if A and B can't be compared more cheaply/accurately.  */
1519
1520   char *ea;
1521   char *eb;
1522   double a = strtod (sa, &ea);
1523   double b = strtod (sb, &eb);
1524
1525   /* Put conversion errors at the start of the collating sequence.  */
1526   if (sa == ea)
1527     return sb == eb ? 0 : -1;
1528   if (sb == eb)
1529     return 1;
1530
1531   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1532      conversion errors but before numbers; sort them by internal
1533      bit-pattern, for lack of a more portable alternative.  */
1534   return (a < b ? -1
1535           : a > b ? 1
1536           : a == b ? 0
1537           : b == b ? -1
1538           : a == a ? 1
1539           : memcmp ((char *) &a, (char *) &b, sizeof a));
1540 }
1541
1542 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1543    Return 0 if the name in S is not recognized.  */
1544
1545 static int
1546 getmonth (char const *month, size_t len)
1547 {
1548   size_t lo = 0;
1549   size_t hi = MONTHS_PER_YEAR;
1550   char const *monthlim = month + len;
1551
1552   for (;;)
1553     {
1554       if (month == monthlim)
1555         return 0;
1556       if (!blanks[to_uchar (*month)])
1557         break;
1558       ++month;
1559     }
1560
1561   do
1562     {
1563       size_t ix = (lo + hi) / 2;
1564       char const *m = month;
1565       char const *n = monthtab[ix].name;
1566
1567       for (;; m++, n++)
1568         {
1569           if (!*n)
1570             return monthtab[ix].val;
1571           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1572             {
1573               hi = ix;
1574               break;
1575             }
1576           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1577             {
1578               lo = ix + 1;
1579               break;
1580             }
1581         }
1582     }
1583   while (lo < hi);
1584
1585   return 0;
1586 }
1587
1588 /* A source of random data.  */
1589 static struct randread_source *randread_source;
1590
1591 /* Return the Ith randomly-generated state.  The caller must invoke
1592    random_state (H) for all H less than I before invoking random_state
1593    (I).  */
1594
1595 static struct md5_ctx
1596 random_state (size_t i)
1597 {
1598   /* An array of states resulting from the random data, and counts of
1599      its used and allocated members.  */
1600   static struct md5_ctx *state;
1601   static size_t used;
1602   static size_t allocated;
1603
1604   struct md5_ctx *s = &state[i];
1605
1606   if (used <= i)
1607     {
1608       unsigned char buf[MD5_DIGEST_SIZE];
1609
1610       used++;
1611
1612       if (allocated <= i)
1613         {
1614           state = X2NREALLOC (state, &allocated);
1615           s = &state[i];
1616         }
1617
1618       randread (randread_source, buf, sizeof buf);
1619       md5_init_ctx (s);
1620       md5_process_bytes (buf, sizeof buf, s);
1621     }
1622
1623   return *s;
1624 }
1625
1626 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1627    with length LENGTHB.  Return negative if less, zero if equal,
1628    positive if greater.  */
1629
1630 static int
1631 cmp_hashes (char const *texta, size_t lena,
1632             char const *textb, size_t lenb)
1633 {
1634   /* Try random hashes until a pair of hashes disagree.  But if the
1635      first pair of random hashes agree, check whether the keys are
1636      identical and if so report no difference.  */
1637   int diff;
1638   size_t i;
1639   for (i = 0; ; i++)
1640     {
1641       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1642       struct md5_ctx s[2];
1643       s[0] = s[1] = random_state (i);
1644       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1645       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1646       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1647       if (diff != 0)
1648         break;
1649       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1650         break;
1651     }
1652
1653   return diff;
1654 }
1655
1656 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1657    using one or more random hash functions.  */
1658
1659 static int
1660 compare_random (char *restrict texta, size_t lena,
1661                 char *restrict textb, size_t lenb)
1662 {
1663   int diff;
1664
1665   if (! hard_LC_COLLATE)
1666     diff = cmp_hashes (texta, lena, textb, lenb);
1667   else
1668     {
1669       /* Transform the text into the basis of comparison, so that byte
1670          strings that would otherwise considered to be equal are
1671          considered equal here even if their bytes differ.  */
1672
1673       char *buf = NULL;
1674       char stackbuf[4000];
1675       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1676       bool a_fits = tlena <= sizeof stackbuf;
1677       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1678                                (a_fits ? sizeof stackbuf - tlena : 0),
1679                                textb, lenb);
1680
1681       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1682         buf = stackbuf;
1683       else
1684         {
1685           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1686              faster by avoiding the need for an extra buffer copy.  */
1687           buf = xmalloc (tlena + tlenb + 1);
1688           xmemxfrm (buf, tlena + 1, texta, lena);
1689           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1690         }
1691
1692       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1693
1694       if (buf != stackbuf)
1695         free (buf);
1696     }
1697
1698   return diff;
1699 }
1700
1701 /* Compare two lines A and B trying every key in sequence until there
1702    are no more keys or a difference is found. */
1703
1704 static int
1705 keycompare (const struct line *a, const struct line *b)
1706 {
1707   struct keyfield const *key = keylist;
1708
1709   /* For the first iteration only, the key positions have been
1710      precomputed for us. */
1711   char *texta = a->keybeg;
1712   char *textb = b->keybeg;
1713   char *lima = a->keylim;
1714   char *limb = b->keylim;
1715
1716   int diff;
1717
1718   for (;;)
1719     {
1720       char const *translate = key->translate;
1721       bool const *ignore = key->ignore;
1722
1723       /* Find the lengths. */
1724       size_t lena = lima <= texta ? 0 : lima - texta;
1725       size_t lenb = limb <= textb ? 0 : limb - textb;
1726
1727       /* Actually compare the fields. */
1728
1729       if (key->random)
1730         diff = compare_random (texta, lena, textb, lenb);
1731       else if (key->numeric | key->general_numeric)
1732         {
1733           char savea = *lima, saveb = *limb;
1734
1735           *lima = *limb = '\0';
1736           diff = ((key->numeric ? numcompare : general_numcompare)
1737                   (texta, textb));
1738           *lima = savea, *limb = saveb;
1739         }
1740       else if (key->month)
1741         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1742       /* Sorting like this may become slow, so in a simple locale the user
1743          can select a faster sort that is similar to ascii sort.  */
1744       else if (hard_LC_COLLATE)
1745         {
1746           if (ignore || translate)
1747             {
1748               char buf[4000];
1749               size_t size = lena + 1 + lenb + 1;
1750               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1751               char *copy_b = copy_a + lena + 1;
1752               size_t new_len_a, new_len_b, i;
1753
1754               /* Ignore and/or translate chars before comparing.  */
1755               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1756                 {
1757                   if (i < lena)
1758                     {
1759                       copy_a[new_len_a] = (translate
1760                                            ? translate[to_uchar (texta[i])]
1761                                            : texta[i]);
1762                       if (!ignore || !ignore[to_uchar (texta[i])])
1763                         ++new_len_a;
1764                     }
1765                   if (i < lenb)
1766                     {
1767                       copy_b[new_len_b] = (translate
1768                                            ? translate[to_uchar (textb[i])]
1769                                            : textb [i]);
1770                       if (!ignore || !ignore[to_uchar (textb[i])])
1771                         ++new_len_b;
1772                     }
1773                 }
1774
1775               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1776
1777               if (sizeof buf < size)
1778                 free (copy_a);
1779             }
1780           else if (lena == 0)
1781             diff = - NONZERO (lenb);
1782           else if (lenb == 0)
1783             goto greater;
1784           else
1785             diff = xmemcoll (texta, lena, textb, lenb);
1786         }
1787       else if (ignore)
1788         {
1789 #define CMP_WITH_IGNORE(A, B)                                           \
1790   do                                                                    \
1791     {                                                                   \
1792           for (;;)                                                      \
1793             {                                                           \
1794               while (texta < lima && ignore[to_uchar (*texta)])         \
1795                 ++texta;                                                \
1796               while (textb < limb && ignore[to_uchar (*textb)])         \
1797                 ++textb;                                                \
1798               if (! (texta < lima && textb < limb))                     \
1799                 break;                                                  \
1800               diff = to_uchar (A) - to_uchar (B);                       \
1801               if (diff)                                                 \
1802                 goto not_equal;                                         \
1803               ++texta;                                                  \
1804               ++textb;                                                  \
1805             }                                                           \
1806                                                                         \
1807           diff = (texta < lima) - (textb < limb);                       \
1808     }                                                                   \
1809   while (0)
1810
1811           if (translate)
1812             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1813                              translate[to_uchar (*textb)]);
1814           else
1815             CMP_WITH_IGNORE (*texta, *textb);
1816         }
1817       else if (lena == 0)
1818         diff = - NONZERO (lenb);
1819       else if (lenb == 0)
1820         goto greater;
1821       else
1822         {
1823           if (translate)
1824             {
1825               while (texta < lima && textb < limb)
1826                 {
1827                   diff = (to_uchar (translate[to_uchar (*texta++)])
1828                           - to_uchar (translate[to_uchar (*textb++)]));
1829                   if (diff)
1830                     goto not_equal;
1831                 }
1832             }
1833           else
1834             {
1835               diff = memcmp (texta, textb, MIN (lena, lenb));
1836               if (diff)
1837                 goto not_equal;
1838             }
1839           diff = lena < lenb ? -1 : lena != lenb;
1840         }
1841
1842       if (diff)
1843         goto not_equal;
1844
1845       key = key->next;
1846       if (! key)
1847         break;
1848
1849       /* Find the beginning and limit of the next field.  */
1850       if (key->eword != SIZE_MAX)
1851         lima = limfield (a, key), limb = limfield (b, key);
1852       else
1853         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1854
1855       if (key->sword != SIZE_MAX)
1856         texta = begfield (a, key), textb = begfield (b, key);
1857       else
1858         {
1859           texta = a->text, textb = b->text;
1860           if (key->skipsblanks)
1861             {
1862               while (texta < lima && blanks[to_uchar (*texta)])
1863                 ++texta;
1864               while (textb < limb && blanks[to_uchar (*textb)])
1865                 ++textb;
1866             }
1867         }
1868     }
1869
1870   return 0;
1871
1872  greater:
1873   diff = 1;
1874  not_equal:
1875   return key->reverse ? -diff : diff;
1876 }
1877
1878 /* Compare two lines A and B, returning negative, zero, or positive
1879    depending on whether A compares less than, equal to, or greater than B. */
1880
1881 static int
1882 compare (const struct line *a, const struct line *b)
1883 {
1884   int diff;
1885   size_t alen, blen;
1886
1887   /* First try to compare on the specified keys (if any).
1888      The only two cases with no key at all are unadorned sort,
1889      and unadorned sort -r. */
1890   if (keylist)
1891     {
1892       diff = keycompare (a, b);
1893       if (diff | unique | stable)
1894         return diff;
1895     }
1896
1897   /* If the keys all compare equal (or no keys were specified)
1898      fall through to the default comparison.  */
1899   alen = a->length - 1, blen = b->length - 1;
1900
1901   if (alen == 0)
1902     diff = - NONZERO (blen);
1903   else if (blen == 0)
1904     diff = 1;
1905   else if (hard_LC_COLLATE)
1906     diff = xmemcoll (a->text, alen, b->text, blen);
1907   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1908     diff = alen < blen ? -1 : alen != blen;
1909
1910   return reverse ? -diff : diff;
1911 }
1912
1913 /* Check that the lines read from FILE_NAME come in order.  Return
1914    true if they are in order.  If CHECKONLY == 'c', also print a
1915    diagnostic (FILE_NAME, line number, contents of line) to stderr if
1916    they are not in order.  */
1917
1918 static bool
1919 check (char const *file_name, char checkonly)
1920 {
1921   FILE *fp = xfopen (file_name, "r");
1922   struct buffer buf;            /* Input buffer. */
1923   struct line temp;             /* Copy of previous line. */
1924   size_t alloc = 0;
1925   uintmax_t line_number = 0;
1926   struct keyfield const *key = keylist;
1927   bool nonunique = ! unique;
1928   bool ordered = true;
1929
1930   initbuf (&buf, sizeof (struct line),
1931            MAX (merge_buffer_size, sort_size));
1932   temp.text = NULL;
1933
1934   while (fillbuf (&buf, fp, file_name))
1935     {
1936       struct line const *line = buffer_linelim (&buf);
1937       struct line const *linebase = line - buf.nlines;
1938
1939       /* Make sure the line saved from the old buffer contents is
1940          less than or equal to the first line of the new buffer. */
1941       if (alloc && nonunique <= compare (&temp, line - 1))
1942         {
1943         found_disorder:
1944           {
1945             if (checkonly == 'c')
1946               {
1947                 struct line const *disorder_line = line - 1;
1948                 uintmax_t disorder_line_number =
1949                   buffer_linelim (&buf) - disorder_line + line_number;
1950                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1951                 fprintf (stderr, _("%s: %s:%s: disorder: "),
1952                          program_name, file_name,
1953                          umaxtostr (disorder_line_number, hr_buf));
1954                 write_bytes (disorder_line->text, disorder_line->length,
1955                              stderr, _("standard error"));
1956               }
1957
1958             ordered = false;
1959             break;
1960           }
1961         }
1962
1963       /* Compare each line in the buffer with its successor.  */
1964       while (linebase < --line)
1965         if (nonunique <= compare (line, line - 1))
1966           goto found_disorder;
1967
1968       line_number += buf.nlines;
1969
1970       /* Save the last line of the buffer.  */
1971       if (alloc < line->length)
1972         {
1973           do
1974             {
1975               alloc *= 2;
1976               if (! alloc)
1977                 {
1978                   alloc = line->length;
1979                   break;
1980                 }
1981             }
1982           while (alloc < line->length);
1983
1984           temp.text = xrealloc (temp.text, alloc);
1985         }
1986       memcpy (temp.text, line->text, line->length);
1987       temp.length = line->length;
1988       if (key)
1989         {
1990           temp.keybeg = temp.text + (line->keybeg - line->text);
1991           temp.keylim = temp.text + (line->keylim - line->text);
1992         }
1993     }
1994
1995   xfclose (fp, file_name);
1996   free (buf.buf);
1997   free (temp.text);
1998   return ordered;
1999 }
2000
2001 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2002    files (all of which are at the start of the FILES array), and
2003    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2004    Close input and output files before returning.
2005    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2006    the output file is standard output.  If OFP is NULL, the output
2007    file has not been opened yet (or written to, if standard output).  */
2008
2009 static void
2010 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2011           FILE *ofp, char const *output_file)
2012 {
2013   FILE *fps[NMERGE];            /* Input streams for each file.  */
2014   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2015   struct line saved;            /* Saved line storage for unique check. */
2016   struct line const *savedline = NULL;
2017                                 /* &saved if there is a saved line. */
2018   size_t savealloc = 0;         /* Size allocated for the saved line. */
2019   struct line const *cur[NMERGE]; /* Current line in each line table. */
2020   struct line const *base[NMERGE]; /* Base of each line table.  */
2021   size_t ord[NMERGE];           /* Table representing a permutation of fps,
2022                                    such that cur[ord[0]] is the smallest line
2023                                    and will be next output. */
2024   size_t i;
2025   size_t j;
2026   size_t t;
2027   struct keyfield const *key = keylist;
2028   saved.text = NULL;
2029
2030   /* Read initial lines from each input file. */
2031   for (i = 0; i < nfiles; )
2032     {
2033       fps[i] = (files[i].pid
2034                 ? open_temp (files[i].name, files[i].pid)
2035                 : xfopen (files[i].name, "r"));
2036       initbuf (&buffer[i], sizeof (struct line),
2037                MAX (merge_buffer_size, sort_size / nfiles));
2038       if (fillbuf (&buffer[i], fps[i], files[i].name))
2039         {
2040           struct line const *linelim = buffer_linelim (&buffer[i]);
2041           cur[i] = linelim - 1;
2042           base[i] = linelim - buffer[i].nlines;
2043           i++;
2044         }
2045       else
2046         {
2047           /* fps[i] is empty; eliminate it from future consideration.  */
2048           xfclose (fps[i], files[i].name);
2049           if (i < ntemps)
2050             {
2051               ntemps--;
2052               zaptemp (files[i].name);
2053             }
2054           free (buffer[i].buf);
2055           --nfiles;
2056           for (j = i; j < nfiles; ++j)
2057             files[j] = files[j + 1];
2058         }
2059     }
2060
2061   if (! ofp)
2062     ofp = xfopen (output_file, "w");
2063
2064   /* Set up the ord table according to comparisons among input lines.
2065      Since this only reorders two items if one is strictly greater than
2066      the other, it is stable. */
2067   for (i = 0; i < nfiles; ++i)
2068     ord[i] = i;
2069   for (i = 1; i < nfiles; ++i)
2070     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2071       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2072
2073   /* Repeatedly output the smallest line until no input remains. */
2074   while (nfiles)
2075     {
2076       struct line const *smallest = cur[ord[0]];
2077
2078       /* If uniquified output is turned on, output only the first of
2079          an identical series of lines. */
2080       if (unique)
2081         {
2082           if (savedline && compare (savedline, smallest))
2083             {
2084               savedline = NULL;
2085               write_bytes (saved.text, saved.length, ofp, output_file);
2086             }
2087           if (!savedline)
2088             {
2089               savedline = &saved;
2090               if (savealloc < smallest->length)
2091                 {
2092                   do
2093                     if (! savealloc)
2094                       {
2095                         savealloc = smallest->length;
2096                         break;
2097                       }
2098                   while ((savealloc *= 2) < smallest->length);
2099
2100                   saved.text = xrealloc (saved.text, savealloc);
2101                 }
2102               saved.length = smallest->length;
2103               memcpy (saved.text, smallest->text, saved.length);
2104               if (key)
2105                 {
2106                   saved.keybeg =
2107                     saved.text + (smallest->keybeg - smallest->text);
2108                   saved.keylim =
2109                     saved.text + (smallest->keylim - smallest->text);
2110                 }
2111             }
2112         }
2113       else
2114         write_bytes (smallest->text, smallest->length, ofp, output_file);
2115
2116       /* Check if we need to read more lines into core. */
2117       if (base[ord[0]] < smallest)
2118         cur[ord[0]] = smallest - 1;
2119       else
2120         {
2121           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2122             {
2123               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2124               cur[ord[0]] = linelim - 1;
2125               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2126             }
2127           else
2128             {
2129               /* We reached EOF on fps[ord[0]].  */
2130               for (i = 1; i < nfiles; ++i)
2131                 if (ord[i] > ord[0])
2132                   --ord[i];
2133               --nfiles;
2134               xfclose (fps[ord[0]], files[ord[0]].name);
2135               if (ord[0] < ntemps)
2136                 {
2137                   ntemps--;
2138                   zaptemp (files[ord[0]].name);
2139                 }
2140               free (buffer[ord[0]].buf);
2141               for (i = ord[0]; i < nfiles; ++i)
2142                 {
2143                   fps[i] = fps[i + 1];
2144                   files[i] = files[i + 1];
2145                   buffer[i] = buffer[i + 1];
2146                   cur[i] = cur[i + 1];
2147                   base[i] = base[i + 1];
2148                 }
2149               for (i = 0; i < nfiles; ++i)
2150                 ord[i] = ord[i + 1];
2151               continue;
2152             }
2153         }
2154
2155       /* The new line just read in may be larger than other lines
2156          already in main memory; push it back in the queue until we
2157          encounter a line larger than it.  Optimize for the common
2158          case where the new line is smallest.  */
2159       {
2160         size_t lo = 1;
2161         size_t hi = nfiles;
2162         size_t probe = lo;
2163         size_t ord0 = ord[0];
2164         size_t count_of_smaller_lines;
2165
2166         while (lo < hi)
2167           {
2168             int cmp = compare (cur[ord0], cur[ord[probe]]);
2169             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2170               hi = probe;
2171             else
2172               lo = probe + 1;
2173             probe = (lo + hi) / 2;
2174           }
2175
2176         count_of_smaller_lines = lo - 1;
2177         for (j = 0; j < count_of_smaller_lines; j++)
2178           ord[j] = ord[j + 1];
2179         ord[count_of_smaller_lines] = ord0;
2180       }
2181
2182       /* Free up some resources every once in a while.  */
2183       if (MAX_PROCS_BEFORE_REAP < nprocs)
2184         reap_some ();
2185     }
2186
2187   if (unique && savedline)
2188     {
2189       write_bytes (saved.text, saved.length, ofp, output_file);
2190       free (saved.text);
2191     }
2192
2193   xfclose (ofp, output_file);
2194 }
2195
2196 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2197    and HI (with NHI members).  T, LO, and HI point just past their
2198    respective arrays, and the arrays are in reverse order.  NLO and
2199    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2200
2201 static inline void
2202 mergelines (struct line *t,
2203             struct line const *lo, size_t nlo,
2204             struct line const *hi, size_t nhi)
2205 {
2206   for (;;)
2207     if (compare (lo - 1, hi - 1) <= 0)
2208       {
2209         *--t = *--lo;
2210         if (! --nlo)
2211           {
2212             /* HI - NHI equalled T - (NLO + NHI) when this function
2213                began.  Therefore HI must equal T now, and there is no
2214                need to copy from HI to T.  */
2215             return;
2216           }
2217       }
2218     else
2219       {
2220         *--t = *--hi;
2221         if (! --nhi)
2222           {
2223             do
2224               *--t = *--lo;
2225             while (--nlo);
2226
2227             return;
2228           }
2229       }
2230 }
2231
2232 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2233    NLINES must be at least 2.
2234    The input and output arrays are in reverse order, and LINES and
2235    TEMP point just past the end of their respective arrays.
2236
2237    Use a recursive divide-and-conquer algorithm, in the style
2238    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2239    the optimization suggested by exercise 5.2.4-10; this requires room
2240    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2241    writes that this memory optimization was originally published by
2242    D. A. Bell, Comp J. 1 (1958), 75.  */
2243
2244 static void
2245 sortlines (struct line *lines, size_t nlines, struct line *temp)
2246 {
2247   if (nlines == 2)
2248     {
2249       if (0 < compare (&lines[-1], &lines[-2]))
2250         {
2251           struct line tmp = lines[-1];
2252           lines[-1] = lines[-2];
2253           lines[-2] = tmp;
2254         }
2255     }
2256   else
2257     {
2258       size_t nlo = nlines / 2;
2259       size_t nhi = nlines - nlo;
2260       struct line *lo = lines;
2261       struct line *hi = lines - nlo;
2262       struct line *sorted_lo = temp;
2263
2264       sortlines (hi, nhi, temp);
2265       if (1 < nlo)
2266         sortlines_temp (lo, nlo, sorted_lo);
2267       else
2268         sorted_lo[-1] = lo[-1];
2269
2270       mergelines (lines, sorted_lo, nlo, hi, nhi);
2271     }
2272 }
2273
2274 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2275    rather than sorting in place.  */
2276
2277 static void
2278 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2279 {
2280   if (nlines == 2)
2281     {
2282       /* Declare `swap' as int, not bool, to work around a bug
2283          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2284          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2285       int swap = (0 < compare (&lines[-1], &lines[-2]));
2286       temp[-1] = lines[-1 - swap];
2287       temp[-2] = lines[-2 + swap];
2288     }
2289   else
2290     {
2291       size_t nlo = nlines / 2;
2292       size_t nhi = nlines - nlo;
2293       struct line *lo = lines;
2294       struct line *hi = lines - nlo;
2295       struct line *sorted_hi = temp - nlo;
2296
2297       sortlines_temp (hi, nhi, sorted_hi);
2298       if (1 < nlo)
2299         sortlines (lo, nlo, temp);
2300
2301       mergelines (temp, lo, nlo, sorted_hi, nhi);
2302     }
2303 }
2304
2305 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2306    the same as OUTFILE.  If found, merge the found instances (and perhaps
2307    some other files) into a temporary file so that it can in turn be
2308    merged into OUTFILE without destroying OUTFILE before it is completely
2309    read.  Return the new value of NFILES, which differs from the old if
2310    some merging occurred.
2311
2312    This test ensures that an otherwise-erroneous use like
2313    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2314    It's not clear that POSIX requires this nicety.
2315    Detect common error cases, but don't try to catch obscure cases like
2316    "cat ... FILE ... | sort -m -o FILE"
2317    where traditional "sort" doesn't copy the input and where
2318    people should know that they're getting into trouble anyway.
2319    Catching these obscure cases would slow down performance in
2320    common cases.  */
2321
2322 static size_t
2323 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2324                       size_t nfiles, char const *outfile)
2325 {
2326   size_t i;
2327   bool got_outstat = false;
2328   struct stat outstat;
2329
2330   for (i = ntemps; i < nfiles; i++)
2331     {
2332       bool is_stdin = STREQ (files[i].name, "-");
2333       bool same;
2334       struct stat instat;
2335
2336       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2337         same = true;
2338       else
2339         {
2340           if (! got_outstat)
2341             {
2342               if ((outfile
2343                    ? stat (outfile, &outstat)
2344                    : fstat (STDOUT_FILENO, &outstat))
2345                   != 0)
2346                 break;
2347               got_outstat = true;
2348             }
2349
2350           same = (((is_stdin
2351                     ? fstat (STDIN_FILENO, &instat)
2352                     : stat (files[i].name, &instat))
2353                    == 0)
2354                   && SAME_INODE (instat, outstat));
2355         }
2356
2357       if (same)
2358         {
2359           FILE *tftp;
2360           pid_t pid;
2361           char *temp = create_temp (&tftp, &pid);
2362           mergefps (&files[i],0, nfiles - i, tftp, temp);
2363           files[i].name = temp;
2364           files[i].pid = pid;
2365           return i + 1;
2366         }
2367     }
2368
2369   return nfiles;
2370 }
2371
2372 /* Merge the input FILES.  NTEMPS is the number of files at the
2373    start of FILES that are temporary; it is zero at the top level.
2374    NFILES is the total number of files.  Put the output in
2375    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2376
2377 static void
2378 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2379        char const *output_file)
2380 {
2381   while (NMERGE < nfiles)
2382     {
2383       /* Number of input files processed so far.  */
2384       size_t in;
2385
2386       /* Number of output files generated so far.  */
2387       size_t out;
2388
2389       /* nfiles % NMERGE; this counts input files that are left over
2390          after all full-sized merges have been done.  */
2391       size_t remainder;
2392
2393       /* Number of easily-available slots at the next loop iteration.  */
2394       size_t cheap_slots;
2395
2396       /* Do as many NMERGE-size merges as possible.  */
2397       for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2398         {
2399           FILE *tfp;
2400           pid_t pid;
2401           char *temp = create_temp (&tfp, &pid);
2402           size_t nt = MIN (ntemps, NMERGE);
2403           ntemps -= nt;
2404           mergefps (&files[in], nt, NMERGE, tfp, temp);
2405           files[out].name = temp;
2406           files[out].pid = pid;
2407         }
2408
2409       remainder = nfiles - in;
2410       cheap_slots = NMERGE - out % NMERGE;
2411
2412       if (cheap_slots < remainder)
2413         {
2414           /* So many files remain that they can't all be put into the last
2415              NMERGE-sized output window.  Do one more merge.  Merge as few
2416              files as possible, to avoid needless I/O.  */
2417           size_t nshortmerge = remainder - cheap_slots + 1;
2418           FILE *tfp;
2419           pid_t pid;
2420           char *temp = create_temp (&tfp, &pid);
2421           size_t nt = MIN (ntemps, nshortmerge);
2422           ntemps -= nt;
2423           mergefps (&files[in], nt, nshortmerge, tfp, temp);
2424           files[out].name = temp;
2425           files[out++].pid = pid;
2426           in += nshortmerge;
2427         }
2428
2429       /* Put the remaining input files into the last NMERGE-sized output
2430          window, so they will be merged in the next pass.  */
2431       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2432       ntemps += out;
2433       nfiles -= in - out;
2434     }
2435
2436   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2437   mergefps (files, ntemps, nfiles, NULL, output_file);
2438 }
2439
2440 /* Sort NFILES FILES onto OUTPUT_FILE. */
2441
2442 static void
2443 sort (char * const *files, size_t nfiles, char const *output_file)
2444 {
2445   struct buffer buf;
2446   size_t ntemps = 0;
2447   bool output_file_created = false;
2448
2449   buf.alloc = 0;
2450
2451   while (nfiles)
2452     {
2453       char const *temp_output;
2454       char const *file = *files;
2455       FILE *fp = xfopen (file, "r");
2456       FILE *tfp;
2457       size_t bytes_per_line = (2 * sizeof (struct line)
2458                                - sizeof (struct line) / 2);
2459
2460       if (! buf.alloc)
2461         initbuf (&buf, bytes_per_line,
2462                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2463       buf.eof = false;
2464       files++;
2465       nfiles--;
2466
2467       while (fillbuf (&buf, fp, file))
2468         {
2469           struct line *line;
2470           struct line *linebase;
2471
2472           if (buf.eof && nfiles
2473               && (bytes_per_line + 1
2474                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2475             {
2476               /* End of file, but there is more input and buffer room.
2477                  Concatenate the next input file; this is faster in
2478                  the usual case.  */
2479               buf.left = buf.used;
2480               break;
2481             }
2482
2483           line = buffer_linelim (&buf);
2484           linebase = line - buf.nlines;
2485           if (1 < buf.nlines)
2486             sortlines (line, buf.nlines, linebase);
2487           if (buf.eof && !nfiles && !ntemps && !buf.left)
2488             {
2489               xfclose (fp, file);
2490               tfp = xfopen (output_file, "w");
2491               temp_output = output_file;
2492               output_file_created = true;
2493             }
2494           else
2495             {
2496               ++ntemps;
2497               temp_output = create_temp (&tfp, NULL);
2498             }
2499
2500           do
2501             {
2502               line--;
2503               write_bytes (line->text, line->length, tfp, temp_output);
2504               if (unique)
2505                 while (linebase < line && compare (line, line - 1) == 0)
2506                   line--;
2507             }
2508           while (linebase < line);
2509
2510           xfclose (tfp, temp_output);
2511
2512           /* Free up some resources every once in a while.  */
2513           if (MAX_PROCS_BEFORE_REAP < nprocs)
2514             reap_some ();
2515
2516           if (output_file_created)
2517             goto finish;
2518         }
2519       xfclose (fp, file);
2520     }
2521
2522  finish:
2523   free (buf.buf);
2524
2525   if (! output_file_created)
2526     {
2527       size_t i;
2528       struct tempnode *node = temphead;
2529       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2530       for (i = 0; node; i++)
2531         {
2532           tempfiles[i].name = node->name;
2533           tempfiles[i].pid = node->pid;
2534           node = node->next;
2535         }
2536       merge (tempfiles, ntemps, ntemps, output_file);
2537       free (tempfiles);
2538     }
2539 }
2540
2541 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2542
2543 static void
2544 insertkey (struct keyfield *key_arg)
2545 {
2546   struct keyfield **p;
2547   struct keyfield *key = xmemdup (key_arg, sizeof *key);
2548
2549   for (p = &keylist; *p; p = &(*p)->next)
2550     continue;
2551   *p = key;
2552   key->next = NULL;
2553 }
2554
2555 /* Report a bad field specification SPEC, with extra info MSGID.  */
2556
2557 static void badfieldspec (char const *, char const *)
2558      ATTRIBUTE_NORETURN;
2559 static void
2560 badfieldspec (char const *spec, char const *msgid)
2561 {
2562   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2563          _(msgid), quote (spec));
2564   abort ();
2565 }
2566
2567 /* Report incompatible options.  */
2568
2569 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2570 static void
2571 incompatible_options (char const *opts)
2572 {
2573   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2574   abort ();
2575 }
2576
2577 /* Check compatibility of ordering options.  */
2578
2579 static void
2580 check_ordering_compatibility (void)
2581 {
2582   struct keyfield const *key;
2583
2584   for (key = keylist; key; key = key->next)
2585     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2586               + !!key->ignore))
2587         || (key->random && key->translate))
2588       {
2589         char opts[7];
2590         char *p = opts;
2591         if (key->ignore == nondictionary)
2592           *p++ = 'd';
2593         if (key->translate)
2594           *p++ = 'f';
2595         if (key->general_numeric)
2596           *p++ = 'g';
2597         if (key->ignore == nonprinting)
2598           *p++ = 'i';
2599         if (key->month)
2600           *p++ = 'M';
2601         if (key->numeric)
2602           *p++ = 'n';
2603         if (key->random)
2604           *p++ = 'R';
2605         *p = '\0';
2606         incompatible_options (opts);
2607       }
2608 }
2609
2610 /* Parse the leading integer in STRING and store the resulting value
2611    (which must fit into size_t) into *VAL.  Return the address of the
2612    suffix after the integer.  If the value is too large, silently
2613    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
2614    failure; otherwise, report MSGID and exit on failure.  */
2615
2616 static char const *
2617 parse_field_count (char const *string, size_t *val, char const *msgid)
2618 {
2619   char *suffix;
2620   uintmax_t n;
2621
2622   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2623     {
2624     case LONGINT_OK:
2625     case LONGINT_INVALID_SUFFIX_CHAR:
2626       *val = n;
2627       if (*val == n)
2628         break;
2629       /* Fall through.  */
2630     case LONGINT_OVERFLOW:
2631     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2632       *val = SIZE_MAX;
2633       break;
2634
2635     case LONGINT_INVALID:
2636       if (msgid)
2637         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2638                _(msgid), quote (string));
2639       return NULL;
2640     }
2641
2642   return suffix;
2643 }
2644
2645 /* Handle interrupts and hangups. */
2646
2647 static void
2648 sighandler (int sig)
2649 {
2650   if (! SA_NOCLDSTOP)
2651     signal (sig, SIG_IGN);
2652
2653   cleanup ();
2654
2655   signal (sig, SIG_DFL);
2656   raise (sig);
2657 }
2658
2659 /* Set the ordering options for KEY specified in S.
2660    Return the address of the first character in S that
2661    is not a valid ordering option.
2662    BLANKTYPE is the kind of blanks that 'b' should skip. */
2663
2664 static char *
2665 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2666 {
2667   while (*s)
2668     {
2669       switch (*s)
2670         {
2671         case 'b':
2672           if (blanktype == bl_start || blanktype == bl_both)
2673             key->skipsblanks = true;
2674           if (blanktype == bl_end || blanktype == bl_both)
2675             key->skipeblanks = true;
2676           break;
2677         case 'd':
2678           key->ignore = nondictionary;
2679           break;
2680         case 'f':
2681           key->translate = fold_toupper;
2682           break;
2683         case 'g':
2684           key->general_numeric = true;
2685           break;
2686         case 'i':
2687           /* Option order should not matter, so don't let -i override
2688              -d.  -d implies -i, but -i does not imply -d.  */
2689           if (! key->ignore)
2690             key->ignore = nonprinting;
2691           break;
2692         case 'M':
2693           key->month = true;
2694           break;
2695         case 'n':
2696           key->numeric = true;
2697           break;
2698         case 'R':
2699           key->random = true;
2700           break;
2701         case 'r':
2702           key->reverse = true;
2703           break;
2704         default:
2705           return (char *) s;
2706         }
2707       ++s;
2708     }
2709   return (char *) s;
2710 }
2711
2712 static struct keyfield *
2713 key_init (struct keyfield *key)
2714 {
2715   memset (key, 0, sizeof *key);
2716   key->eword = SIZE_MAX;
2717   return key;
2718 }
2719
2720 int
2721 main (int argc, char **argv)
2722 {
2723   struct keyfield *key;
2724   struct keyfield key_buf;
2725   struct keyfield gkey;
2726   char const *s;
2727   int c = 0;
2728   char checkonly = 0;
2729   bool mergeonly = false;
2730   char *random_source = NULL;
2731   bool need_random = false;
2732   size_t nfiles = 0;
2733   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2734   bool obsolete_usage = (posix2_version () < 200112);
2735   char **files;
2736   char const *outfile = NULL;
2737
2738   initialize_main (&argc, &argv);
2739   program_name = argv[0];
2740   setlocale (LC_ALL, "");
2741   bindtextdomain (PACKAGE, LOCALEDIR);
2742   textdomain (PACKAGE);
2743
2744   initialize_exit_failure (SORT_FAILURE);
2745
2746   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2747 #if HAVE_NL_LANGINFO
2748   hard_LC_TIME = hard_locale (LC_TIME);
2749 #endif
2750
2751   /* Get locale's representation of the decimal point.  */
2752   {
2753     struct lconv const *locale = localeconv ();
2754
2755     /* If the locale doesn't define a decimal point, or if the decimal
2756        point is multibyte, use the C locale's decimal point.  FIXME:
2757        add support for multibyte decimal points.  */
2758     decimal_point = to_uchar (locale->decimal_point[0]);
2759     if (! decimal_point || locale->decimal_point[1])
2760       decimal_point = '.';
2761
2762     /* FIXME: add support for multibyte thousands separators.  */
2763     thousands_sep = to_uchar (*locale->thousands_sep);
2764     if (! thousands_sep || locale->thousands_sep[1])
2765       thousands_sep = -1;
2766   }
2767
2768   have_read_stdin = false;
2769   inittables ();
2770
2771   {
2772     size_t i;
2773     static int const sig[] =
2774       {
2775         /* The usual suspects.  */
2776         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2777 #ifdef SIGPOLL
2778         SIGPOLL,
2779 #endif
2780 #ifdef SIGPROF
2781         SIGPROF,
2782 #endif
2783 #ifdef SIGVTALRM
2784         SIGVTALRM,
2785 #endif
2786 #ifdef SIGXCPU
2787         SIGXCPU,
2788 #endif
2789 #ifdef SIGXFSZ
2790         SIGXFSZ,
2791 #endif
2792       };
2793     enum { nsigs = sizeof sig / sizeof sig[0] };
2794
2795 #if SA_NOCLDSTOP
2796     struct sigaction act;
2797
2798     sigemptyset (&caught_signals);
2799     for (i = 0; i < nsigs; i++)
2800       {
2801         sigaction (sig[i], NULL, &act);
2802         if (act.sa_handler != SIG_IGN)
2803           sigaddset (&caught_signals, sig[i]);
2804       }
2805
2806     act.sa_handler = sighandler;
2807     act.sa_mask = caught_signals;
2808     act.sa_flags = 0;
2809
2810     for (i = 0; i < nsigs; i++)
2811       if (sigismember (&caught_signals, sig[i]))
2812         sigaction (sig[i], &act, NULL);
2813 #else
2814     for (i = 0; i < nsigs; i++)
2815       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2816         {
2817           signal (sig[i], sighandler);
2818           siginterrupt (sig[i], 1);
2819         }
2820 #endif
2821   }
2822
2823   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
2824   atexit (exit_cleanup);
2825
2826   gkey.sword = gkey.eword = SIZE_MAX;
2827   gkey.ignore = NULL;
2828   gkey.translate = NULL;
2829   gkey.numeric = gkey.general_numeric = gkey.random = false;
2830   gkey.month = gkey.reverse = false;
2831   gkey.skipsblanks = gkey.skipeblanks = false;
2832
2833   files = xnmalloc (argc, sizeof *files);
2834
2835   for (;;)
2836     {
2837       /* Parse an operand as a file after "--" was seen; or if
2838          pedantic and a file was seen, unless the POSIX version
2839          predates 1003.1-2001 and -c was not seen and the operand is
2840          "-o FILE" or "-oFILE".  */
2841
2842       if (c == -1
2843           || (posixly_correct && nfiles != 0
2844               && ! (obsolete_usage
2845                     && ! checkonly
2846                     && optind != argc
2847                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2848                     && (argv[optind][2] || optind + 1 != argc)))
2849           || ((c = getopt_long (argc, argv, short_options,
2850                                 long_options, NULL))
2851               == -1))
2852         {
2853           if (argc <= optind)
2854             break;
2855           files[nfiles++] = argv[optind++];
2856         }
2857       else switch (c)
2858         {
2859         case 1:
2860           key = NULL;
2861           if (optarg[0] == '+')
2862             {
2863               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2864                                       && ISDIGIT (argv[optind][1]));
2865               obsolete_usage |= minus_pos_usage & ~posixly_correct;
2866               if (obsolete_usage)
2867                 {
2868                   /* Treat +POS1 [-POS2] as a key if possible; but silently
2869                      treat an operand as a file if it is not a valid +POS1.  */
2870                   key = key_init (&key_buf);
2871                   s = parse_field_count (optarg + 1, &key->sword, NULL);
2872                   if (s && *s == '.')
2873                     s = parse_field_count (s + 1, &key->schar, NULL);
2874                   if (! (key->sword | key->schar))
2875                     key->sword = SIZE_MAX;
2876                   if (! s || *set_ordering (s, key, bl_start))
2877                     {
2878                       free (key);
2879                       key = NULL;
2880                     }
2881                   else
2882                     {
2883                       if (minus_pos_usage)
2884                         {
2885                           char const *optarg1 = argv[optind++];
2886                           s = parse_field_count (optarg1 + 1, &key->eword,
2887                                              N_("invalid number after `-'"));
2888                           if (*s == '.')
2889                             s = parse_field_count (s + 1, &key->echar,
2890                                                N_("invalid number after `.'"));
2891                           if (*set_ordering (s, key, bl_end))
2892                             badfieldspec (optarg1,
2893                                       N_("stray character in field spec"));
2894                         }
2895                       insertkey (key);
2896                     }
2897                 }
2898             }
2899           if (! key)
2900             files[nfiles++] = optarg;
2901           break;
2902
2903         case 'b':
2904         case 'd':
2905         case 'f':
2906         case 'g':
2907         case 'i':
2908         case 'M':
2909         case 'n':
2910         case 'r':
2911         case 'R':
2912           {
2913             char str[2];
2914             str[0] = c;
2915             str[1] = '\0';
2916             set_ordering (str, &gkey, bl_both);
2917           }
2918           break;
2919
2920         case CHECK_OPTION:
2921           c = (optarg
2922                ? XARGMATCH ("--check", optarg, check_args, check_types)
2923                : 'c');
2924           /* Fall through.  */
2925         case 'c':
2926         case 'C':
2927           if (checkonly && checkonly != c)
2928             incompatible_options ("cC");
2929           checkonly = c;
2930           break;
2931
2932         case COMPRESS_PROGRAM_OPTION:
2933           if (compress_program && strcmp (compress_program, optarg) != 0)
2934             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2935           compress_program = optarg;
2936           break;
2937
2938         case 'k':
2939           key = key_init (&key_buf);
2940
2941           /* Get POS1. */
2942           s = parse_field_count (optarg, &key->sword,
2943                                  N_("invalid number at field start"));
2944           if (! key->sword--)
2945             {
2946               /* Provoke with `sort -k0' */
2947               badfieldspec (optarg, N_("field number is zero"));
2948             }
2949           if (*s == '.')
2950             {
2951               s = parse_field_count (s + 1, &key->schar,
2952                                      N_("invalid number after `.'"));
2953               if (! key->schar--)
2954                 {
2955                   /* Provoke with `sort -k1.0' */
2956                   badfieldspec (optarg, N_("character offset is zero"));
2957                 }
2958             }
2959           if (! (key->sword | key->schar))
2960             key->sword = SIZE_MAX;
2961           s = set_ordering (s, key, bl_start);
2962           if (*s != ',')
2963             {
2964               key->eword = SIZE_MAX;
2965               key->echar = 0;
2966             }
2967           else
2968             {
2969               /* Get POS2. */
2970               s = parse_field_count (s + 1, &key->eword,
2971                                      N_("invalid number after `,'"));
2972               if (! key->eword--)
2973                 {
2974                   /* Provoke with `sort -k1,0' */
2975                   badfieldspec (optarg, N_("field number is zero"));
2976                 }
2977               if (*s == '.')
2978                 s = parse_field_count (s + 1, &key->echar,
2979                                        N_("invalid number after `.'"));
2980               else
2981                 {
2982                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2983                   key->eword++;
2984                 }
2985               s = set_ordering (s, key, bl_end);
2986             }
2987           if (*s)
2988             badfieldspec (optarg, N_("stray character in field spec"));
2989           insertkey (key);
2990           break;
2991
2992         case 'm':
2993           mergeonly = true;
2994           break;
2995
2996         case 'o':
2997           if (outfile && !STREQ (outfile, optarg))
2998             error (SORT_FAILURE, 0, _("multiple output files specified"));
2999           outfile = optarg;
3000           break;
3001
3002         case RANDOM_SOURCE_OPTION:
3003           if (random_source && !STREQ (random_source, optarg))
3004             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3005           random_source = optarg;
3006           break;
3007
3008         case 's':
3009           stable = true;
3010           break;
3011
3012         case 'S':
3013           specify_sort_size (optarg);
3014           break;
3015
3016         case 't':
3017           {
3018             char newtab = optarg[0];
3019             if (! newtab)
3020               error (SORT_FAILURE, 0, _("empty tab"));
3021             if (optarg[1])
3022               {
3023                 if (STREQ (optarg, "\\0"))
3024                   newtab = '\0';
3025                 else
3026                   {
3027                     /* Provoke with `sort -txx'.  Complain about
3028                        "multi-character tab" instead of "multibyte tab", so
3029                        that the diagnostic's wording does not need to be
3030                        changed once multibyte characters are supported.  */
3031                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3032                            quote (optarg));
3033                   }
3034               }
3035             if (tab != TAB_DEFAULT && tab != newtab)
3036               error (SORT_FAILURE, 0, _("incompatible tabs"));
3037             tab = newtab;
3038           }
3039           break;
3040
3041         case 'T':
3042           add_temp_dir (optarg);
3043           break;
3044
3045         case 'u':
3046           unique = true;
3047           break;
3048
3049         case 'y':
3050           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3051              through Solaris 7.  It is also accepted by many non-Solaris
3052              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3053              -y is marked as obsolete starting with Solaris 8 (1999), but is
3054              still accepted as of Solaris 10 prerelease (2004).
3055
3056              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3057              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3058              and which in general ignores the argument after "-y" if it
3059              consists entirely of digits (it can even be empty).  */
3060           if (optarg == argv[optind - 1])
3061             {
3062               char const *p;
3063               for (p = optarg; ISDIGIT (*p); p++)
3064                 continue;
3065               optind -= (*p != '\0');
3066             }
3067           break;
3068
3069         case 'z':
3070           eolchar = 0;
3071           break;
3072
3073         case_GETOPT_HELP_CHAR;
3074
3075         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3076
3077         default:
3078           usage (SORT_FAILURE);
3079         }
3080     }
3081
3082   /* Inheritance of global options to individual keys. */
3083   for (key = keylist; key; key = key->next)
3084     {
3085       if (! (key->ignore || key->translate
3086              || (key->skipsblanks | key->reverse
3087                  | key->skipeblanks | key->month | key->numeric
3088                  | key->general_numeric
3089                  | key->random)))
3090         {
3091           key->ignore = gkey.ignore;
3092           key->translate = gkey.translate;
3093           key->skipsblanks = gkey.skipsblanks;
3094           key->skipeblanks = gkey.skipeblanks;
3095           key->month = gkey.month;
3096           key->numeric = gkey.numeric;
3097           key->general_numeric = gkey.general_numeric;
3098           key->random = gkey.random;
3099           key->reverse = gkey.reverse;
3100         }
3101
3102       need_random |= key->random;
3103     }
3104
3105   if (!keylist && (gkey.ignore || gkey.translate
3106                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3107                        | gkey.numeric | gkey.general_numeric
3108                        | gkey.random)))
3109     {
3110       insertkey (&gkey);
3111       need_random |= gkey.random;
3112     }
3113
3114   check_ordering_compatibility ();
3115
3116   reverse = gkey.reverse;
3117
3118   if (need_random)
3119     {
3120       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3121       if (! randread_source)
3122         die (_("open failed"), random_source);
3123     }
3124
3125   if (temp_dir_count == 0)
3126     {
3127       char const *tmp_dir = getenv ("TMPDIR");
3128       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3129     }
3130
3131   if (nfiles == 0)
3132     {
3133       static char *minus = "-";
3134       nfiles = 1;
3135       free (files);
3136       files = &minus;
3137     }
3138
3139   if (checkonly)
3140     {
3141       if (nfiles > 1)
3142         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3143                quote (files[1]), checkonly);
3144
3145       if (outfile)
3146         {
3147           static char opts[] = {0, 'o', 0};
3148           opts[0] = checkonly;
3149           incompatible_options (opts);
3150         }
3151
3152       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3153          input is not properly sorted.  */
3154       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3155     }
3156
3157   if (mergeonly)
3158     {
3159       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3160       size_t i;
3161
3162       for (i = 0; i < nfiles; ++i)
3163         sortfiles[i].name = files[i];
3164
3165       merge (sortfiles, 0, nfiles, outfile);
3166     }
3167   else
3168     sort (files, nfiles, outfile);
3169
3170   if (have_read_stdin && fclose (stdin) == EOF)
3171     die (_("close failed"), "-");
3172
3173   exit (EXIT_SUCCESS);
3174 }