src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17    Written December 1988 by Mike Haertel.
  18    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  19    or (US mail) as Mike Haertel c/o Free Software Foundation.
  20
  21    Ørn E. Hansen added NLS support in 1997.  */
  22
  23 #include <config.h>
  24
  25 #include <getopt.h>
  26 #include <sys/types.h>
  27 #include <sys/wait.h>
  28 #include <signal.h>
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "error.h"
  32 #include "hard-locale.h"
  33 #include "hash.h"
  34 #include "md5.h"
  35 #include "physmem.h"
  36 #include "posixver.h"
  37 #include "quote.h"
  38 #include "quotearg.h"
  39 #include "randread.h"
  40 #include "readtokens0.h"
  41 #include "stdio--.h"
  42 #include "stdlib--.h"
  43 #include "strnumcmp.h"
  44 #include "xmemcoll.h"
  45 #include "xmemxfrm.h"
  46 #include "xstrtol.h"
  47
  48 #if HAVE_SYS_RESOURCE_H
  49 # include <sys/resource.h>
  50 #endif
  51 #ifndef RLIMIT_DATA
  52 struct rlimit { size_t rlim_cur; };
  53 # define getrlimit(Resource, Rlp) (-1)
  54 #endif
  55
  56 /* The official name of this program (e.g., no `g' prefix).  */
  57 #define PROGRAM_NAME "sort"
  58
  59 #define AUTHORS \
  60   proper_name ("Mike Haertel"), \
  61   proper_name ("Paul Eggert")
  62
  63 #if HAVE_LANGINFO_CODESET
  64 # include <langinfo.h>
  65 #endif
  66
  67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  68    present.  */
  69 #ifndef SA_NOCLDSTOP
  70 # define SA_NOCLDSTOP 0
  71 /* No sigprocmask.  Always 'return' zero. */
  72 # define sigprocmask(How, Set, Oset) (0)
  73 # define sigset_t int
  74 # if ! HAVE_SIGINTERRUPT
  75 #  define siginterrupt(sig, flag) /* empty */
  76 # endif
  77 #endif
  78
  79 #if !defined OPEN_MAX && defined NR_OPEN
  80 # define OPEN_MAX NR_OPEN
  81 #endif
  82 #if !defined OPEN_MAX
  83 # define OPEN_MAX 20
  84 #endif
  85
  86 #define UCHAR_LIM (UCHAR_MAX + 1)
  87
  88 #ifndef DEFAULT_TMPDIR
  89 # define DEFAULT_TMPDIR "/tmp"
  90 #endif
  91
  92 /* Exit statuses.  */
  93 enum
  94   {
  95     /* POSIX says to exit with status 1 if invoked with -c and the
  96        input is not properly sorted.  */
  97     SORT_OUT_OF_ORDER = 1,
  98
  99     /* POSIX says any other irregular exit must exit with a status
 100        code greater than 1.  */
 101     SORT_FAILURE = 2
 102   };
 103
 104 enum
 105   {
 106     /* The number of times we should try to fork a compression process
 107        (we retry if the fork call fails).  We don't _need_ to compress
 108        temp files, this is just to reduce disk access, so this number
 109        can be small.  */
 110     MAX_FORK_TRIES_COMPRESS = 2,
 111
 112     /* The number of times we should try to fork a decompression process.
 113        If we can't fork a decompression process, we can't sort, so this
 114        number should be big.  */
 115     MAX_FORK_TRIES_DECOMPRESS = 8
 116   };
 117
 118 /* The representation of the decimal point in the current locale.  */
 119 static int decimal_point;
 120
 121 /* Thousands separator; if -1, then there isn't one.  */
 122 static int thousands_sep;
 123
 124 /* Nonzero if the corresponding locales are hard.  */
 125 static bool hard_LC_COLLATE;
 126 #if HAVE_NL_LANGINFO
 127 static bool hard_LC_TIME;
 128 #endif
 129
 130 #define NONZERO(x) ((x) != 0)
 131
 132 /* The kind of blanks for '-b' to skip in various options. */
 133 enum blanktype { bl_start, bl_end, bl_both };
 134
 135 /* The character marking end of line. Default to \n. */
 136 static char eolchar = '\n';
 137
 138 /* Lines are held in core as counted strings. */
 139 struct line
 140 {
 141   char *text;                   /* Text of the line. */
 142   size_t length;                /* Length including final newline. */
 143   char *keybeg;                 /* Start of first key. */
 144   char *keylim;                 /* Limit of first key. */
 145 };
 146
 147 /* Input buffers. */
 148 struct buffer
 149 {
 150   char *buf;                    /* Dynamically allocated buffer,
 151                                    partitioned into 3 regions:
 152                                    - input data;
 153                                    - unused area;
 154                                    - an array of lines, in reverse order.  */
 155   size_t used;                  /* Number of bytes used for input data.  */
 156   size_t nlines;                /* Number of lines in the line array.  */
 157   size_t alloc;                 /* Number of bytes allocated. */
 158   size_t left;                  /* Number of bytes left from previous reads. */
 159   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 160   bool eof;                     /* An EOF has been read.  */
 161 };
 162
 163 struct keyfield
 164 {
 165   size_t sword;                 /* Zero-origin 'word' to start at. */
 166   size_t schar;                 /* Additional characters to skip. */
 167   size_t eword;                 /* Zero-origin first word after field. */
 168   size_t echar;                 /* Additional characters in field. */
 169   bool const *ignore;           /* Boolean array of characters to ignore. */
 170   char const *translate;        /* Translation applied to characters. */
 171   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 172   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 173   bool numeric;                 /* Flag for numeric comparison.  Handle
 174                                    strings of digits with optional decimal
 175                                    point, but no exponential notation. */
 176   bool random;                  /* Sort by random hash of key.  */
 177   bool general_numeric;         /* Flag for general, numeric comparison.
 178                                    Handle numbers in exponential notation. */
 179   bool month;                   /* Flag for comparison by month name. */
 180   bool reverse;                 /* Reverse the sense of comparison. */
 181   struct keyfield *next;        /* Next keyfield to try. */
 182 };
 183
 184 struct month
 185 {
 186   char const *name;
 187   int val;
 188 };
 189
 190 /* FIXME: None of these tables work with multibyte character sets.
 191    Also, there are many other bugs when handling multibyte characters.
 192    One way to fix this is to rewrite `sort' to use wide characters
 193    internally, but doing this with good performance is a bit
 194    tricky.  */
 195
 196 /* Table of blanks.  */
 197 static bool blanks[UCHAR_LIM];
 198
 199 /* Table of non-printing characters. */
 200 static bool nonprinting[UCHAR_LIM];
 201
 202 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 203 static bool nondictionary[UCHAR_LIM];
 204
 205 /* Translation table folding lower case to upper.  */
 206 static char fold_toupper[UCHAR_LIM];
 207
 208 #define MONTHS_PER_YEAR 12
 209
 210 /* Table mapping month names to integers.
 211    Alphabetic order allows binary search. */
 212 static struct month monthtab[] =
 213 {
 214   {"APR", 4},
 215   {"AUG", 8},
 216   {"DEC", 12},
 217   {"FEB", 2},
 218   {"JAN", 1},
 219   {"JUL", 7},
 220   {"JUN", 6},
 221   {"MAR", 3},
 222   {"MAY", 5},
 223   {"NOV", 11},
 224   {"OCT", 10},
 225   {"SEP", 9}
 226 };
 227
 228 /* During the merge phase, the number of files to merge at once. */
 229 #define NMERGE_DEFAULT 16
 230
 231 /* Minimum size for a merge or check buffer.  */
 232 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 233
 234 /* Minimum sort size; the code might not work with smaller sizes.  */
 235 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
 236
 237 /* The number of bytes needed for a merge or check buffer, which can
 238    function relatively efficiently even if it holds only one line.  If
 239    a longer line is seen, this value is increased.  */
 240 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 241
 242 /* The approximate maximum number of bytes of main memory to use, as
 243    specified by the user.  Zero if the user has not specified a size.  */
 244 static size_t sort_size;
 245
 246 /* The guessed size for non-regular files.  */
 247 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 248
 249 /* Array of directory names in which any temporary files are to be created. */
 250 static char const **temp_dirs;
 251
 252 /* Number of temporary directory names used.  */
 253 static size_t temp_dir_count;
 254
 255 /* Number of allocated slots in temp_dirs.  */
 256 static size_t temp_dir_alloc;
 257
 258 /* Flag to reverse the order of all comparisons. */
 259 static bool reverse;
 260
 261 /* Flag for stable sort.  This turns off the last ditch bytewise
 262    comparison of lines, and instead leaves lines in the same order
 263    they were read if all keys compare equal.  */
 264 static bool stable;
 265
 266 /* If TAB has this value, blanks separate fields.  */
 267 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 268
 269 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 270    separated by the empty string between a non-blank character and a blank
 271    character. */
 272 static int tab = TAB_DEFAULT;
 273
 274 /* Flag to remove consecutive duplicate lines from the output.
 275    Only the last of a sequence of equal lines will be output. */
 276 static bool unique;
 277
 278 /* Nonzero if any of the input files are the standard input. */
 279 static bool have_read_stdin;
 280
 281 /* List of key field comparisons to be tried.  */
 282 static struct keyfield *keylist;
 283
 284 /* Program used to (de)compress temp files.  Must accept -d.  */
 285 static char const *compress_program;
 286
 287 /* Maximum number of files to merge in one go.  If more than this
 288    number are present, temp files will be used. */
 289 static unsigned int nmerge = NMERGE_DEFAULT;
 290
 291 static void sortlines_temp (struct line *, size_t, struct line *);
 292
 293 /* Report MESSAGE for FILE, then clean up and exit.
 294    If FILE is null, it represents standard output.  */
 295
 296 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 297 static void
 298 die (char const *message, char const *file)
 299 {
 300   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 301   exit (SORT_FAILURE);
 302 }
 303
 304 void
 305 usage (int status)
 306 {
 307   if (status != EXIT_SUCCESS)
 308     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 309              program_name);
 310   else
 311     {
 312       printf (_("\
 313 Usage: %s [OPTION]... [FILE]...\n\
 314   or:  %s [OPTION]... --files0-from=F\n\
 315 "),
 316               program_name, program_name);
 317       fputs (_("\
 318 Write sorted concatenation of all FILE(s) to standard output.\n\
 319 \n\
 320 "), stdout);
 321       fputs (_("\
 322 Mandatory arguments to long options are mandatory for short options too.\n\
 323 "), stdout);
 324       fputs (_("\
 325 Ordering options:\n\
 326 \n\
 327 "), stdout);
 328       fputs (_("\
 329   -b, --ignore-leading-blanks  ignore leading blanks\n\
 330   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 331   -f, --ignore-case           fold lower case to upper case characters\n\
 332 "), stdout);
 333       fputs (_("\
 334   -g, --general-numeric-sort  compare according to general numerical value\n\
 335   -i, --ignore-nonprinting    consider only printable characters\n\
 336   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 337   -n, --numeric-sort          compare according to string numerical value\n\
 338   -R, --random-sort           sort by random hash of keys\n\
 339       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
 340       --sort=WORD             sort according to WORD:\n\
 341                                 general-numeric -g, month -M, numeric -n,\n\
 342                                 random -R\n\
 343   -r, --reverse               reverse the result of comparisons\n\
 344 \n\
 345 "), stdout);
 346       fputs (_("\
 347 Other options:\n\
 348 \n\
 349 "), stdout);
 350       fputs (_("\
 351       --batch-size=NMERGE   merge at most NMERGE inputs at once;\n\
 352                             for more use temp files\n\
 353 "), stdout);
 354       fputs (_("\
 355   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 356   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 357       --compress-program=PROG  compress temporaries with PROG;\n\
 358                               decompress them with PROG -d\n\
 359       --files0-from=F       read input from the files specified by\n\
 360                             NUL-terminated names in file F\n\
 361 "), stdout);
 362       fputs (_("\
 363   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
 364   -m, --merge               merge already sorted files; do not sort\n\
 365 "), stdout);
 366       fputs (_("\
 367   -o, --output=FILE         write result to FILE instead of standard output\n\
 368   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 369   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 370 "), stdout);
 371       printf (_("\
 372   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 373   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 374                               multiple options specify multiple directories\n\
 375   -u, --unique              with -c, check for strict ordering;\n\
 376                               without -c, output only the first of an equal run\n\
 377 "), DEFAULT_TMPDIR);
 378       fputs (_("\
 379   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 380 "), stdout);
 381       fputs (HELP_OPTION_DESCRIPTION, stdout);
 382       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 383       fputs (_("\
 384 \n\
 385 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 386 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 387 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 388 one or more single-letter ordering options, which override global ordering\n\
 389 options for that key.  If no key is given, use the entire line as the key.\n\
 390 \n\
 391 SIZE may be followed by the following multiplicative suffixes:\n\
 392 "), stdout);
 393       fputs (_("\
 394 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 395 \n\
 396 With no FILE, or when FILE is -, read standard input.\n\
 397 \n\
 398 *** WARNING ***\n\
 399 The locale specified by the environment affects sort order.\n\
 400 Set LC_ALL=C to get the traditional sort order that uses\n\
 401 native byte values.\n\
 402 "), stdout );
 403       emit_bug_reporting_address ();
 404     }
 405
 406   exit (status);
 407 }
 408
 409 /* For long options that have no equivalent short option, use a
 410    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 411 enum
 412 {
 413   CHECK_OPTION = CHAR_MAX + 1,
 414   COMPRESS_PROGRAM_OPTION,
 415   FILES0_FROM_OPTION,
 416   NMERGE_OPTION,
 417   RANDOM_SOURCE_OPTION,
 418   SORT_OPTION
 419 };
 420
 421 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
 422
 423 static struct option const long_options[] =
 424 {
 425   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 426   {"check", optional_argument, NULL, CHECK_OPTION},
 427   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
 428   {"dictionary-order", no_argument, NULL, 'd'},
 429   {"ignore-case", no_argument, NULL, 'f'},
 430   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
 431   {"general-numeric-sort", no_argument, NULL, 'g'},
 432   {"ignore-nonprinting", no_argument, NULL, 'i'},
 433   {"key", required_argument, NULL, 'k'},
 434   {"merge", no_argument, NULL, 'm'},
 435   {"month-sort", no_argument, NULL, 'M'},
 436   {"numeric-sort", no_argument, NULL, 'n'},
 437   {"random-sort", no_argument, NULL, 'R'},
 438   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 439   {"sort", required_argument, NULL, SORT_OPTION},
 440   {"output", required_argument, NULL, 'o'},
 441   {"reverse", no_argument, NULL, 'r'},
 442   {"stable", no_argument, NULL, 's'},
 443   {"batch-size", required_argument, NULL, NMERGE_OPTION},
 444   {"buffer-size", required_argument, NULL, 'S'},
 445   {"field-separator", required_argument, NULL, 't'},
 446   {"temporary-directory", required_argument, NULL, 'T'},
 447   {"unique", no_argument, NULL, 'u'},
 448   {"zero-terminated", no_argument, NULL, 'z'},
 449   {GETOPT_HELP_OPTION_DECL},
 450   {GETOPT_VERSION_OPTION_DECL},
 451   {NULL, 0, NULL, 0},
 452 };
 453
 454 static char const *const check_args[] =
 455 {
 456   "quiet", "silent", "diagnose-first", NULL
 457 };
 458 static char const check_types[] =
 459 {
 460   'C', 'C', 'c'
 461 };
 462 ARGMATCH_VERIFY (check_args, check_types);
 463
 464 static char const *const sort_args[] =
 465 {
 466   "general-numeric", "month", "numeric", "random", NULL
 467 };
 468 static char const sort_types[] =
 469 {
 470   'g', 'M', 'n', 'R'
 471 };
 472 ARGMATCH_VERIFY (sort_args, sort_types);
 473
 474 /* The set of signals that are caught.  */
 475 static sigset_t caught_signals;
 476
 477 /* Critical section status.  */
 478 struct cs_status
 479 {
 480   bool valid;
 481   sigset_t sigs;
 482 };
 483
 484 /* Enter a critical section.  */
 485 static struct cs_status
 486 cs_enter (void)
 487 {
 488   struct cs_status status;
 489   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 490   return status;
 491 }
 492
 493 /* Leave a critical section.  */
 494 static void
 495 cs_leave (struct cs_status status)
 496 {
 497   if (status.valid)
 498     {
 499       /* Ignore failure when restoring the signal mask. */
 500       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 501     }
 502 }
 503
 504 /* The list of temporary files. */
 505 struct tempnode
 506 {
 507   struct tempnode *volatile next;
 508   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 509   char name[1];  /* Actual size is 1 + file name length.  */
 510 };
 511 static struct tempnode *volatile temphead;
 512 static struct tempnode *volatile *temptail = &temphead;
 513
 514 struct sortfile
 515 {
 516   char const *name;
 517   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 518 };
 519
 520 /* A table where we store compression process states.  We clean up all
 521    processes in a timely manner so as not to exhaust system resources,
 522    so we store the info on whether the process is still running, or has
 523    been reaped here.  */
 524 static Hash_table *proctab;
 525
 526 enum { INIT_PROCTAB_SIZE = 47 };
 527
 528 enum procstate { ALIVE, ZOMBIE };
 529
 530 /* A proctab entry.  The COUNT field is there in case we fork a new
 531    compression process that has the same PID as an old zombie process
 532    that is still in the table (because the process to decompress the
 533    temp file it was associated with hasn't started yet).  */
 534 struct procnode
 535 {
 536   pid_t pid;
 537   enum procstate state;
 538   size_t count;
 539 };
 540
 541 static size_t
 542 proctab_hasher (const void *entry, size_t tabsize)
 543 {
 544   const struct procnode *node = entry;
 545   return node->pid % tabsize;
 546 }
 547
 548 static bool
 549 proctab_comparator (const void *e1, const void *e2)
 550 {
 551   const struct procnode *n1 = e1, *n2 = e2;
 552   return n1->pid == n2->pid;
 553 }
 554
 555 /* The total number of forked processes (compressors and decompressors)
 556    that have not been reaped yet. */
 557 static size_t nprocs;
 558
 559 /* The number of child processes we'll allow before we try to reap some. */
 560 enum { MAX_PROCS_BEFORE_REAP = 2 };
 561
 562 /* If 0 < PID, wait for the child process with that PID to exit.
 563    If PID is -1, clean up a random child process which has finished and
 564    return the process ID of that child.  If PID is -1 and no processes
 565    have quit yet, return 0 without waiting.  */
 566
 567 static pid_t
 568 reap (pid_t pid)
 569 {
 570   int status;
 571   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 572
 573   if (cpid < 0)
 574     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 575            compress_program);
 576   else if (0 < cpid)
 577     {
 578       if (! WIFEXITED (status) || WEXITSTATUS (status))
 579         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 580                compress_program);
 581       --nprocs;
 582     }
 583
 584   return cpid;
 585 }
 586
 587 /* Add the PID of a running compression process to proctab, or update
 588    the entry COUNT and STATE fields if it's already there.  This also
 589    creates the table for us the first time it's called.  */
 590
 591 static void
 592 register_proc (pid_t pid)
 593 {
 594   struct procnode test, *node;
 595
 596   if (! proctab)
 597     {
 598       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 599                                  proctab_hasher,
 600                                  proctab_comparator,
 601                                  free);
 602       if (! proctab)
 603         xalloc_die ();
 604     }
 605
 606   test.pid = pid;
 607   node = hash_lookup (proctab, &test);
 608   if (node)
 609     {
 610       node->state = ALIVE;
 611       ++node->count;
 612     }
 613   else
 614     {
 615       node = xmalloc (sizeof *node);
 616       node->pid = pid;
 617       node->state = ALIVE;
 618       node->count = 1;
 619       hash_insert (proctab, node);
 620     }
 621 }
 622
 623 /* This is called when we reap a random process.  We don't know
 624    whether we have reaped a compression process or a decompression
 625    process until we look in the table.  If there's an ALIVE entry for
 626    it, then we have reaped a compression process, so change the state
 627    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 628
 629 static void
 630 update_proc (pid_t pid)
 631 {
 632   struct procnode test, *node;
 633
 634   test.pid = pid;
 635   node = hash_lookup (proctab, &test);
 636   if (node)
 637     node->state = ZOMBIE;
 638 }
 639
 640 /* This is for when we need to wait for a compression process to exit.
 641    If it has a ZOMBIE entry in the table then it's already dead and has
 642    been reaped.  Note that if there's an ALIVE entry for it, it still may
 643    already have died and been reaped if a second process was created with
 644    the same PID.  This is probably exceedingly rare, but to be on the safe
 645    side we will have to wait for any compression process with this PID.  */
 646
 647 static void
 648 wait_proc (pid_t pid)
 649 {
 650   struct procnode test, *node;
 651
 652   test.pid = pid;
 653   node = hash_lookup (proctab, &test);
 654   if (node->state == ALIVE)
 655     reap (pid);
 656
 657   node->state = ZOMBIE;
 658   if (! --node->count)
 659     {
 660       hash_delete (proctab, node);
 661       free (node);
 662     }
 663 }
 664
 665 /* Keep reaping finished children as long as there are more to reap.
 666    This doesn't block waiting for any of them, it only reaps those
 667    that are already dead.  */
 668
 669 static void
 670 reap_some (void)
 671 {
 672   pid_t pid;
 673
 674   while (0 < nprocs && (pid = reap (-1)))
 675     update_proc (pid);
 676 }
 677
 678 /* Clean up any remaining temporary files.  */
 679
 680 static void
 681 cleanup (void)
 682 {
 683   struct tempnode const *node;
 684
 685   for (node = temphead; node; node = node->next)
 686     unlink (node->name);
 687   temphead = NULL;
 688 }
 689
 690 /* Cleanup actions to take when exiting.  */
 691
 692 static void
 693 exit_cleanup (void)
 694 {
 695   if (temphead)
 696     {
 697       /* Clean up any remaining temporary files in a critical section so
 698          that a signal handler does not try to clean them too.  */
 699       struct cs_status cs = cs_enter ();
 700       cleanup ();
 701       cs_leave (cs);
 702     }
 703
 704   close_stdout ();
 705 }
 706
 707 /* Create a new temporary file, returning its newly allocated tempnode.
 708    Store into *PFD the file descriptor open for writing.  */
 709
 710 static struct tempnode *
 711 create_temp_file (int *pfd)
 712 {
 713   static char const slashbase[] = "/sortXXXXXX";
 714   static size_t temp_dir_index;
 715   int fd;
 716   int saved_errno;
 717   char const *temp_dir = temp_dirs[temp_dir_index];
 718   size_t len = strlen (temp_dir);
 719   struct tempnode *node =
 720     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 721   char *file = node->name;
 722   struct cs_status cs;
 723
 724   memcpy (file, temp_dir, len);
 725   memcpy (file + len, slashbase, sizeof slashbase);
 726   node->next = NULL;
 727   node->pid = 0;
 728   if (++temp_dir_index == temp_dir_count)
 729     temp_dir_index = 0;
 730
 731   /* Create the temporary file in a critical section, to avoid races.  */
 732   cs = cs_enter ();
 733   fd = mkstemp (file);
 734   if (0 <= fd)
 735     {
 736       *temptail = node;
 737       temptail = &node->next;
 738     }
 739   saved_errno = errno;
 740   cs_leave (cs);
 741   errno = saved_errno;
 742
 743   if (fd < 0)
 744     error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
 745            quote (temp_dir));
 746
 747   *pfd = fd;
 748   return node;
 749 }
 750
 751 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 752    standard output; HOW should be "w".  When opening for input, "-"
 753    means standard input.  To avoid confusion, do not return file
 754    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 755    opening an ordinary FILE.  */
 756
 757 static FILE *
 758 xfopen (const char *file, const char *how)
 759 {
 760   FILE *fp;
 761
 762   if (!file)
 763     fp = stdout;
 764   else if (STREQ (file, "-") && *how == 'r')
 765     {
 766       have_read_stdin = true;
 767       fp = stdin;
 768     }
 769   else
 770     {
 771       fp = fopen (file, how);
 772       if (! fp)
 773         die (_("open failed"), file);
 774     }
 775
 776   return fp;
 777 }
 778
 779 /* Close FP, whose name is FILE, and report any errors.  */
 780
 781 static void
 782 xfclose (FILE *fp, char const *file)
 783 {
 784   switch (fileno (fp))
 785     {
 786     case STDIN_FILENO:
 787       /* Allow reading stdin from tty more than once.  */
 788       if (feof (fp))
 789         clearerr (fp);
 790       break;
 791
 792     case STDOUT_FILENO:
 793       /* Don't close stdout just yet.  close_stdout does that.  */
 794       if (fflush (fp) != 0)
 795         die (_("fflush failed"), file);
 796       break;
 797
 798     default:
 799       if (fclose (fp) != 0)
 800         die (_("close failed"), file);
 801       break;
 802     }
 803 }
 804
 805 static void
 806 dup2_or_die (int oldfd, int newfd)
 807 {
 808   if (dup2 (oldfd, newfd) < 0)
 809     error (SORT_FAILURE, errno, _("dup2 failed"));
 810 }
 811
 812 /* Fork a child process for piping to and do common cleanup.  The
 813    TRIES parameter tells us how many times to try to fork before
 814    giving up.  Return the PID of the child or -1 if fork failed.  */
 815
 816 static pid_t
 817 pipe_fork (int pipefds[2], size_t tries)
 818 {
 819 #if HAVE_WORKING_FORK
 820   struct tempnode *saved_temphead;
 821   int saved_errno;
 822   unsigned int wait_retry = 1;
 823   pid_t pid IF_LINT (= -1);
 824   struct cs_status cs;
 825
 826   if (pipe (pipefds) < 0)
 827     return -1;
 828
 829   while (tries--)
 830     {
 831       /* This is so the child process won't delete our temp files
 832          if it receives a signal before exec-ing.  */
 833       cs = cs_enter ();
 834       saved_temphead = temphead;
 835       temphead = NULL;
 836
 837       pid = fork ();
 838       saved_errno = errno;
 839       if (pid)
 840         temphead = saved_temphead;
 841
 842       cs_leave (cs);
 843       errno = saved_errno;
 844
 845       if (0 <= pid || errno != EAGAIN)
 846         break;
 847       else
 848         {
 849           sleep (wait_retry);
 850           wait_retry *= 2;
 851           reap_some ();
 852         }
 853     }
 854
 855   if (pid < 0)
 856     {
 857       close (pipefds[0]);
 858       close (pipefds[1]);
 859     }
 860   else if (pid == 0)
 861     {
 862       close (STDIN_FILENO);
 863       close (STDOUT_FILENO);
 864     }
 865   else
 866     ++nprocs;
 867
 868   return pid;
 869
 870 #else  /* ! HAVE_WORKING_FORK */
 871   return -1;
 872 #endif
 873 }
 874
 875 /* Create a temporary file and start a compression program to filter output
 876    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 877    set it to the PID of the newly-created process.  */
 878
 879 static char *
 880 create_temp (FILE **pfp, pid_t *ppid)
 881 {
 882   int tempfd;
 883   struct tempnode *node = create_temp_file (&tempfd);
 884   char *name = node->name;
 885
 886   if (compress_program)
 887     {
 888       int pipefds[2];
 889
 890       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 891       if (0 < node->pid)
 892         {
 893           close (tempfd);
 894           close (pipefds[0]);
 895           tempfd = pipefds[1];
 896
 897           register_proc (node->pid);
 898         }
 899       else if (node->pid == 0)
 900         {
 901           close (pipefds[1]);
 902           dup2_or_die (tempfd, STDOUT_FILENO);
 903           close (tempfd);
 904           dup2_or_die (pipefds[0], STDIN_FILENO);
 905           close (pipefds[0]);
 906
 907           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 908             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 909                    compress_program);
 910         }
 911       else
 912         node->pid = 0;
 913     }
 914
 915   *pfp = fdopen (tempfd, "w");
 916   if (! *pfp)
 917     die (_("couldn't create temporary file"), name);
 918
 919   if (ppid)
 920     *ppid = node->pid;
 921
 922   return name;
 923 }
 924
 925 /* Open a compressed temp file and start a decompression process through
 926    which to filter the input.  PID must be the valid processes ID of the
 927    process used to compress the file.  */
 928
 929 static FILE *
 930 open_temp (const char *name, pid_t pid)
 931 {
 932   int tempfd, pipefds[2];
 933   pid_t child_pid;
 934   FILE *fp;
 935
 936   wait_proc (pid);
 937
 938   tempfd = open (name, O_RDONLY);
 939   if (tempfd < 0)
 940     die (_("couldn't open temporary file"), name);
 941
 942   child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
 943   if (0 < child_pid)
 944     {
 945       close (tempfd);
 946       close (pipefds[1]);
 947     }
 948   else if (child_pid == 0)
 949     {
 950       close (pipefds[0]);
 951       dup2_or_die (tempfd, STDIN_FILENO);
 952       close (tempfd);
 953       dup2_or_die (pipefds[1], STDOUT_FILENO);
 954       close (pipefds[1]);
 955
 956       if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
 957         error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
 958                compress_program);
 959     }
 960   else
 961     error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
 962            compress_program);
 963
 964   fp = fdopen (pipefds[0], "r");
 965   if (! fp)
 966     die (_("couldn't create temporary file"), name);
 967
 968   return fp;
 969 }
 970
 971 static void
 972 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 973 {
 974   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 975     die (_("write failed"), output_file);
 976 }
 977
 978 /* Append DIR to the array of temporary directory names.  */
 979 static void
 980 add_temp_dir (char const *dir)
 981 {
 982   if (temp_dir_count == temp_dir_alloc)
 983     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
 984
 985   temp_dirs[temp_dir_count++] = dir;
 986 }
 987
 988 /* Remove NAME from the list of temporary files.  */
 989
 990 static void
 991 zaptemp (const char *name)
 992 {
 993   struct tempnode *volatile *pnode;
 994   struct tempnode *node;
 995   struct tempnode *next;
 996   int unlink_status;
 997   int unlink_errno = 0;
 998   struct cs_status cs;
 999
1000   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1001     continue;
1002
1003   /* Unlink the temporary file in a critical section to avoid races.  */
1004   next = node->next;
1005   cs = cs_enter ();
1006   unlink_status = unlink (name);
1007   unlink_errno = errno;
1008   *pnode = next;
1009   cs_leave (cs);
1010
1011   if (unlink_status != 0)
1012     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1013   if (! next)
1014     temptail = pnode;
1015   free (node);
1016 }
1017
1018 #if HAVE_NL_LANGINFO
1019
1020 static int
1021 struct_month_cmp (const void *m1, const void *m2)
1022 {
1023   struct month const *month1 = m1;
1024   struct month const *month2 = m2;
1025   return strcmp (month1->name, month2->name);
1026 }
1027
1028 #endif
1029
1030 /* Initialize the character class tables. */
1031
1032 static void
1033 inittables (void)
1034 {
1035   size_t i;
1036
1037   for (i = 0; i < UCHAR_LIM; ++i)
1038     {
1039       blanks[i] = !! isblank (i);
1040       nonprinting[i] = ! isprint (i);
1041       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1042       fold_toupper[i] = toupper (i);
1043     }
1044
1045 #if HAVE_NL_LANGINFO
1046   /* If we're not in the "C" locale, read different names for months.  */
1047   if (hard_LC_TIME)
1048     {
1049       for (i = 0; i < MONTHS_PER_YEAR; i++)
1050         {
1051           char const *s;
1052           size_t s_len;
1053           size_t j;
1054           char *name;
1055
1056           s = (char *) nl_langinfo (ABMON_1 + i);
1057           s_len = strlen (s);
1058           monthtab[i].name = name = xmalloc (s_len + 1);
1059           monthtab[i].val = i + 1;
1060
1061           for (j = 0; j < s_len; j++)
1062             name[j] = fold_toupper[to_uchar (s[j])];
1063           name[j] = '\0';
1064         }
1065       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1066              sizeof *monthtab, struct_month_cmp);
1067     }
1068 #endif
1069 }
1070
1071 /* Specify how many inputs may be merged at once.
1072    This may be set on the command-line with the
1073    --batch-size option. */
1074 static void
1075 specify_nmerge (int oi, char c, char const *s)
1076 {
1077   uintmax_t n;
1078   struct rlimit rlimit;
1079   enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1080
1081   /* Try to find out how many file descriptors we'll be able
1082      to open.  We need at least nmerge + 3 (STDIN_FILENO,
1083      STDOUT_FILENO and STDERR_FILENO). */
1084   unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1085                               ? rlimit.rlim_cur
1086                               : OPEN_MAX)
1087                              - 3);
1088
1089   if (e == LONGINT_OK)
1090     {
1091       nmerge = n;
1092       if (nmerge != n)
1093         e = LONGINT_OVERFLOW;
1094       else
1095         {
1096           if (nmerge < 2)
1097             {
1098               error (0, 0, _("invalid --%s argument %s"),
1099                      long_options[oi].name, quote(s));
1100               error (SORT_FAILURE, 0,
1101                      _("minimum --%s argument is %s"),
1102                      long_options[oi].name, quote("2"));
1103             }
1104           else if (max_nmerge < nmerge)
1105             {
1106               e = LONGINT_OVERFLOW;
1107             }
1108           else
1109             return;
1110         }
1111     }
1112
1113   if (e == LONGINT_OVERFLOW)
1114     {
1115       char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1116       error (0, 0, _("--%s argument %s too large"),
1117              long_options[oi].name, quote(s));
1118       error (SORT_FAILURE, 0,
1119              _("maximum --%s argument with current rlimit is %s"),
1120              long_options[oi].name,
1121              uinttostr (max_nmerge, max_nmerge_buf));
1122     }
1123   else
1124     xstrtol_fatal (e, oi, c, long_options, s);
1125 }
1126
1127 /* Specify the amount of main memory to use when sorting.  */
1128 static void
1129 specify_sort_size (int oi, char c, char const *s)
1130 {
1131   uintmax_t n;
1132   char *suffix;
1133   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1134
1135   /* The default unit is KiB.  */
1136   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1137     {
1138       if (n <= UINTMAX_MAX / 1024)
1139         n *= 1024;
1140       else
1141         e = LONGINT_OVERFLOW;
1142     }
1143
1144   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1145   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1146     switch (suffix[0])
1147       {
1148       case 'b':
1149         e = LONGINT_OK;
1150         break;
1151
1152       case '%':
1153         {
1154           double mem = physmem_total () * n / 100;
1155
1156           /* Use "<", not "<=", to avoid problems with rounding.  */
1157           if (mem < UINTMAX_MAX)
1158             {
1159               n = mem;
1160               e = LONGINT_OK;
1161             }
1162           else
1163             e = LONGINT_OVERFLOW;
1164         }
1165         break;
1166       }
1167
1168   if (e == LONGINT_OK)
1169     {
1170       /* If multiple sort sizes are specified, take the maximum, so
1171          that option order does not matter.  */
1172       if (n < sort_size)
1173         return;
1174
1175       sort_size = n;
1176       if (sort_size == n)
1177         {
1178           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1179           return;
1180         }
1181
1182       e = LONGINT_OVERFLOW;
1183     }
1184
1185   xstrtol_fatal (e, oi, c, long_options, s);
1186 }
1187
1188 /* Return the default sort size.  */
1189 static size_t
1190 default_sort_size (void)
1191 {
1192   /* Let MEM be available memory or 1/8 of total memory, whichever
1193      is greater.  */
1194   double avail = physmem_available ();
1195   double total = physmem_total ();
1196   double mem = MAX (avail, total / 8);
1197   struct rlimit rlimit;
1198
1199   /* Let SIZE be MEM, but no more than the maximum object size or
1200      system resource limits.  Avoid the MIN macro here, as it is not
1201      quite right when only one argument is floating point.  Don't
1202      bother to check for values like RLIM_INFINITY since in practice
1203      they are not much less than SIZE_MAX.  */
1204   size_t size = SIZE_MAX;
1205   if (mem < size)
1206     size = mem;
1207   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1208     size = rlimit.rlim_cur;
1209 #ifdef RLIMIT_AS
1210   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1211     size = rlimit.rlim_cur;
1212 #endif
1213
1214   /* Leave a large safety margin for the above limits, as failure can
1215      occur when they are exceeded.  */
1216   size /= 2;
1217
1218 #ifdef RLIMIT_RSS
1219   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1220      Exceeding RSS is not fatal, but can be quite slow.  */
1221   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1222     size = rlimit.rlim_cur / 16 * 15;
1223 #endif
1224
1225   /* Use no less than the minimum.  */
1226   return MAX (size, MIN_SORT_SIZE);
1227 }
1228
1229 /* Return the sort buffer size to use with the input files identified
1230    by FPS and FILES, which are alternate names of the same files.
1231    NFILES gives the number of input files; NFPS may be less.  Assume
1232    that each input line requires LINE_BYTES extra bytes' worth of line
1233    information.  Do not exceed the size bound specified by the user
1234    (or a default size bound, if the user does not specify one).  */
1235
1236 static size_t
1237 sort_buffer_size (FILE *const *fps, size_t nfps,
1238                   char *const *files, size_t nfiles,
1239                   size_t line_bytes)
1240 {
1241   /* A bound on the input size.  If zero, the bound hasn't been
1242      determined yet.  */
1243   static size_t size_bound;
1244
1245   /* In the worst case, each input byte is a newline.  */
1246   size_t worst_case_per_input_byte = line_bytes + 1;
1247
1248   /* Keep enough room for one extra input line and an extra byte.
1249      This extra room might be needed when preparing to read EOF.  */
1250   size_t size = worst_case_per_input_byte + 1;
1251
1252   size_t i;
1253
1254   for (i = 0; i < nfiles; i++)
1255     {
1256       struct stat st;
1257       off_t file_size;
1258       size_t worst_case;
1259
1260       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1261            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1262            : stat (files[i], &st))
1263           != 0)
1264         die (_("stat failed"), files[i]);
1265
1266       if (S_ISREG (st.st_mode))
1267         file_size = st.st_size;
1268       else
1269         {
1270           /* The file has unknown size.  If the user specified a sort
1271              buffer size, use that; otherwise, guess the size.  */
1272           if (sort_size)
1273             return sort_size;
1274           file_size = INPUT_FILE_SIZE_GUESS;
1275         }
1276
1277       if (! size_bound)
1278         {
1279           size_bound = sort_size;
1280           if (! size_bound)
1281             size_bound = default_sort_size ();
1282         }
1283
1284       /* Add the amount of memory needed to represent the worst case
1285          where the input consists entirely of newlines followed by a
1286          single non-newline.  Check for overflow.  */
1287       worst_case = file_size * worst_case_per_input_byte + 1;
1288       if (file_size != worst_case / worst_case_per_input_byte
1289           || size_bound - size <= worst_case)
1290         return size_bound;
1291       size += worst_case;
1292     }
1293
1294   return size;
1295 }
1296
1297 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1298    must be at least sizeof (struct line).  Allocate ALLOC bytes
1299    initially.  */
1300
1301 static void
1302 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1303 {
1304   /* Ensure that the line array is properly aligned.  If the desired
1305      size cannot be allocated, repeatedly halve it until allocation
1306      succeeds.  The smaller allocation may hurt overall performance,
1307      but that's better than failing.  */
1308   for (;;)
1309     {
1310       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1311       buf->buf = malloc (alloc);
1312       if (buf->buf)
1313         break;
1314       alloc /= 2;
1315       if (alloc <= line_bytes + 1)
1316         xalloc_die ();
1317     }
1318
1319   buf->line_bytes = line_bytes;
1320   buf->alloc = alloc;
1321   buf->used = buf->left = buf->nlines = 0;
1322   buf->eof = false;
1323 }
1324
1325 /* Return one past the limit of the line array.  */
1326
1327 static inline struct line *
1328 buffer_linelim (struct buffer const *buf)
1329 {
1330   return (struct line *) (buf->buf + buf->alloc);
1331 }
1332
1333 /* Return a pointer to the first character of the field specified
1334    by KEY in LINE. */
1335
1336 static char *
1337 begfield (const struct line *line, const struct keyfield *key)
1338 {
1339   char *ptr = line->text, *lim = ptr + line->length - 1;
1340   size_t sword = key->sword;
1341   size_t schar = key->schar;
1342   size_t remaining_bytes;
1343
1344   /* The leading field separator itself is included in a field when -t
1345      is absent.  */
1346
1347   if (tab != TAB_DEFAULT)
1348     while (ptr < lim && sword--)
1349       {
1350         while (ptr < lim && *ptr != tab)
1351           ++ptr;
1352         if (ptr < lim)
1353           ++ptr;
1354       }
1355   else
1356     while (ptr < lim && sword--)
1357       {
1358         while (ptr < lim && blanks[to_uchar (*ptr)])
1359           ++ptr;
1360         while (ptr < lim && !blanks[to_uchar (*ptr)])
1361           ++ptr;
1362       }
1363
1364   if (key->skipsblanks)
1365     while (ptr < lim && blanks[to_uchar (*ptr)])
1366       ++ptr;
1367
1368   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1369   remaining_bytes = lim - ptr;
1370   if (schar < remaining_bytes)
1371     ptr += schar;
1372   else
1373     ptr = lim;
1374
1375   return ptr;
1376 }
1377
1378 /* Return the limit of (a pointer to the first character after) the field
1379    in LINE specified by KEY. */
1380
1381 static char *
1382 limfield (const struct line *line, const struct keyfield *key)
1383 {
1384   char *ptr = line->text, *lim = ptr + line->length - 1;
1385   size_t eword = key->eword, echar = key->echar;
1386   size_t remaining_bytes;
1387
1388   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1389      whichever comes first.  If there are more than EWORD fields, leave
1390      PTR pointing at the beginning of the field having zero-based index,
1391      EWORD.  If a delimiter character was specified (via -t), then that
1392      `beginning' is the first character following the delimiting TAB.
1393      Otherwise, leave PTR pointing at the first `blank' character after
1394      the preceding field.  */
1395   if (tab != TAB_DEFAULT)
1396     while (ptr < lim && eword--)
1397       {
1398         while (ptr < lim && *ptr != tab)
1399           ++ptr;
1400         if (ptr < lim && (eword | echar))
1401           ++ptr;
1402       }
1403   else
1404     while (ptr < lim && eword--)
1405       {
1406         while (ptr < lim && blanks[to_uchar (*ptr)])
1407           ++ptr;
1408         while (ptr < lim && !blanks[to_uchar (*ptr)])
1409           ++ptr;
1410       }
1411
1412 #ifdef POSIX_UNSPECIFIED
1413   /* The following block of code makes GNU sort incompatible with
1414      standard Unix sort, so it's ifdef'd out for now.
1415      The POSIX spec isn't clear on how to interpret this.
1416      FIXME: request clarification.
1417
1418      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1419      Date: Thu, 30 May 96 12:20:41 -0400
1420      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1421
1422      [...]I believe I've found another bug in `sort'.
1423
1424      $ cat /tmp/sort.in
1425      a b c 2 d
1426      pq rs 1 t
1427      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1428      a b c 2 d
1429      pq rs 1 t
1430      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1431      pq rs 1 t
1432      a b c 2 d
1433
1434      Unix sort produced the answer I expected: sort on the single character
1435      in column 7.  GNU sort produced different results, because it disagrees
1436      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1437      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1438      "skip M-1 fields, then either N-1 characters or the rest of the current
1439      field, whichever comes first".  This extra clause applies only to
1440      key-ends, not key-starts.
1441      */
1442
1443   /* Make LIM point to the end of (one byte past) the current field.  */
1444   if (tab != TAB_DEFAULT)
1445     {
1446       char *newlim;
1447       newlim = memchr (ptr, tab, lim - ptr);
1448       if (newlim)
1449         lim = newlim;
1450     }
1451   else
1452     {
1453       char *newlim;
1454       newlim = ptr;
1455       while (newlim < lim && blanks[to_uchar (*newlim)])
1456         ++newlim;
1457       while (newlim < lim && !blanks[to_uchar (*newlim)])
1458         ++newlim;
1459       lim = newlim;
1460     }
1461 #endif
1462
1463   /* If we're ignoring leading blanks when computing the End
1464      of the field, don't start counting bytes until after skipping
1465      past any leading blanks. */
1466   if (key->skipeblanks)
1467     while (ptr < lim && blanks[to_uchar (*ptr)])
1468       ++ptr;
1469
1470   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1471   remaining_bytes = lim - ptr;
1472   if (echar < remaining_bytes)
1473     ptr += echar;
1474   else
1475     ptr = lim;
1476
1477   return ptr;
1478 }
1479
1480 /* Fill BUF reading from FP, moving buf->left bytes from the end
1481    of buf->buf to the beginning first.  If EOF is reached and the
1482    file wasn't terminated by a newline, supply one.  Set up BUF's line
1483    table too.  FILE is the name of the file corresponding to FP.
1484    Return true if some input was read.  */
1485
1486 static bool
1487 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1488 {
1489   struct keyfield const *key = keylist;
1490   char eol = eolchar;
1491   size_t line_bytes = buf->line_bytes;
1492   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1493
1494   if (buf->eof)
1495     return false;
1496
1497   if (buf->used != buf->left)
1498     {
1499       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1500       buf->used = buf->left;
1501       buf->nlines = 0;
1502     }
1503
1504   for (;;)
1505     {
1506       char *ptr = buf->buf + buf->used;
1507       struct line *linelim = buffer_linelim (buf);
1508       struct line *line = linelim - buf->nlines;
1509       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1510       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1511
1512       while (line_bytes + 1 < avail)
1513         {
1514           /* Read as many bytes as possible, but do not read so many
1515              bytes that there might not be enough room for the
1516              corresponding line array.  The worst case is when the
1517              rest of the input file consists entirely of newlines,
1518              except that the last byte is not a newline.  */
1519           size_t readsize = (avail - 1) / (line_bytes + 1);
1520           size_t bytes_read = fread (ptr, 1, readsize, fp);
1521           char *ptrlim = ptr + bytes_read;
1522           char *p;
1523           avail -= bytes_read;
1524
1525           if (bytes_read != readsize)
1526             {
1527               if (ferror (fp))
1528                 die (_("read failed"), file);
1529               if (feof (fp))
1530                 {
1531                   buf->eof = true;
1532                   if (buf->buf == ptrlim)
1533                     return false;
1534                   if (ptrlim[-1] != eol)
1535                     *ptrlim++ = eol;
1536                 }
1537             }
1538
1539           /* Find and record each line in the just-read input.  */
1540           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1541             {
1542               ptr = p + 1;
1543               line--;
1544               line->text = line_start;
1545               line->length = ptr - line_start;
1546               mergesize = MAX (mergesize, line->length);
1547               avail -= line_bytes;
1548
1549               if (key)
1550                 {
1551                   /* Precompute the position of the first key for
1552                      efficiency.  */
1553                   line->keylim = (key->eword == SIZE_MAX
1554                                   ? p
1555                                   : limfield (line, key));
1556
1557                   if (key->sword != SIZE_MAX)
1558                     line->keybeg = begfield (line, key);
1559                   else
1560                     {
1561                       if (key->skipsblanks)
1562                         while (blanks[to_uchar (*line_start)])
1563                           line_start++;
1564                       line->keybeg = line_start;
1565                     }
1566                 }
1567
1568               line_start = ptr;
1569             }
1570
1571           ptr = ptrlim;
1572           if (buf->eof)
1573             break;
1574         }
1575
1576       buf->used = ptr - buf->buf;
1577       buf->nlines = buffer_linelim (buf) - line;
1578       if (buf->nlines != 0)
1579         {
1580           buf->left = ptr - line_start;
1581           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1582           return true;
1583         }
1584
1585       {
1586         /* The current input line is too long to fit in the buffer.
1587            Double the buffer size and try again, keeping it properly
1588            aligned.  */
1589         size_t line_alloc = buf->alloc / sizeof (struct line);
1590         buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1591         buf->alloc = line_alloc * sizeof (struct line);
1592       }
1593     }
1594 }
1595
1596 /* Compare strings A and B as numbers without explicitly converting them to
1597    machine numbers.  Comparatively slow for short strings, but asymptotically
1598    hideously fast. */
1599
1600 static int
1601 numcompare (const char *a, const char *b)
1602 {
1603   while (blanks[to_uchar (*a)])
1604     a++;
1605   while (blanks[to_uchar (*b)])
1606     b++;
1607
1608   return strnumcmp (a, b, decimal_point, thousands_sep);
1609 }
1610
1611 static int
1612 general_numcompare (const char *sa, const char *sb)
1613 {
1614   /* FIXME: add option to warn about failed conversions.  */
1615   /* FIXME: maybe add option to try expensive FP conversion
1616      only if A and B can't be compared more cheaply/accurately.  */
1617
1618   char *ea;
1619   char *eb;
1620   double a = strtod (sa, &ea);
1621   double b = strtod (sb, &eb);
1622
1623   /* Put conversion errors at the start of the collating sequence.  */
1624   if (sa == ea)
1625     return sb == eb ? 0 : -1;
1626   if (sb == eb)
1627     return 1;
1628
1629   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1630      conversion errors but before numbers; sort them by internal
1631      bit-pattern, for lack of a more portable alternative.  */
1632   return (a < b ? -1
1633           : a > b ? 1
1634           : a == b ? 0
1635           : b == b ? -1
1636           : a == a ? 1
1637           : memcmp ((char *) &a, (char *) &b, sizeof a));
1638 }
1639
1640 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1641    Return 0 if the name in S is not recognized.  */
1642
1643 static int
1644 getmonth (char const *month, size_t len)
1645 {
1646   size_t lo = 0;
1647   size_t hi = MONTHS_PER_YEAR;
1648   char const *monthlim = month + len;
1649
1650   for (;;)
1651     {
1652       if (month == monthlim)
1653         return 0;
1654       if (!blanks[to_uchar (*month)])
1655         break;
1656       ++month;
1657     }
1658
1659   do
1660     {
1661       size_t ix = (lo + hi) / 2;
1662       char const *m = month;
1663       char const *n = monthtab[ix].name;
1664
1665       for (;; m++, n++)
1666         {
1667           if (!*n)
1668             return monthtab[ix].val;
1669           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1670             {
1671               hi = ix;
1672               break;
1673             }
1674           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1675             {
1676               lo = ix + 1;
1677               break;
1678             }
1679         }
1680     }
1681   while (lo < hi);
1682
1683   return 0;
1684 }
1685
1686 /* A source of random data.  */
1687 static struct randread_source *randread_source;
1688
1689 /* Return the Ith randomly-generated state.  The caller must invoke
1690    random_state (H) for all H less than I before invoking random_state
1691    (I).  */
1692
1693 static struct md5_ctx
1694 random_state (size_t i)
1695 {
1696   /* An array of states resulting from the random data, and counts of
1697      its used and allocated members.  */
1698   static struct md5_ctx *state;
1699   static size_t used;
1700   static size_t allocated;
1701
1702   struct md5_ctx *s = &state[i];
1703
1704   if (used <= i)
1705     {
1706       unsigned char buf[MD5_DIGEST_SIZE];
1707
1708       used++;
1709
1710       if (allocated <= i)
1711         {
1712           state = X2NREALLOC (state, &allocated);
1713           s = &state[i];
1714         }
1715
1716       randread (randread_source, buf, sizeof buf);
1717       md5_init_ctx (s);
1718       md5_process_bytes (buf, sizeof buf, s);
1719     }
1720
1721   return *s;
1722 }
1723
1724 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1725    with length LENGTHB.  Return negative if less, zero if equal,
1726    positive if greater.  */
1727
1728 static int
1729 cmp_hashes (char const *texta, size_t lena,
1730             char const *textb, size_t lenb)
1731 {
1732   /* Try random hashes until a pair of hashes disagree.  But if the
1733      first pair of random hashes agree, check whether the keys are
1734      identical and if so report no difference.  */
1735   int diff;
1736   size_t i;
1737   for (i = 0; ; i++)
1738     {
1739       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1740       struct md5_ctx s[2];
1741       s[0] = s[1] = random_state (i);
1742       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1743       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1744       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1745       if (diff != 0)
1746         break;
1747       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1748         break;
1749     }
1750
1751   return diff;
1752 }
1753
1754 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1755    using one or more random hash functions.  */
1756
1757 static int
1758 compare_random (char *restrict texta, size_t lena,
1759                 char *restrict textb, size_t lenb)
1760 {
1761   int diff;
1762
1763   if (! hard_LC_COLLATE)
1764     diff = cmp_hashes (texta, lena, textb, lenb);
1765   else
1766     {
1767       /* Transform the text into the basis of comparison, so that byte
1768          strings that would otherwise considered to be equal are
1769          considered equal here even if their bytes differ.  */
1770
1771       char *buf = NULL;
1772       char stackbuf[4000];
1773       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1774       bool a_fits = tlena <= sizeof stackbuf;
1775       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1776                                (a_fits ? sizeof stackbuf - tlena : 0),
1777                                textb, lenb);
1778
1779       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1780         buf = stackbuf;
1781       else
1782         {
1783           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1784              faster by avoiding the need for an extra buffer copy.  */
1785           buf = xmalloc (tlena + tlenb + 1);
1786           xmemxfrm (buf, tlena + 1, texta, lena);
1787           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1788         }
1789
1790       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1791
1792       if (buf != stackbuf)
1793         free (buf);
1794     }
1795
1796   return diff;
1797 }
1798
1799 /* Compare two lines A and B trying every key in sequence until there
1800    are no more keys or a difference is found. */
1801
1802 static int
1803 keycompare (const struct line *a, const struct line *b)
1804 {
1805   struct keyfield const *key = keylist;
1806
1807   /* For the first iteration only, the key positions have been
1808      precomputed for us. */
1809   char *texta = a->keybeg;
1810   char *textb = b->keybeg;
1811   char *lima = a->keylim;
1812   char *limb = b->keylim;
1813
1814   int diff;
1815
1816   for (;;)
1817     {
1818       char const *translate = key->translate;
1819       bool const *ignore = key->ignore;
1820
1821       /* Find the lengths. */
1822       size_t lena = lima <= texta ? 0 : lima - texta;
1823       size_t lenb = limb <= textb ? 0 : limb - textb;
1824
1825       /* Actually compare the fields. */
1826
1827       if (key->random)
1828         diff = compare_random (texta, lena, textb, lenb);
1829       else if (key->numeric | key->general_numeric)
1830         {
1831           char savea = *lima, saveb = *limb;
1832
1833           *lima = *limb = '\0';
1834           diff = ((key->numeric ? numcompare : general_numcompare)
1835                   (texta, textb));
1836           *lima = savea, *limb = saveb;
1837         }
1838       else if (key->month)
1839         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1840       /* Sorting like this may become slow, so in a simple locale the user
1841          can select a faster sort that is similar to ascii sort.  */
1842       else if (hard_LC_COLLATE)
1843         {
1844           if (ignore || translate)
1845             {
1846               char buf[4000];
1847               size_t size = lena + 1 + lenb + 1;
1848               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1849               char *copy_b = copy_a + lena + 1;
1850               size_t new_len_a, new_len_b, i;
1851
1852               /* Ignore and/or translate chars before comparing.  */
1853               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1854                 {
1855                   if (i < lena)
1856                     {
1857                       copy_a[new_len_a] = (translate
1858                                            ? translate[to_uchar (texta[i])]
1859                                            : texta[i]);
1860                       if (!ignore || !ignore[to_uchar (texta[i])])
1861                         ++new_len_a;
1862                     }
1863                   if (i < lenb)
1864                     {
1865                       copy_b[new_len_b] = (translate
1866                                            ? translate[to_uchar (textb[i])]
1867                                            : textb [i]);
1868                       if (!ignore || !ignore[to_uchar (textb[i])])
1869                         ++new_len_b;
1870                     }
1871                 }
1872
1873               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1874
1875               if (sizeof buf < size)
1876                 free (copy_a);
1877             }
1878           else if (lena == 0)
1879             diff = - NONZERO (lenb);
1880           else if (lenb == 0)
1881             goto greater;
1882           else
1883             diff = xmemcoll (texta, lena, textb, lenb);
1884         }
1885       else if (ignore)
1886         {
1887 #define CMP_WITH_IGNORE(A, B)                                           \
1888   do                                                                    \
1889     {                                                                   \
1890           for (;;)                                                      \
1891             {                                                           \
1892               while (texta < lima && ignore[to_uchar (*texta)])         \
1893                 ++texta;                                                \
1894               while (textb < limb && ignore[to_uchar (*textb)])         \
1895                 ++textb;                                                \
1896               if (! (texta < lima && textb < limb))                     \
1897                 break;                                                  \
1898               diff = to_uchar (A) - to_uchar (B);                       \
1899               if (diff)                                                 \
1900                 goto not_equal;                                         \
1901               ++texta;                                                  \
1902               ++textb;                                                  \
1903             }                                                           \
1904                                                                         \
1905           diff = (texta < lima) - (textb < limb);                       \
1906     }                                                                   \
1907   while (0)
1908
1909           if (translate)
1910             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1911                              translate[to_uchar (*textb)]);
1912           else
1913             CMP_WITH_IGNORE (*texta, *textb);
1914         }
1915       else if (lena == 0)
1916         diff = - NONZERO (lenb);
1917       else if (lenb == 0)
1918         goto greater;
1919       else
1920         {
1921           if (translate)
1922             {
1923               while (texta < lima && textb < limb)
1924                 {
1925                   diff = (to_uchar (translate[to_uchar (*texta++)])
1926                           - to_uchar (translate[to_uchar (*textb++)]));
1927                   if (diff)
1928                     goto not_equal;
1929                 }
1930             }
1931           else
1932             {
1933               diff = memcmp (texta, textb, MIN (lena, lenb));
1934               if (diff)
1935                 goto not_equal;
1936             }
1937           diff = lena < lenb ? -1 : lena != lenb;
1938         }
1939
1940       if (diff)
1941         goto not_equal;
1942
1943       key = key->next;
1944       if (! key)
1945         break;
1946
1947       /* Find the beginning and limit of the next field.  */
1948       if (key->eword != SIZE_MAX)
1949         lima = limfield (a, key), limb = limfield (b, key);
1950       else
1951         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1952
1953       if (key->sword != SIZE_MAX)
1954         texta = begfield (a, key), textb = begfield (b, key);
1955       else
1956         {
1957           texta = a->text, textb = b->text;
1958           if (key->skipsblanks)
1959             {
1960               while (texta < lima && blanks[to_uchar (*texta)])
1961                 ++texta;
1962               while (textb < limb && blanks[to_uchar (*textb)])
1963                 ++textb;
1964             }
1965         }
1966     }
1967
1968   return 0;
1969
1970  greater:
1971   diff = 1;
1972  not_equal:
1973   return key->reverse ? -diff : diff;
1974 }
1975
1976 /* Compare two lines A and B, returning negative, zero, or positive
1977    depending on whether A compares less than, equal to, or greater than B. */
1978
1979 static int
1980 compare (const struct line *a, const struct line *b)
1981 {
1982   int diff;
1983   size_t alen, blen;
1984
1985   /* First try to compare on the specified keys (if any).
1986      The only two cases with no key at all are unadorned sort,
1987      and unadorned sort -r. */
1988   if (keylist)
1989     {
1990       diff = keycompare (a, b);
1991       if (diff | unique | stable)
1992         return diff;
1993     }
1994
1995   /* If the keys all compare equal (or no keys were specified)
1996      fall through to the default comparison.  */
1997   alen = a->length - 1, blen = b->length - 1;
1998
1999   if (alen == 0)
2000     diff = - NONZERO (blen);
2001   else if (blen == 0)
2002     diff = 1;
2003   else if (hard_LC_COLLATE)
2004     diff = xmemcoll (a->text, alen, b->text, blen);
2005   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2006     diff = alen < blen ? -1 : alen != blen;
2007
2008   return reverse ? -diff : diff;
2009 }
2010
2011 /* Check that the lines read from FILE_NAME come in order.  Return
2012    true if they are in order.  If CHECKONLY == 'c', also print a
2013    diagnostic (FILE_NAME, line number, contents of line) to stderr if
2014    they are not in order.  */
2015
2016 static bool
2017 check (char const *file_name, char checkonly)
2018 {
2019   FILE *fp = xfopen (file_name, "r");
2020   struct buffer buf;            /* Input buffer. */
2021   struct line temp;             /* Copy of previous line. */
2022   size_t alloc = 0;
2023   uintmax_t line_number = 0;
2024   struct keyfield const *key = keylist;
2025   bool nonunique = ! unique;
2026   bool ordered = true;
2027
2028   initbuf (&buf, sizeof (struct line),
2029            MAX (merge_buffer_size, sort_size));
2030   temp.text = NULL;
2031
2032   while (fillbuf (&buf, fp, file_name))
2033     {
2034       struct line const *line = buffer_linelim (&buf);
2035       struct line const *linebase = line - buf.nlines;
2036
2037       /* Make sure the line saved from the old buffer contents is
2038          less than or equal to the first line of the new buffer. */
2039       if (alloc && nonunique <= compare (&temp, line - 1))
2040         {
2041         found_disorder:
2042           {
2043             if (checkonly == 'c')
2044               {
2045                 struct line const *disorder_line = line - 1;
2046                 uintmax_t disorder_line_number =
2047                   buffer_linelim (&buf) - disorder_line + line_number;
2048                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2049                 fprintf (stderr, _("%s: %s:%s: disorder: "),
2050                          program_name, file_name,
2051                          umaxtostr (disorder_line_number, hr_buf));
2052                 write_bytes (disorder_line->text, disorder_line->length,
2053                              stderr, _("standard error"));
2054               }
2055
2056             ordered = false;
2057             break;
2058           }
2059         }
2060
2061       /* Compare each line in the buffer with its successor.  */
2062       while (linebase < --line)
2063         if (nonunique <= compare (line, line - 1))
2064           goto found_disorder;
2065
2066       line_number += buf.nlines;
2067
2068       /* Save the last line of the buffer.  */
2069       if (alloc < line->length)
2070         {
2071           do
2072             {
2073               alloc *= 2;
2074               if (! alloc)
2075                 {
2076                   alloc = line->length;
2077                   break;
2078                 }
2079             }
2080           while (alloc < line->length);
2081
2082           temp.text = xrealloc (temp.text, alloc);
2083         }
2084       memcpy (temp.text, line->text, line->length);
2085       temp.length = line->length;
2086       if (key)
2087         {
2088           temp.keybeg = temp.text + (line->keybeg - line->text);
2089           temp.keylim = temp.text + (line->keylim - line->text);
2090         }
2091     }
2092
2093   xfclose (fp, file_name);
2094   free (buf.buf);
2095   free (temp.text);
2096   return ordered;
2097 }
2098
2099 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2100    files (all of which are at the start of the FILES array), and
2101    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2102    Close input and output files before returning.
2103    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2104    the output file is standard output.  If OFP is NULL, the output
2105    file has not been opened yet (or written to, if standard output).  */
2106
2107 static void
2108 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2109           FILE *ofp, char const *output_file)
2110 {
2111   FILE **fps = xnmalloc (nmerge, sizeof *fps);
2112                                 /* Input streams for each file.  */
2113   struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2114                                 /* Input buffers for each file. */
2115   struct line saved;            /* Saved line storage for unique check. */
2116   struct line const *savedline = NULL;
2117                                 /* &saved if there is a saved line. */
2118   size_t savealloc = 0;         /* Size allocated for the saved line. */
2119   struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2120                                 /* Current line in each line table. */
2121   struct line const **base = xnmalloc (nmerge, sizeof *base);
2122                                 /* Base of each line table.  */
2123   size_t *ord = xnmalloc (nmerge, sizeof *ord);
2124                                 /* Table representing a permutation of fps,
2125                                    such that cur[ord[0]] is the smallest line
2126                                    and will be next output. */
2127   size_t i;
2128   size_t j;
2129   size_t t;
2130   struct keyfield const *key = keylist;
2131   saved.text = NULL;
2132
2133   /* Read initial lines from each input file. */
2134   for (i = 0; i < nfiles; )
2135     {
2136       fps[i] = (files[i].pid
2137                 ? open_temp (files[i].name, files[i].pid)
2138                 : xfopen (files[i].name, "r"));
2139       initbuf (&buffer[i], sizeof (struct line),
2140                MAX (merge_buffer_size, sort_size / nfiles));
2141       if (fillbuf (&buffer[i], fps[i], files[i].name))
2142         {
2143           struct line const *linelim = buffer_linelim (&buffer[i]);
2144           cur[i] = linelim - 1;
2145           base[i] = linelim - buffer[i].nlines;
2146           i++;
2147         }
2148       else
2149         {
2150           /* fps[i] is empty; eliminate it from future consideration.  */
2151           xfclose (fps[i], files[i].name);
2152           if (i < ntemps)
2153             {
2154               ntemps--;
2155               zaptemp (files[i].name);
2156             }
2157           free (buffer[i].buf);
2158           --nfiles;
2159           for (j = i; j < nfiles; ++j)
2160             files[j] = files[j + 1];
2161         }
2162     }
2163
2164   if (! ofp)
2165     ofp = xfopen (output_file, "w");
2166
2167   /* Set up the ord table according to comparisons among input lines.
2168      Since this only reorders two items if one is strictly greater than
2169      the other, it is stable. */
2170   for (i = 0; i < nfiles; ++i)
2171     ord[i] = i;
2172   for (i = 1; i < nfiles; ++i)
2173     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2174       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2175
2176   /* Repeatedly output the smallest line until no input remains. */
2177   while (nfiles)
2178     {
2179       struct line const *smallest = cur[ord[0]];
2180
2181       /* If uniquified output is turned on, output only the first of
2182          an identical series of lines. */
2183       if (unique)
2184         {
2185           if (savedline && compare (savedline, smallest))
2186             {
2187               savedline = NULL;
2188               write_bytes (saved.text, saved.length, ofp, output_file);
2189             }
2190           if (!savedline)
2191             {
2192               savedline = &saved;
2193               if (savealloc < smallest->length)
2194                 {
2195                   do
2196                     if (! savealloc)
2197                       {
2198                         savealloc = smallest->length;
2199                         break;
2200                       }
2201                   while ((savealloc *= 2) < smallest->length);
2202
2203                   saved.text = xrealloc (saved.text, savealloc);
2204                 }
2205               saved.length = smallest->length;
2206               memcpy (saved.text, smallest->text, saved.length);
2207               if (key)
2208                 {
2209                   saved.keybeg =
2210                     saved.text + (smallest->keybeg - smallest->text);
2211                   saved.keylim =
2212                     saved.text + (smallest->keylim - smallest->text);
2213                 }
2214             }
2215         }
2216       else
2217         write_bytes (smallest->text, smallest->length, ofp, output_file);
2218
2219       /* Check if we need to read more lines into core. */
2220       if (base[ord[0]] < smallest)
2221         cur[ord[0]] = smallest - 1;
2222       else
2223         {
2224           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2225             {
2226               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2227               cur[ord[0]] = linelim - 1;
2228               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2229             }
2230           else
2231             {
2232               /* We reached EOF on fps[ord[0]].  */
2233               for (i = 1; i < nfiles; ++i)
2234                 if (ord[i] > ord[0])
2235                   --ord[i];
2236               --nfiles;
2237               xfclose (fps[ord[0]], files[ord[0]].name);
2238               if (ord[0] < ntemps)
2239                 {
2240                   ntemps--;
2241                   zaptemp (files[ord[0]].name);
2242                 }
2243               free (buffer[ord[0]].buf);
2244               for (i = ord[0]; i < nfiles; ++i)
2245                 {
2246                   fps[i] = fps[i + 1];
2247                   files[i] = files[i + 1];
2248                   buffer[i] = buffer[i + 1];
2249                   cur[i] = cur[i + 1];
2250                   base[i] = base[i + 1];
2251                 }
2252               for (i = 0; i < nfiles; ++i)
2253                 ord[i] = ord[i + 1];
2254               continue;
2255             }
2256         }
2257
2258       /* The new line just read in may be larger than other lines
2259          already in main memory; push it back in the queue until we
2260          encounter a line larger than it.  Optimize for the common
2261          case where the new line is smallest.  */
2262       {
2263         size_t lo = 1;
2264         size_t hi = nfiles;
2265         size_t probe = lo;
2266         size_t ord0 = ord[0];
2267         size_t count_of_smaller_lines;
2268
2269         while (lo < hi)
2270           {
2271             int cmp = compare (cur[ord0], cur[ord[probe]]);
2272             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2273               hi = probe;
2274             else
2275               lo = probe + 1;
2276             probe = (lo + hi) / 2;
2277           }
2278
2279         count_of_smaller_lines = lo - 1;
2280         for (j = 0; j < count_of_smaller_lines; j++)
2281           ord[j] = ord[j + 1];
2282         ord[count_of_smaller_lines] = ord0;
2283       }
2284
2285       /* Free up some resources every once in a while.  */
2286       if (MAX_PROCS_BEFORE_REAP < nprocs)
2287         reap_some ();
2288     }
2289
2290   if (unique && savedline)
2291     {
2292       write_bytes (saved.text, saved.length, ofp, output_file);
2293       free (saved.text);
2294     }
2295
2296   xfclose (ofp, output_file);
2297   free(fps);
2298   free(buffer);
2299   free(ord);
2300   free(base);
2301   free(cur);
2302 }
2303
2304 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2305    and HI (with NHI members).  T, LO, and HI point just past their
2306    respective arrays, and the arrays are in reverse order.  NLO and
2307    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2308
2309 static inline void
2310 mergelines (struct line *t,
2311             struct line const *lo, size_t nlo,
2312             struct line const *hi, size_t nhi)
2313 {
2314   for (;;)
2315     if (compare (lo - 1, hi - 1) <= 0)
2316       {
2317         *--t = *--lo;
2318         if (! --nlo)
2319           {
2320             /* HI - NHI equalled T - (NLO + NHI) when this function
2321                began.  Therefore HI must equal T now, and there is no
2322                need to copy from HI to T.  */
2323             return;
2324           }
2325       }
2326     else
2327       {
2328         *--t = *--hi;
2329         if (! --nhi)
2330           {
2331             do
2332               *--t = *--lo;
2333             while (--nlo);
2334
2335             return;
2336           }
2337       }
2338 }
2339
2340 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2341    NLINES must be at least 2.
2342    The input and output arrays are in reverse order, and LINES and
2343    TEMP point just past the end of their respective arrays.
2344
2345    Use a recursive divide-and-conquer algorithm, in the style
2346    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2347    the optimization suggested by exercise 5.2.4-10; this requires room
2348    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2349    writes that this memory optimization was originally published by
2350    D. A. Bell, Comp J. 1 (1958), 75.  */
2351
2352 static void
2353 sortlines (struct line *lines, size_t nlines, struct line *temp)
2354 {
2355   if (nlines == 2)
2356     {
2357       if (0 < compare (&lines[-1], &lines[-2]))
2358         {
2359           struct line tmp = lines[-1];
2360           lines[-1] = lines[-2];
2361           lines[-2] = tmp;
2362         }
2363     }
2364   else
2365     {
2366       size_t nlo = nlines / 2;
2367       size_t nhi = nlines - nlo;
2368       struct line *lo = lines;
2369       struct line *hi = lines - nlo;
2370       struct line *sorted_lo = temp;
2371
2372       sortlines (hi, nhi, temp);
2373       if (1 < nlo)
2374         sortlines_temp (lo, nlo, sorted_lo);
2375       else
2376         sorted_lo[-1] = lo[-1];
2377
2378       mergelines (lines, sorted_lo, nlo, hi, nhi);
2379     }
2380 }
2381
2382 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2383    rather than sorting in place.  */
2384
2385 static void
2386 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2387 {
2388   if (nlines == 2)
2389     {
2390       /* Declare `swap' as int, not bool, to work around a bug
2391          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2392          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2393       int swap = (0 < compare (&lines[-1], &lines[-2]));
2394       temp[-1] = lines[-1 - swap];
2395       temp[-2] = lines[-2 + swap];
2396     }
2397   else
2398     {
2399       size_t nlo = nlines / 2;
2400       size_t nhi = nlines - nlo;
2401       struct line *lo = lines;
2402       struct line *hi = lines - nlo;
2403       struct line *sorted_hi = temp - nlo;
2404
2405       sortlines_temp (hi, nhi, sorted_hi);
2406       if (1 < nlo)
2407         sortlines (lo, nlo, temp);
2408
2409       mergelines (temp, lo, nlo, sorted_hi, nhi);
2410     }
2411 }
2412
2413 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2414    the same as OUTFILE.  If found, merge the found instances (and perhaps
2415    some other files) into a temporary file so that it can in turn be
2416    merged into OUTFILE without destroying OUTFILE before it is completely
2417    read.  Return the new value of NFILES, which differs from the old if
2418    some merging occurred.
2419
2420    This test ensures that an otherwise-erroneous use like
2421    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2422    It's not clear that POSIX requires this nicety.
2423    Detect common error cases, but don't try to catch obscure cases like
2424    "cat ... FILE ... | sort -m -o FILE"
2425    where traditional "sort" doesn't copy the input and where
2426    people should know that they're getting into trouble anyway.
2427    Catching these obscure cases would slow down performance in
2428    common cases.  */
2429
2430 static size_t
2431 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2432                       size_t nfiles, char const *outfile)
2433 {
2434   size_t i;
2435   bool got_outstat = false;
2436   struct stat outstat;
2437
2438   for (i = ntemps; i < nfiles; i++)
2439     {
2440       bool is_stdin = STREQ (files[i].name, "-");
2441       bool same;
2442       struct stat instat;
2443
2444       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2445         same = true;
2446       else
2447         {
2448           if (! got_outstat)
2449             {
2450               if ((outfile
2451                    ? stat (outfile, &outstat)
2452                    : fstat (STDOUT_FILENO, &outstat))
2453                   != 0)
2454                 break;
2455               got_outstat = true;
2456             }
2457
2458           same = (((is_stdin
2459                     ? fstat (STDIN_FILENO, &instat)
2460                     : stat (files[i].name, &instat))
2461                    == 0)
2462                   && SAME_INODE (instat, outstat));
2463         }
2464
2465       if (same)
2466         {
2467           FILE *tftp;
2468           pid_t pid;
2469           char *temp = create_temp (&tftp, &pid);
2470           mergefps (&files[i],0, nfiles - i, tftp, temp);
2471           files[i].name = temp;
2472           files[i].pid = pid;
2473           return i + 1;
2474         }
2475     }
2476
2477   return nfiles;
2478 }
2479
2480 /* Merge the input FILES.  NTEMPS is the number of files at the
2481    start of FILES that are temporary; it is zero at the top level.
2482    NFILES is the total number of files.  Put the output in
2483    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2484
2485 static void
2486 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2487        char const *output_file)
2488 {
2489   while (nmerge < nfiles)
2490     {
2491       /* Number of input files processed so far.  */
2492       size_t in;
2493
2494       /* Number of output files generated so far.  */
2495       size_t out;
2496
2497       /* nfiles % NMERGE; this counts input files that are left over
2498          after all full-sized merges have been done.  */
2499       size_t remainder;
2500
2501       /* Number of easily-available slots at the next loop iteration.  */
2502       size_t cheap_slots;
2503
2504       /* Do as many NMERGE-size merges as possible.  */
2505       for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2506         {
2507           FILE *tfp;
2508           pid_t pid;
2509           char *temp = create_temp (&tfp, &pid);
2510           size_t nt = MIN (ntemps, nmerge);
2511           ntemps -= nt;
2512           mergefps (&files[in], nt, nmerge, tfp, temp);
2513           files[out].name = temp;
2514           files[out].pid = pid;
2515         }
2516
2517       remainder = nfiles - in;
2518       cheap_slots = nmerge - out % nmerge;
2519
2520       if (cheap_slots < remainder)
2521         {
2522           /* So many files remain that they can't all be put into the last
2523              NMERGE-sized output window.  Do one more merge.  Merge as few
2524              files as possible, to avoid needless I/O.  */
2525           size_t nshortmerge = remainder - cheap_slots + 1;
2526           FILE *tfp;
2527           pid_t pid;
2528           char *temp = create_temp (&tfp, &pid);
2529           size_t nt = MIN (ntemps, nshortmerge);
2530           ntemps -= nt;
2531           mergefps (&files[in], nt, nshortmerge, tfp, temp);
2532           files[out].name = temp;
2533           files[out++].pid = pid;
2534           in += nshortmerge;
2535         }
2536
2537       /* Put the remaining input files into the last NMERGE-sized output
2538          window, so they will be merged in the next pass.  */
2539       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2540       ntemps += out;
2541       nfiles -= in - out;
2542     }
2543
2544   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2545   mergefps (files, ntemps, nfiles, NULL, output_file);
2546 }
2547
2548 /* Sort NFILES FILES onto OUTPUT_FILE. */
2549
2550 static void
2551 sort (char * const *files, size_t nfiles, char const *output_file)
2552 {
2553   struct buffer buf;
2554   size_t ntemps = 0;
2555   bool output_file_created = false;
2556
2557   buf.alloc = 0;
2558
2559   while (nfiles)
2560     {
2561       char const *temp_output;
2562       char const *file = *files;
2563       FILE *fp = xfopen (file, "r");
2564       FILE *tfp;
2565       size_t bytes_per_line = (2 * sizeof (struct line)
2566                                - sizeof (struct line) / 2);
2567
2568       if (! buf.alloc)
2569         initbuf (&buf, bytes_per_line,
2570                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2571       buf.eof = false;
2572       files++;
2573       nfiles--;
2574
2575       while (fillbuf (&buf, fp, file))
2576         {
2577           struct line *line;
2578           struct line *linebase;
2579
2580           if (buf.eof && nfiles
2581               && (bytes_per_line + 1
2582                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2583             {
2584               /* End of file, but there is more input and buffer room.
2585                  Concatenate the next input file; this is faster in
2586                  the usual case.  */
2587               buf.left = buf.used;
2588               break;
2589             }
2590
2591           line = buffer_linelim (&buf);
2592           linebase = line - buf.nlines;
2593           if (1 < buf.nlines)
2594             sortlines (line, buf.nlines, linebase);
2595           if (buf.eof && !nfiles && !ntemps && !buf.left)
2596             {
2597               xfclose (fp, file);
2598               tfp = xfopen (output_file, "w");
2599               temp_output = output_file;
2600               output_file_created = true;
2601             }
2602           else
2603             {
2604               ++ntemps;
2605               temp_output = create_temp (&tfp, NULL);
2606             }
2607
2608           do
2609             {
2610               line--;
2611               write_bytes (line->text, line->length, tfp, temp_output);
2612               if (unique)
2613                 while (linebase < line && compare (line, line - 1) == 0)
2614                   line--;
2615             }
2616           while (linebase < line);
2617
2618           xfclose (tfp, temp_output);
2619
2620           /* Free up some resources every once in a while.  */
2621           if (MAX_PROCS_BEFORE_REAP < nprocs)
2622             reap_some ();
2623
2624           if (output_file_created)
2625             goto finish;
2626         }
2627       xfclose (fp, file);
2628     }
2629
2630  finish:
2631   free (buf.buf);
2632
2633   if (! output_file_created)
2634     {
2635       size_t i;
2636       struct tempnode *node = temphead;
2637       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2638       for (i = 0; node; i++)
2639         {
2640           tempfiles[i].name = node->name;
2641           tempfiles[i].pid = node->pid;
2642           node = node->next;
2643         }
2644       merge (tempfiles, ntemps, ntemps, output_file);
2645       free (tempfiles);
2646     }
2647 }
2648
2649 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2650
2651 static void
2652 insertkey (struct keyfield *key_arg)
2653 {
2654   struct keyfield **p;
2655   struct keyfield *key = xmemdup (key_arg, sizeof *key);
2656
2657   for (p = &keylist; *p; p = &(*p)->next)
2658     continue;
2659   *p = key;
2660   key->next = NULL;
2661 }
2662
2663 /* Report a bad field specification SPEC, with extra info MSGID.  */
2664
2665 static void badfieldspec (char const *, char const *)
2666      ATTRIBUTE_NORETURN;
2667 static void
2668 badfieldspec (char const *spec, char const *msgid)
2669 {
2670   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2671          _(msgid), quote (spec));
2672   abort ();
2673 }
2674
2675 /* Report incompatible options.  */
2676
2677 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2678 static void
2679 incompatible_options (char const *opts)
2680 {
2681   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2682   abort ();
2683 }
2684
2685 /* Check compatibility of ordering options.  */
2686
2687 static void
2688 check_ordering_compatibility (void)
2689 {
2690   struct keyfield const *key;
2691
2692   for (key = keylist; key; key = key->next)
2693     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2694               + !!key->ignore))
2695         || (key->random && key->translate))
2696       {
2697         char opts[7];
2698         char *p = opts;
2699         if (key->ignore == nondictionary)
2700           *p++ = 'd';
2701         if (key->translate)
2702           *p++ = 'f';
2703         if (key->general_numeric)
2704           *p++ = 'g';
2705         if (key->ignore == nonprinting)
2706           *p++ = 'i';
2707         if (key->month)
2708           *p++ = 'M';
2709         if (key->numeric)
2710           *p++ = 'n';
2711         if (key->random)
2712           *p++ = 'R';
2713         *p = '\0';
2714         incompatible_options (opts);
2715       }
2716 }
2717
2718 /* Parse the leading integer in STRING and store the resulting value
2719    (which must fit into size_t) into *VAL.  Return the address of the
2720    suffix after the integer.  If the value is too large, silently
2721    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
2722    failure; otherwise, report MSGID and exit on failure.  */
2723
2724 static char const *
2725 parse_field_count (char const *string, size_t *val, char const *msgid)
2726 {
2727   char *suffix;
2728   uintmax_t n;
2729
2730   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2731     {
2732     case LONGINT_OK:
2733     case LONGINT_INVALID_SUFFIX_CHAR:
2734       *val = n;
2735       if (*val == n)
2736         break;
2737       /* Fall through.  */
2738     case LONGINT_OVERFLOW:
2739     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2740       *val = SIZE_MAX;
2741       break;
2742
2743     case LONGINT_INVALID:
2744       if (msgid)
2745         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2746                _(msgid), quote (string));
2747       return NULL;
2748     }
2749
2750   return suffix;
2751 }
2752
2753 /* Handle interrupts and hangups. */
2754
2755 static void
2756 sighandler (int sig)
2757 {
2758   if (! SA_NOCLDSTOP)
2759     signal (sig, SIG_IGN);
2760
2761   cleanup ();
2762
2763   signal (sig, SIG_DFL);
2764   raise (sig);
2765 }
2766
2767 /* Set the ordering options for KEY specified in S.
2768    Return the address of the first character in S that
2769    is not a valid ordering option.
2770    BLANKTYPE is the kind of blanks that 'b' should skip. */
2771
2772 static char *
2773 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2774 {
2775   while (*s)
2776     {
2777       switch (*s)
2778         {
2779         case 'b':
2780           if (blanktype == bl_start || blanktype == bl_both)
2781             key->skipsblanks = true;
2782           if (blanktype == bl_end || blanktype == bl_both)
2783             key->skipeblanks = true;
2784           break;
2785         case 'd':
2786           key->ignore = nondictionary;
2787           break;
2788         case 'f':
2789           key->translate = fold_toupper;
2790           break;
2791         case 'g':
2792           key->general_numeric = true;
2793           break;
2794         case 'i':
2795           /* Option order should not matter, so don't let -i override
2796              -d.  -d implies -i, but -i does not imply -d.  */
2797           if (! key->ignore)
2798             key->ignore = nonprinting;
2799           break;
2800         case 'M':
2801           key->month = true;
2802           break;
2803         case 'n':
2804           key->numeric = true;
2805           break;
2806         case 'R':
2807           key->random = true;
2808           break;
2809         case 'r':
2810           key->reverse = true;
2811           break;
2812         default:
2813           return (char *) s;
2814         }
2815       ++s;
2816     }
2817   return (char *) s;
2818 }
2819
2820 static struct keyfield *
2821 key_init (struct keyfield *key)
2822 {
2823   memset (key, 0, sizeof *key);
2824   key->eword = SIZE_MAX;
2825   return key;
2826 }
2827
2828 int
2829 main (int argc, char **argv)
2830 {
2831   struct keyfield *key;
2832   struct keyfield key_buf;
2833   struct keyfield gkey;
2834   char const *s;
2835   int c = 0;
2836   char checkonly = 0;
2837   bool mergeonly = false;
2838   char *random_source = NULL;
2839   bool need_random = false;
2840   size_t nfiles = 0;
2841   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2842   bool obsolete_usage = (posix2_version () < 200112);
2843   char **files;
2844   char *files_from = NULL;
2845   struct Tokens tok;
2846   char const *outfile = NULL;
2847
2848   initialize_main (&argc, &argv);
2849   set_program_name (argv[0]);
2850   setlocale (LC_ALL, "");
2851   bindtextdomain (PACKAGE, LOCALEDIR);
2852   textdomain (PACKAGE);
2853
2854   initialize_exit_failure (SORT_FAILURE);
2855
2856   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2857 #if HAVE_NL_LANGINFO
2858   hard_LC_TIME = hard_locale (LC_TIME);
2859 #endif
2860
2861   /* Get locale's representation of the decimal point.  */
2862   {
2863     struct lconv const *locale = localeconv ();
2864
2865     /* If the locale doesn't define a decimal point, or if the decimal
2866        point is multibyte, use the C locale's decimal point.  FIXME:
2867        add support for multibyte decimal points.  */
2868     decimal_point = to_uchar (locale->decimal_point[0]);
2869     if (! decimal_point || locale->decimal_point[1])
2870       decimal_point = '.';
2871
2872     /* FIXME: add support for multibyte thousands separators.  */
2873     thousands_sep = to_uchar (*locale->thousands_sep);
2874     if (! thousands_sep || locale->thousands_sep[1])
2875       thousands_sep = -1;
2876   }
2877
2878   have_read_stdin = false;
2879   inittables ();
2880
2881   {
2882     size_t i;
2883     static int const sig[] =
2884       {
2885         /* The usual suspects.  */
2886         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2887 #ifdef SIGPOLL
2888         SIGPOLL,
2889 #endif
2890 #ifdef SIGPROF
2891         SIGPROF,
2892 #endif
2893 #ifdef SIGVTALRM
2894         SIGVTALRM,
2895 #endif
2896 #ifdef SIGXCPU
2897         SIGXCPU,
2898 #endif
2899 #ifdef SIGXFSZ
2900         SIGXFSZ,
2901 #endif
2902       };
2903     enum { nsigs = sizeof sig / sizeof sig[0] };
2904
2905 #if SA_NOCLDSTOP
2906     struct sigaction act;
2907
2908     sigemptyset (&caught_signals);
2909     for (i = 0; i < nsigs; i++)
2910       {
2911         sigaction (sig[i], NULL, &act);
2912         if (act.sa_handler != SIG_IGN)
2913           sigaddset (&caught_signals, sig[i]);
2914       }
2915
2916     act.sa_handler = sighandler;
2917     act.sa_mask = caught_signals;
2918     act.sa_flags = 0;
2919
2920     for (i = 0; i < nsigs; i++)
2921       if (sigismember (&caught_signals, sig[i]))
2922         sigaction (sig[i], &act, NULL);
2923 #else
2924     for (i = 0; i < nsigs; i++)
2925       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2926         {
2927           signal (sig[i], sighandler);
2928           siginterrupt (sig[i], 1);
2929         }
2930 #endif
2931   }
2932
2933   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
2934   atexit (exit_cleanup);
2935
2936   gkey.sword = gkey.eword = SIZE_MAX;
2937   gkey.ignore = NULL;
2938   gkey.translate = NULL;
2939   gkey.numeric = gkey.general_numeric = gkey.random = false;
2940   gkey.month = gkey.reverse = false;
2941   gkey.skipsblanks = gkey.skipeblanks = false;
2942
2943   files = xnmalloc (argc, sizeof *files);
2944
2945   for (;;)
2946     {
2947       /* Parse an operand as a file after "--" was seen; or if
2948          pedantic and a file was seen, unless the POSIX version
2949          predates 1003.1-2001 and -c was not seen and the operand is
2950          "-o FILE" or "-oFILE".  */
2951       int oi = -1;
2952
2953       if (c == -1
2954           || (posixly_correct && nfiles != 0
2955               && ! (obsolete_usage
2956                     && ! checkonly
2957                     && optind != argc
2958                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2959                     && (argv[optind][2] || optind + 1 != argc)))
2960           || ((c = getopt_long (argc, argv, short_options,
2961                                 long_options, &oi))
2962               == -1))
2963         {
2964           if (argc <= optind)
2965             break;
2966           files[nfiles++] = argv[optind++];
2967         }
2968       else switch (c)
2969         {
2970         case 1:
2971           key = NULL;
2972           if (optarg[0] == '+')
2973             {
2974               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2975                                       && ISDIGIT (argv[optind][1]));
2976               obsolete_usage |= minus_pos_usage & ~posixly_correct;
2977               if (obsolete_usage)
2978                 {
2979                   /* Treat +POS1 [-POS2] as a key if possible; but silently
2980                      treat an operand as a file if it is not a valid +POS1.  */
2981                   key = key_init (&key_buf);
2982                   s = parse_field_count (optarg + 1, &key->sword, NULL);
2983                   if (s && *s == '.')
2984                     s = parse_field_count (s + 1, &key->schar, NULL);
2985                   if (! (key->sword | key->schar))
2986                     key->sword = SIZE_MAX;
2987                   if (! s || *set_ordering (s, key, bl_start))
2988                     key = NULL;
2989                   else
2990                     {
2991                       if (minus_pos_usage)
2992                         {
2993                           char const *optarg1 = argv[optind++];
2994                           s = parse_field_count (optarg1 + 1, &key->eword,
2995                                              N_("invalid number after `-'"));
2996                           if (*s == '.')
2997                             s = parse_field_count (s + 1, &key->echar,
2998                                                N_("invalid number after `.'"));
2999                           if (*set_ordering (s, key, bl_end))
3000                             badfieldspec (optarg1,
3001                                       N_("stray character in field spec"));
3002                         }
3003                       insertkey (key);
3004                     }
3005                 }
3006             }
3007           if (! key)
3008             files[nfiles++] = optarg;
3009           break;
3010
3011         case SORT_OPTION:
3012           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3013           /* Fall through. */
3014         case 'b':
3015         case 'd':
3016         case 'f':
3017         case 'g':
3018         case 'i':
3019         case 'M':
3020         case 'n':
3021         case 'r':
3022         case 'R':
3023           {
3024             char str[2];
3025             str[0] = c;
3026             str[1] = '\0';
3027             set_ordering (str, &gkey, bl_both);
3028           }
3029           break;
3030
3031         case CHECK_OPTION:
3032           c = (optarg
3033                ? XARGMATCH ("--check", optarg, check_args, check_types)
3034                : 'c');
3035           /* Fall through.  */
3036         case 'c':
3037         case 'C':
3038           if (checkonly && checkonly != c)
3039             incompatible_options ("cC");
3040           checkonly = c;
3041           break;
3042
3043         case COMPRESS_PROGRAM_OPTION:
3044           if (compress_program && !STREQ (compress_program, optarg))
3045             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3046           compress_program = optarg;
3047           break;
3048
3049         case FILES0_FROM_OPTION:
3050           files_from = optarg;
3051           break;
3052
3053         case 'k':
3054           key = key_init (&key_buf);
3055
3056           /* Get POS1. */
3057           s = parse_field_count (optarg, &key->sword,
3058                                  N_("invalid number at field start"));
3059           if (! key->sword--)
3060             {
3061               /* Provoke with `sort -k0' */
3062               badfieldspec (optarg, N_("field number is zero"));
3063             }
3064           if (*s == '.')
3065             {
3066               s = parse_field_count (s + 1, &key->schar,
3067                                      N_("invalid number after `.'"));
3068               if (! key->schar--)
3069                 {
3070                   /* Provoke with `sort -k1.0' */
3071                   badfieldspec (optarg, N_("character offset is zero"));
3072                 }
3073             }
3074           if (! (key->sword | key->schar))
3075             key->sword = SIZE_MAX;
3076           s = set_ordering (s, key, bl_start);
3077           if (*s != ',')
3078             {
3079               key->eword = SIZE_MAX;
3080               key->echar = 0;
3081             }
3082           else
3083             {
3084               /* Get POS2. */
3085               s = parse_field_count (s + 1, &key->eword,
3086                                      N_("invalid number after `,'"));
3087               if (! key->eword--)
3088                 {
3089                   /* Provoke with `sort -k1,0' */
3090                   badfieldspec (optarg, N_("field number is zero"));
3091                 }
3092               if (*s == '.')
3093                 s = parse_field_count (s + 1, &key->echar,
3094                                        N_("invalid number after `.'"));
3095               else
3096                 {
3097                   /* `-k 2,3' is equivalent to `+1 -3'.  */
3098                   key->eword++;
3099                 }
3100               s = set_ordering (s, key, bl_end);
3101             }
3102           if (*s)
3103             badfieldspec (optarg, N_("stray character in field spec"));
3104           insertkey (key);
3105           break;
3106
3107         case 'm':
3108           mergeonly = true;
3109           break;
3110
3111         case NMERGE_OPTION:
3112           specify_nmerge (oi, c, optarg);
3113           break;
3114
3115         case 'o':
3116           if (outfile && !STREQ (outfile, optarg))
3117             error (SORT_FAILURE, 0, _("multiple output files specified"));
3118           outfile = optarg;
3119           break;
3120
3121         case RANDOM_SOURCE_OPTION:
3122           if (random_source && !STREQ (random_source, optarg))
3123             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3124           random_source = optarg;
3125           break;
3126
3127         case 's':
3128           stable = true;
3129           break;
3130
3131         case 'S':
3132           specify_sort_size (oi, c, optarg);
3133           break;
3134
3135         case 't':
3136           {
3137             char newtab = optarg[0];
3138             if (! newtab)
3139               error (SORT_FAILURE, 0, _("empty tab"));
3140             if (optarg[1])
3141               {
3142                 if (STREQ (optarg, "\\0"))
3143                   newtab = '\0';
3144                 else
3145                   {
3146                     /* Provoke with `sort -txx'.  Complain about
3147                        "multi-character tab" instead of "multibyte tab", so
3148                        that the diagnostic's wording does not need to be
3149                        changed once multibyte characters are supported.  */
3150                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3151                            quote (optarg));
3152                   }
3153               }
3154             if (tab != TAB_DEFAULT && tab != newtab)
3155               error (SORT_FAILURE, 0, _("incompatible tabs"));
3156             tab = newtab;
3157           }
3158           break;
3159
3160         case 'T':
3161           add_temp_dir (optarg);
3162           break;
3163
3164         case 'u':
3165           unique = true;
3166           break;
3167
3168         case 'y':
3169           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3170              through Solaris 7.  It is also accepted by many non-Solaris
3171              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3172              -y is marked as obsolete starting with Solaris 8 (1999), but is
3173              still accepted as of Solaris 10 prerelease (2004).
3174
3175              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3176              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3177              and which in general ignores the argument after "-y" if it
3178              consists entirely of digits (it can even be empty).  */
3179           if (optarg == argv[optind - 1])
3180             {
3181               char const *p;
3182               for (p = optarg; ISDIGIT (*p); p++)
3183                 continue;
3184               optind -= (*p != '\0');
3185             }
3186           break;
3187
3188         case 'z':
3189           eolchar = 0;
3190           break;
3191
3192         case_GETOPT_HELP_CHAR;
3193
3194         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3195
3196         default:
3197           usage (SORT_FAILURE);
3198         }
3199     }
3200
3201   if (files_from)
3202     {
3203       FILE *stream;
3204
3205       /* When using --files0-from=F, you may not specify any files
3206          on the command-line.  */
3207       if (nfiles)
3208         {
3209           error (0, 0, _("extra operand %s"), quote (files[0]));
3210           fprintf (stderr, "%s\n",
3211                    _("file operands cannot be combined with --files0-from"));
3212           usage (SORT_FAILURE);
3213         }
3214
3215       if (STREQ (files_from, "-"))
3216         stream = stdin;
3217       else
3218         {
3219           stream = fopen (files_from, "r");
3220           if (stream == NULL)
3221             error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3222                    quote (files_from));
3223         }
3224
3225       readtokens0_init (&tok);
3226
3227       if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3228         error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3229                quote (files_from));
3230
3231       if (tok.n_tok)
3232         {
3233           size_t i;
3234           free (files);
3235           files = tok.tok;
3236           nfiles = tok.n_tok;
3237           for (i = 0; i < nfiles; i++)
3238           {
3239               if (STREQ (files[i], "-"))
3240                 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3241                                           "no file name of %s allowed"),
3242                        quote (files[i]));
3243               else if (files[i][0] == '\0')
3244                 {
3245                   /* Using the standard `filename:line-number:' prefix here is
3246                      not totally appropriate, since NUL is the separator, not NL,
3247                      but it might be better than nothing.  */
3248                   unsigned long int file_number = i + 1;
3249                   error (SORT_FAILURE, 0,
3250                          _("%s:%lu: invalid zero-length file name"),
3251                          quotearg_colon (files_from), file_number);
3252                 }
3253           }
3254         }
3255       else
3256         error (SORT_FAILURE, 0, _("no input from %s"),
3257                quote (files_from));
3258     }
3259
3260   /* Inheritance of global options to individual keys. */
3261   for (key = keylist; key; key = key->next)
3262     {
3263       if (! (key->ignore || key->translate
3264              || (key->skipsblanks | key->reverse
3265                  | key->skipeblanks | key->month | key->numeric
3266                  | key->general_numeric
3267                  | key->random)))
3268         {
3269           key->ignore = gkey.ignore;
3270           key->translate = gkey.translate;
3271           key->skipsblanks = gkey.skipsblanks;
3272           key->skipeblanks = gkey.skipeblanks;
3273           key->month = gkey.month;
3274           key->numeric = gkey.numeric;
3275           key->general_numeric = gkey.general_numeric;
3276           key->random = gkey.random;
3277           key->reverse = gkey.reverse;
3278         }
3279
3280       need_random |= key->random;
3281     }
3282
3283   if (!keylist && (gkey.ignore || gkey.translate
3284                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3285                        | gkey.numeric | gkey.general_numeric
3286                        | gkey.random)))
3287     {
3288       insertkey (&gkey);
3289       need_random |= gkey.random;
3290     }
3291
3292   check_ordering_compatibility ();
3293
3294   reverse = gkey.reverse;
3295
3296   if (need_random)
3297     {
3298       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3299       if (! randread_source)
3300         die (_("open failed"), random_source);
3301     }
3302
3303   if (temp_dir_count == 0)
3304     {
3305       char const *tmp_dir = getenv ("TMPDIR");
3306       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3307     }
3308
3309   if (nfiles == 0)
3310     {
3311       static char *minus = "-";
3312       nfiles = 1;
3313       free (files);
3314       files = &minus;
3315     }
3316
3317   /* Need to re-check that we meet the minimum requirement for memory
3318      usage with the final value for NMERGE. */
3319   if (0 < sort_size)
3320     sort_size = MAX (sort_size, MIN_SORT_SIZE);
3321
3322   if (checkonly)
3323     {
3324       if (nfiles > 1)
3325         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3326                quote (files[1]), checkonly);
3327
3328       if (outfile)
3329         {
3330           static char opts[] = {0, 'o', 0};
3331           opts[0] = checkonly;
3332           incompatible_options (opts);
3333         }
3334
3335       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3336          input is not properly sorted.  */
3337       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3338     }
3339
3340   if (mergeonly)
3341     {
3342       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3343       size_t i;
3344
3345       for (i = 0; i < nfiles; ++i)
3346         sortfiles[i].name = files[i];
3347
3348       merge (sortfiles, 0, nfiles, outfile);
3349       IF_LINT (free (sortfiles));
3350     }
3351   else
3352     sort (files, nfiles, outfile);
3353
3354   if (have_read_stdin && fclose (stdin) == EOF)
3355     die (_("close failed"), "-");
3356
3357   exit (EXIT_SUCCESS);
3358 }