src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17    Written December 1988 by Mike Haertel.
  18    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  19    or (US mail) as Mike Haertel c/o Free Software Foundation.
  20
  21    Ørn E. Hansen added NLS support in 1997.  */
  22
  23 #include <config.h>
  24
  25 #include <getopt.h>
  26 #include <sys/types.h>
  27 #include <sys/wait.h>
  28 #include <signal.h>
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "error.h"
  32 #include "hard-locale.h"
  33 #include "hash.h"
  34 #include "inttostr.h"
  35 #include "md5.h"
  36 #include "physmem.h"
  37 #include "posixver.h"
  38 #include "quote.h"
  39 #include "quotearg.h"
  40 #include "randread.h"
  41 #include "readtokens0.h"
  42 #include "stdio--.h"
  43 #include "stdlib--.h"
  44 #include "strnumcmp.h"
  45 #include "xmemcoll.h"
  46 #include "xmemxfrm.h"
  47 #include "xstrtol.h"
  48
  49 #if HAVE_SYS_RESOURCE_H
  50 # include <sys/resource.h>
  51 #endif
  52 #ifndef RLIMIT_DATA
  53 struct rlimit { size_t rlim_cur; };
  54 # define getrlimit(Resource, Rlp) (-1)
  55 #endif
  56
  57 /* The official name of this program (e.g., no `g' prefix).  */
  58 #define PROGRAM_NAME "sort"
  59
  60 #define AUTHORS \
  61   proper_name ("Mike Haertel"), \
  62   proper_name ("Paul Eggert")
  63
  64 #if HAVE_LANGINFO_CODESET
  65 # include <langinfo.h>
  66 #endif
  67
  68 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  69    present.  */
  70 #ifndef SA_NOCLDSTOP
  71 # define SA_NOCLDSTOP 0
  72 /* No sigprocmask.  Always 'return' zero. */
  73 # define sigprocmask(How, Set, Oset) (0)
  74 # define sigset_t int
  75 # if ! HAVE_SIGINTERRUPT
  76 #  define siginterrupt(sig, flag) /* empty */
  77 # endif
  78 #endif
  79
  80 #ifndef STDC_HEADERS
  81 double strtod ();
  82 #endif
  83
  84 #define UCHAR_LIM (UCHAR_MAX + 1)
  85
  86 #ifndef DEFAULT_TMPDIR
  87 # define DEFAULT_TMPDIR "/tmp"
  88 #endif
  89
  90 /* Exit statuses.  */
  91 enum
  92   {
  93     /* POSIX says to exit with status 1 if invoked with -c and the
  94        input is not properly sorted.  */
  95     SORT_OUT_OF_ORDER = 1,
  96
  97     /* POSIX says any other irregular exit must exit with a status
  98        code greater than 1.  */
  99     SORT_FAILURE = 2
 100   };
 101
 102 enum
 103   {
 104     /* The number of times we should try to fork a compression process
 105        (we retry if the fork call fails).  We don't _need_ to compress
 106        temp files, this is just to reduce disk access, so this number
 107        can be small.  */
 108     MAX_FORK_TRIES_COMPRESS = 2,
 109
 110     /* The number of times we should try to fork a decompression process.
 111        If we can't fork a decompression process, we can't sort, so this
 112        number should be big.  */
 113     MAX_FORK_TRIES_DECOMPRESS = 8
 114   };
 115
 116 /* The representation of the decimal point in the current locale.  */
 117 static int decimal_point;
 118
 119 /* Thousands separator; if -1, then there isn't one.  */
 120 static int thousands_sep;
 121
 122 /* Nonzero if the corresponding locales are hard.  */
 123 static bool hard_LC_COLLATE;
 124 #if HAVE_NL_LANGINFO
 125 static bool hard_LC_TIME;
 126 #endif
 127
 128 #define NONZERO(x) ((x) != 0)
 129
 130 /* The kind of blanks for '-b' to skip in various options. */
 131 enum blanktype { bl_start, bl_end, bl_both };
 132
 133 /* The character marking end of line. Default to \n. */
 134 static char eolchar = '\n';
 135
 136 /* Lines are held in core as counted strings. */
 137 struct line
 138 {
 139   char *text;                   /* Text of the line. */
 140   size_t length;                /* Length including final newline. */
 141   char *keybeg;                 /* Start of first key. */
 142   char *keylim;                 /* Limit of first key. */
 143 };
 144
 145 /* Input buffers. */
 146 struct buffer
 147 {
 148   char *buf;                    /* Dynamically allocated buffer,
 149                                    partitioned into 3 regions:
 150                                    - input data;
 151                                    - unused area;
 152                                    - an array of lines, in reverse order.  */
 153   size_t used;                  /* Number of bytes used for input data.  */
 154   size_t nlines;                /* Number of lines in the line array.  */
 155   size_t alloc;                 /* Number of bytes allocated. */
 156   size_t left;                  /* Number of bytes left from previous reads. */
 157   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 158   bool eof;                     /* An EOF has been read.  */
 159 };
 160
 161 struct keyfield
 162 {
 163   size_t sword;                 /* Zero-origin 'word' to start at. */
 164   size_t schar;                 /* Additional characters to skip. */
 165   size_t eword;                 /* Zero-origin first word after field. */
 166   size_t echar;                 /* Additional characters in field. */
 167   bool const *ignore;           /* Boolean array of characters to ignore. */
 168   char const *translate;        /* Translation applied to characters. */
 169   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 170   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 171   bool numeric;                 /* Flag for numeric comparison.  Handle
 172                                    strings of digits with optional decimal
 173                                    point, but no exponential notation. */
 174   bool random;                  /* Sort by random hash of key.  */
 175   bool general_numeric;         /* Flag for general, numeric comparison.
 176                                    Handle numbers in exponential notation. */
 177   bool month;                   /* Flag for comparison by month name. */
 178   bool reverse;                 /* Reverse the sense of comparison. */
 179   struct keyfield *next;        /* Next keyfield to try. */
 180 };
 181
 182 struct month
 183 {
 184   char const *name;
 185   int val;
 186 };
 187
 188 /* FIXME: None of these tables work with multibyte character sets.
 189    Also, there are many other bugs when handling multibyte characters.
 190    One way to fix this is to rewrite `sort' to use wide characters
 191    internally, but doing this with good performance is a bit
 192    tricky.  */
 193
 194 /* Table of blanks.  */
 195 static bool blanks[UCHAR_LIM];
 196
 197 /* Table of non-printing characters. */
 198 static bool nonprinting[UCHAR_LIM];
 199
 200 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 201 static bool nondictionary[UCHAR_LIM];
 202
 203 /* Translation table folding lower case to upper.  */
 204 static char fold_toupper[UCHAR_LIM];
 205
 206 #define MONTHS_PER_YEAR 12
 207
 208 /* Table mapping month names to integers.
 209    Alphabetic order allows binary search. */
 210 static struct month monthtab[] =
 211 {
 212   {"APR", 4},
 213   {"AUG", 8},
 214   {"DEC", 12},
 215   {"FEB", 2},
 216   {"JAN", 1},
 217   {"JUL", 7},
 218   {"JUN", 6},
 219   {"MAR", 3},
 220   {"MAY", 5},
 221   {"NOV", 11},
 222   {"OCT", 10},
 223   {"SEP", 9}
 224 };
 225
 226 /* During the merge phase, the number of files to merge at once. */
 227 #define NMERGE 16
 228
 229 /* Minimum size for a merge or check buffer.  */
 230 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 231
 232 /* Minimum sort size; the code might not work with smaller sizes.  */
 233 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 234
 235 /* The number of bytes needed for a merge or check buffer, which can
 236    function relatively efficiently even if it holds only one line.  If
 237    a longer line is seen, this value is increased.  */
 238 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 239
 240 /* The approximate maximum number of bytes of main memory to use, as
 241    specified by the user.  Zero if the user has not specified a size.  */
 242 static size_t sort_size;
 243
 244 /* The guessed size for non-regular files.  */
 245 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 246
 247 /* Array of directory names in which any temporary files are to be created. */
 248 static char const **temp_dirs;
 249
 250 /* Number of temporary directory names used.  */
 251 static size_t temp_dir_count;
 252
 253 /* Number of allocated slots in temp_dirs.  */
 254 static size_t temp_dir_alloc;
 255
 256 /* Flag to reverse the order of all comparisons. */
 257 static bool reverse;
 258
 259 /* Flag for stable sort.  This turns off the last ditch bytewise
 260    comparison of lines, and instead leaves lines in the same order
 261    they were read if all keys compare equal.  */
 262 static bool stable;
 263
 264 /* If TAB has this value, blanks separate fields.  */
 265 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 266
 267 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 268    separated by the empty string between a non-blank character and a blank
 269    character. */
 270 static int tab = TAB_DEFAULT;
 271
 272 /* Flag to remove consecutive duplicate lines from the output.
 273    Only the last of a sequence of equal lines will be output. */
 274 static bool unique;
 275
 276 /* Nonzero if any of the input files are the standard input. */
 277 static bool have_read_stdin;
 278
 279 /* List of key field comparisons to be tried.  */
 280 static struct keyfield *keylist;
 281
 282 /* Program used to (de)compress temp files.  Must accept -d.  */
 283 static char const *compress_program;
 284
 285 static void sortlines_temp (struct line *, size_t, struct line *);
 286
 287 /* Report MESSAGE for FILE, then clean up and exit.
 288    If FILE is null, it represents standard output.  */
 289
 290 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 291 static void
 292 die (char const *message, char const *file)
 293 {
 294   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 295   exit (SORT_FAILURE);
 296 }
 297
 298 void
 299 usage (int status)
 300 {
 301   if (status != EXIT_SUCCESS)
 302     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 303              program_name);
 304   else
 305     {
 306       printf (_("\
 307 Usage: %s [OPTION]... [FILE]...\n\
 308   or:  %s [OPTION]... --files0-from=F\n\
 309 "),
 310               program_name, program_name);
 311       fputs (_("\
 312 Write sorted concatenation of all FILE(s) to standard output.\n\
 313 \n\
 314 "), stdout);
 315       fputs (_("\
 316 Mandatory arguments to long options are mandatory for short options too.\n\
 317 "), stdout);
 318       fputs (_("\
 319 Ordering options:\n\
 320 \n\
 321 "), stdout);
 322       fputs (_("\
 323   -b, --ignore-leading-blanks  ignore leading blanks\n\
 324   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 325   -f, --ignore-case           fold lower case to upper case characters\n\
 326 "), stdout);
 327       fputs (_("\
 328   -g, --general-numeric-sort  compare according to general numerical value\n\
 329   -i, --ignore-nonprinting    consider only printable characters\n\
 330   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 331   -n, --numeric-sort          compare according to string numerical value\n\
 332   -R, --random-sort           sort by random hash of keys\n\
 333       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
 334       --sort=WORD             sort according to WORD:\n\
 335                                 general-numeric -g, month -M, numeric -n,\n\
 336                                 random -R\n\
 337   -r, --reverse               reverse the result of comparisons\n\
 338 \n\
 339 "), stdout);
 340       fputs (_("\
 341 Other options:\n\
 342 \n\
 343   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 344   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 345       --compress-program=PROG  compress temporaries with PROG;\n\
 346                               decompress them with PROG -d\n\
 347       --files0-from=F       read input from the files specified by\n\
 348                             NUL-terminated names in file F\n\
 349 "), stdout);
 350       fputs (_("\
 351   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
 352   -m, --merge               merge already sorted files; do not sort\n\
 353 "), stdout);
 354       fputs (_("\
 355   -o, --output=FILE         write result to FILE instead of standard output\n\
 356   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 357   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 358 "), stdout);
 359       printf (_("\
 360   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 361   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 362                               multiple options specify multiple directories\n\
 363   -u, --unique              with -c, check for strict ordering;\n\
 364                               without -c, output only the first of an equal run\n\
 365 "), DEFAULT_TMPDIR);
 366       fputs (_("\
 367   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 368 "), stdout);
 369       fputs (HELP_OPTION_DESCRIPTION, stdout);
 370       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 371       fputs (_("\
 372 \n\
 373 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 374 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 375 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 376 one or more single-letter ordering options, which override global ordering\n\
 377 options for that key.  If no key is given, use the entire line as the key.\n\
 378 \n\
 379 SIZE may be followed by the following multiplicative suffixes:\n\
 380 "), stdout);
 381       fputs (_("\
 382 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 383 \n\
 384 With no FILE, or when FILE is -, read standard input.\n\
 385 \n\
 386 *** WARNING ***\n\
 387 The locale specified by the environment affects sort order.\n\
 388 Set LC_ALL=C to get the traditional sort order that uses\n\
 389 native byte values.\n\
 390 "), stdout );
 391       emit_bug_reporting_address ();
 392     }
 393
 394   exit (status);
 395 }
 396
 397 /* For long options that have no equivalent short option, use a
 398    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 399 enum
 400 {
 401   CHECK_OPTION = CHAR_MAX + 1,
 402   COMPRESS_PROGRAM_OPTION,
 403   FILES0_FROM_OPTION,
 404   RANDOM_SOURCE_OPTION,
 405   SORT_OPTION
 406 };
 407
 408 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
 409
 410 static struct option const long_options[] =
 411 {
 412   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 413   {"check", optional_argument, NULL, CHECK_OPTION},
 414   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
 415   {"dictionary-order", no_argument, NULL, 'd'},
 416   {"ignore-case", no_argument, NULL, 'f'},
 417   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
 418   {"general-numeric-sort", no_argument, NULL, 'g'},
 419   {"ignore-nonprinting", no_argument, NULL, 'i'},
 420   {"key", required_argument, NULL, 'k'},
 421   {"merge", no_argument, NULL, 'm'},
 422   {"month-sort", no_argument, NULL, 'M'},
 423   {"numeric-sort", no_argument, NULL, 'n'},
 424   {"random-sort", no_argument, NULL, 'R'},
 425   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 426   {"sort", required_argument, NULL, SORT_OPTION},
 427   {"output", required_argument, NULL, 'o'},
 428   {"reverse", no_argument, NULL, 'r'},
 429   {"stable", no_argument, NULL, 's'},
 430   {"buffer-size", required_argument, NULL, 'S'},
 431   {"field-separator", required_argument, NULL, 't'},
 432   {"temporary-directory", required_argument, NULL, 'T'},
 433   {"unique", no_argument, NULL, 'u'},
 434   {"zero-terminated", no_argument, NULL, 'z'},
 435   {GETOPT_HELP_OPTION_DECL},
 436   {GETOPT_VERSION_OPTION_DECL},
 437   {NULL, 0, NULL, 0},
 438 };
 439
 440 static char const *const check_args[] =
 441 {
 442   "quiet", "silent", "diagnose-first", NULL
 443 };
 444 static char const check_types[] =
 445 {
 446   'C', 'C', 'c'
 447 };
 448 ARGMATCH_VERIFY (check_args, check_types);
 449
 450 static char const *const sort_args[] =
 451 {
 452   "general-numeric", "month", "numeric", "random", NULL
 453 };
 454 static char const sort_types[] =
 455 {
 456   'g', 'M', 'n', 'R'
 457 };
 458 ARGMATCH_VERIFY (sort_args, sort_types);
 459
 460 /* The set of signals that are caught.  */
 461 static sigset_t caught_signals;
 462
 463 /* Critical section status.  */
 464 struct cs_status
 465 {
 466   bool valid;
 467   sigset_t sigs;
 468 };
 469
 470 /* Enter a critical section.  */
 471 static struct cs_status
 472 cs_enter (void)
 473 {
 474   struct cs_status status;
 475   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 476   return status;
 477 }
 478
 479 /* Leave a critical section.  */
 480 static void
 481 cs_leave (struct cs_status status)
 482 {
 483   if (status.valid)
 484     {
 485       /* Ignore failure when restoring the signal mask. */
 486       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 487     }
 488 }
 489
 490 /* The list of temporary files. */
 491 struct tempnode
 492 {
 493   struct tempnode *volatile next;
 494   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 495   char name[1];  /* Actual size is 1 + file name length.  */
 496 };
 497 static struct tempnode *volatile temphead;
 498 static struct tempnode *volatile *temptail = &temphead;
 499
 500 struct sortfile
 501 {
 502   char const *name;
 503   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 504 };
 505
 506 /* A table where we store compression process states.  We clean up all
 507    processes in a timely manner so as not to exhaust system resources,
 508    so we store the info on whether the process is still running, or has
 509    been reaped here.  */
 510 static Hash_table *proctab;
 511
 512 enum { INIT_PROCTAB_SIZE = 47 };
 513
 514 enum procstate { ALIVE, ZOMBIE };
 515
 516 /* A proctab entry.  The COUNT field is there in case we fork a new
 517    compression process that has the same PID as an old zombie process
 518    that is still in the table (because the process to decompress the
 519    temp file it was associated with hasn't started yet).  */
 520 struct procnode
 521 {
 522   pid_t pid;
 523   enum procstate state;
 524   size_t count;
 525 };
 526
 527 static size_t
 528 proctab_hasher (const void *entry, size_t tabsize)
 529 {
 530   const struct procnode *node = entry;
 531   return node->pid % tabsize;
 532 }
 533
 534 static bool
 535 proctab_comparator (const void *e1, const void *e2)
 536 {
 537   const struct procnode *n1 = e1, *n2 = e2;
 538   return n1->pid == n2->pid;
 539 }
 540
 541 /* The total number of forked processes (compressors and decompressors)
 542    that have not been reaped yet. */
 543 static size_t nprocs;
 544
 545 /* The number of child processes we'll allow before we try to reap some. */
 546 enum { MAX_PROCS_BEFORE_REAP = 2 };
 547
 548 /* If 0 < PID, wait for the child process with that PID to exit.
 549    If PID is -1, clean up a random child process which has finished and
 550    return the process ID of that child.  If PID is -1 and no processes
 551    have quit yet, return 0 without waiting.  */
 552
 553 static pid_t
 554 reap (pid_t pid)
 555 {
 556   int status;
 557   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 558
 559   if (cpid < 0)
 560     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 561            compress_program);
 562   else if (0 < cpid)
 563     {
 564       if (! WIFEXITED (status) || WEXITSTATUS (status))
 565         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 566                compress_program);
 567       --nprocs;
 568     }
 569
 570   return cpid;
 571 }
 572
 573 /* Add the PID of a running compression process to proctab, or update
 574    the entry COUNT and STATE fields if it's already there.  This also
 575    creates the table for us the first time it's called.  */
 576
 577 static void
 578 register_proc (pid_t pid)
 579 {
 580   struct procnode test, *node;
 581
 582   if (! proctab)
 583     {
 584       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 585                                  proctab_hasher,
 586                                  proctab_comparator,
 587                                  free);
 588       if (! proctab)
 589         xalloc_die ();
 590     }
 591
 592   test.pid = pid;
 593   node = hash_lookup (proctab, &test);
 594   if (node)
 595     {
 596       node->state = ALIVE;
 597       ++node->count;
 598     }
 599   else
 600     {
 601       node = xmalloc (sizeof *node);
 602       node->pid = pid;
 603       node->state = ALIVE;
 604       node->count = 1;
 605       hash_insert (proctab, node);
 606     }
 607 }
 608
 609 /* This is called when we reap a random process.  We don't know
 610    whether we have reaped a compression process or a decompression
 611    process until we look in the table.  If there's an ALIVE entry for
 612    it, then we have reaped a compression process, so change the state
 613    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 614
 615 static void
 616 update_proc (pid_t pid)
 617 {
 618   struct procnode test, *node;
 619
 620   test.pid = pid;
 621   node = hash_lookup (proctab, &test);
 622   if (node)
 623     node->state = ZOMBIE;
 624 }
 625
 626 /* This is for when we need to wait for a compression process to exit.
 627    If it has a ZOMBIE entry in the table then it's already dead and has
 628    been reaped.  Note that if there's an ALIVE entry for it, it still may
 629    already have died and been reaped if a second process was created with
 630    the same PID.  This is probably exceedingly rare, but to be on the safe
 631    side we will have to wait for any compression process with this PID.  */
 632
 633 static void
 634 wait_proc (pid_t pid)
 635 {
 636   struct procnode test, *node;
 637
 638   test.pid = pid;
 639   node = hash_lookup (proctab, &test);
 640   if (node->state == ALIVE)
 641     reap (pid);
 642
 643   node->state = ZOMBIE;
 644   if (! --node->count)
 645     {
 646       hash_delete (proctab, node);
 647       free (node);
 648     }
 649 }
 650
 651 /* Keep reaping finished children as long as there are more to reap.
 652    This doesn't block waiting for any of them, it only reaps those
 653    that are already dead.  */
 654
 655 static void
 656 reap_some (void)
 657 {
 658   pid_t pid;
 659
 660   while (0 < nprocs && (pid = reap (-1)))
 661     update_proc (pid);
 662 }
 663
 664 /* Clean up any remaining temporary files.  */
 665
 666 static void
 667 cleanup (void)
 668 {
 669   struct tempnode const *node;
 670
 671   for (node = temphead; node; node = node->next)
 672     unlink (node->name);
 673   temphead = NULL;
 674 }
 675
 676 /* Cleanup actions to take when exiting.  */
 677
 678 static void
 679 exit_cleanup (void)
 680 {
 681   if (temphead)
 682     {
 683       /* Clean up any remaining temporary files in a critical section so
 684          that a signal handler does not try to clean them too.  */
 685       struct cs_status cs = cs_enter ();
 686       cleanup ();
 687       cs_leave (cs);
 688     }
 689
 690   close_stdout ();
 691 }
 692
 693 /* Create a new temporary file, returning its newly allocated tempnode.
 694    Store into *PFD the file descriptor open for writing.  */
 695
 696 static struct tempnode *
 697 create_temp_file (int *pfd)
 698 {
 699   static char const slashbase[] = "/sortXXXXXX";
 700   static size_t temp_dir_index;
 701   int fd;
 702   int saved_errno;
 703   char const *temp_dir = temp_dirs[temp_dir_index];
 704   size_t len = strlen (temp_dir);
 705   struct tempnode *node =
 706     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 707   char *file = node->name;
 708   struct cs_status cs;
 709
 710   memcpy (file, temp_dir, len);
 711   memcpy (file + len, slashbase, sizeof slashbase);
 712   node->next = NULL;
 713   node->pid = 0;
 714   if (++temp_dir_index == temp_dir_count)
 715     temp_dir_index = 0;
 716
 717   /* Create the temporary file in a critical section, to avoid races.  */
 718   cs = cs_enter ();
 719   fd = mkstemp (file);
 720   if (0 <= fd)
 721     {
 722       *temptail = node;
 723       temptail = &node->next;
 724     }
 725   saved_errno = errno;
 726   cs_leave (cs);
 727   errno = saved_errno;
 728
 729   if (fd < 0)
 730     die (_("cannot create temporary file"), file);
 731
 732   *pfd = fd;
 733   return node;
 734 }
 735
 736 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 737    standard output; HOW should be "w".  When opening for input, "-"
 738    means standard input.  To avoid confusion, do not return file
 739    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 740    opening an ordinary FILE.  */
 741
 742 static FILE *
 743 xfopen (const char *file, const char *how)
 744 {
 745   FILE *fp;
 746
 747   if (!file)
 748     fp = stdout;
 749   else if (STREQ (file, "-") && *how == 'r')
 750     {
 751       have_read_stdin = true;
 752       fp = stdin;
 753     }
 754   else
 755     {
 756       fp = fopen (file, how);
 757       if (! fp)
 758         die (_("open failed"), file);
 759     }
 760
 761   return fp;
 762 }
 763
 764 /* Close FP, whose name is FILE, and report any errors.  */
 765
 766 static void
 767 xfclose (FILE *fp, char const *file)
 768 {
 769   switch (fileno (fp))
 770     {
 771     case STDIN_FILENO:
 772       /* Allow reading stdin from tty more than once.  */
 773       if (feof (fp))
 774         clearerr (fp);
 775       break;
 776
 777     case STDOUT_FILENO:
 778       /* Don't close stdout just yet.  close_stdout does that.  */
 779       if (fflush (fp) != 0)
 780         die (_("fflush failed"), file);
 781       break;
 782
 783     default:
 784       if (fclose (fp) != 0)
 785         die (_("close failed"), file);
 786       break;
 787     }
 788 }
 789
 790 static void
 791 dup2_or_die (int oldfd, int newfd)
 792 {
 793   if (dup2 (oldfd, newfd) < 0)
 794     error (SORT_FAILURE, errno, _("dup2 failed"));
 795 }
 796
 797 /* Fork a child process for piping to and do common cleanup.  The
 798    TRIES parameter tells us how many times to try to fork before
 799    giving up.  Return the PID of the child or -1 if fork failed.  */
 800
 801 static pid_t
 802 pipe_fork (int pipefds[2], size_t tries)
 803 {
 804 #if HAVE_WORKING_FORK
 805   struct tempnode *saved_temphead;
 806   int saved_errno;
 807   unsigned int wait_retry = 1;
 808   pid_t pid IF_LINT (= -1);
 809   struct cs_status cs;
 810
 811   if (pipe (pipefds) < 0)
 812     return -1;
 813
 814   while (tries--)
 815     {
 816       /* This is so the child process won't delete our temp files
 817          if it receives a signal before exec-ing.  */
 818       cs = cs_enter ();
 819       saved_temphead = temphead;
 820       temphead = NULL;
 821
 822       pid = fork ();
 823       saved_errno = errno;
 824       if (pid)
 825         temphead = saved_temphead;
 826
 827       cs_leave (cs);
 828       errno = saved_errno;
 829
 830       if (0 <= pid || errno != EAGAIN)
 831         break;
 832       else
 833         {
 834           sleep (wait_retry);
 835           wait_retry *= 2;
 836           reap_some ();
 837         }
 838     }
 839
 840   if (pid < 0)
 841     {
 842       close (pipefds[0]);
 843       close (pipefds[1]);
 844     }
 845   else if (pid == 0)
 846     {
 847       close (STDIN_FILENO);
 848       close (STDOUT_FILENO);
 849     }
 850   else
 851     ++nprocs;
 852
 853   return pid;
 854
 855 #else  /* ! HAVE_WORKING_FORK */
 856   return -1;
 857 #endif
 858 }
 859
 860 /* Create a temporary file and start a compression program to filter output
 861    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 862    set it to the PID of the newly-created process.  */
 863
 864 static char *
 865 create_temp (FILE **pfp, pid_t *ppid)
 866 {
 867   int tempfd;
 868   struct tempnode *node = create_temp_file (&tempfd);
 869   char *name = node->name;
 870
 871   if (compress_program)
 872     {
 873       int pipefds[2];
 874
 875       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 876       if (0 < node->pid)
 877         {
 878           close (tempfd);
 879           close (pipefds[0]);
 880           tempfd = pipefds[1];
 881
 882           register_proc (node->pid);
 883         }
 884       else if (node->pid == 0)
 885         {
 886           close (pipefds[1]);
 887           dup2_or_die (tempfd, STDOUT_FILENO);
 888           close (tempfd);
 889           dup2_or_die (pipefds[0], STDIN_FILENO);
 890           close (pipefds[0]);
 891
 892           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 893             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 894                    compress_program);
 895         }
 896       else
 897         node->pid = 0;
 898     }
 899
 900   *pfp = fdopen (tempfd, "w");
 901   if (! *pfp)
 902     die (_("couldn't create temporary file"), name);
 903
 904   if (ppid)
 905     *ppid = node->pid;
 906
 907   return name;
 908 }
 909
 910 /* Open a compressed temp file and start a decompression process through
 911    which to filter the input.  PID must be the valid processes ID of the
 912    process used to compress the file.  */
 913
 914 static FILE *
 915 open_temp (const char *name, pid_t pid)
 916 {
 917   int tempfd, pipefds[2];
 918   pid_t child_pid;
 919   FILE *fp;
 920
 921   wait_proc (pid);
 922
 923   tempfd = open (name, O_RDONLY);
 924   if (tempfd < 0)
 925     die (_("couldn't open temporary file"), name);
 926
 927   child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
 928   if (0 < child_pid)
 929     {
 930       close (tempfd);
 931       close (pipefds[1]);
 932     }
 933   else if (child_pid == 0)
 934     {
 935       close (pipefds[0]);
 936       dup2_or_die (tempfd, STDIN_FILENO);
 937       close (tempfd);
 938       dup2_or_die (pipefds[1], STDOUT_FILENO);
 939       close (pipefds[1]);
 940
 941       if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
 942         error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
 943                compress_program);
 944     }
 945   else
 946     error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
 947            compress_program);
 948
 949   fp = fdopen (pipefds[0], "r");
 950   if (! fp)
 951     die (_("couldn't create temporary file"), name);
 952
 953   return fp;
 954 }
 955
 956 static void
 957 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 958 {
 959   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 960     die (_("write failed"), output_file);
 961 }
 962
 963 /* Append DIR to the array of temporary directory names.  */
 964 static void
 965 add_temp_dir (char const *dir)
 966 {
 967   if (temp_dir_count == temp_dir_alloc)
 968     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
 969
 970   temp_dirs[temp_dir_count++] = dir;
 971 }
 972
 973 /* Remove NAME from the list of temporary files.  */
 974
 975 static void
 976 zaptemp (const char *name)
 977 {
 978   struct tempnode *volatile *pnode;
 979   struct tempnode *node;
 980   struct tempnode *next;
 981   int unlink_status;
 982   int unlink_errno = 0;
 983   struct cs_status cs;
 984
 985   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
 986     continue;
 987
 988   /* Unlink the temporary file in a critical section to avoid races.  */
 989   next = node->next;
 990   cs = cs_enter ();
 991   unlink_status = unlink (name);
 992   unlink_errno = errno;
 993   *pnode = next;
 994   cs_leave (cs);
 995
 996   if (unlink_status != 0)
 997     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
 998   if (! next)
 999     temptail = pnode;
1000   free (node);
1001 }
1002
1003 #if HAVE_NL_LANGINFO
1004
1005 static int
1006 struct_month_cmp (const void *m1, const void *m2)
1007 {
1008   struct month const *month1 = m1;
1009   struct month const *month2 = m2;
1010   return strcmp (month1->name, month2->name);
1011 }
1012
1013 #endif
1014
1015 /* Initialize the character class tables. */
1016
1017 static void
1018 inittables (void)
1019 {
1020   size_t i;
1021
1022   for (i = 0; i < UCHAR_LIM; ++i)
1023     {
1024       blanks[i] = !! isblank (i);
1025       nonprinting[i] = ! isprint (i);
1026       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1027       fold_toupper[i] = toupper (i);
1028     }
1029
1030 #if HAVE_NL_LANGINFO
1031   /* If we're not in the "C" locale, read different names for months.  */
1032   if (hard_LC_TIME)
1033     {
1034       for (i = 0; i < MONTHS_PER_YEAR; i++)
1035         {
1036           char const *s;
1037           size_t s_len;
1038           size_t j;
1039           char *name;
1040
1041           s = (char *) nl_langinfo (ABMON_1 + i);
1042           s_len = strlen (s);
1043           monthtab[i].name = name = xmalloc (s_len + 1);
1044           monthtab[i].val = i + 1;
1045
1046           for (j = 0; j < s_len; j++)
1047             name[j] = fold_toupper[to_uchar (s[j])];
1048           name[j] = '\0';
1049         }
1050       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1051              sizeof *monthtab, struct_month_cmp);
1052     }
1053 #endif
1054 }
1055
1056 /* Specify the amount of main memory to use when sorting.  */
1057 static void
1058 specify_sort_size (int oi, char c, char const *s)
1059 {
1060   uintmax_t n;
1061   char *suffix;
1062   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1063
1064   /* The default unit is KiB.  */
1065   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1066     {
1067       if (n <= UINTMAX_MAX / 1024)
1068         n *= 1024;
1069       else
1070         e = LONGINT_OVERFLOW;
1071     }
1072
1073   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1074   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1075     switch (suffix[0])
1076       {
1077       case 'b':
1078         e = LONGINT_OK;
1079         break;
1080
1081       case '%':
1082         {
1083           double mem = physmem_total () * n / 100;
1084
1085           /* Use "<", not "<=", to avoid problems with rounding.  */
1086           if (mem < UINTMAX_MAX)
1087             {
1088               n = mem;
1089               e = LONGINT_OK;
1090             }
1091           else
1092             e = LONGINT_OVERFLOW;
1093         }
1094         break;
1095       }
1096
1097   if (e == LONGINT_OK)
1098     {
1099       /* If multiple sort sizes are specified, take the maximum, so
1100          that option order does not matter.  */
1101       if (n < sort_size)
1102         return;
1103
1104       sort_size = n;
1105       if (sort_size == n)
1106         {
1107           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1108           return;
1109         }
1110
1111       e = LONGINT_OVERFLOW;
1112     }
1113
1114   xstrtol_fatal (e, oi, c, long_options, s);
1115 }
1116
1117 /* Return the default sort size.  */
1118 static size_t
1119 default_sort_size (void)
1120 {
1121   /* Let MEM be available memory or 1/8 of total memory, whichever
1122      is greater.  */
1123   double avail = physmem_available ();
1124   double total = physmem_total ();
1125   double mem = MAX (avail, total / 8);
1126   struct rlimit rlimit;
1127
1128   /* Let SIZE be MEM, but no more than the maximum object size or
1129      system resource limits.  Avoid the MIN macro here, as it is not
1130      quite right when only one argument is floating point.  Don't
1131      bother to check for values like RLIM_INFINITY since in practice
1132      they are not much less than SIZE_MAX.  */
1133   size_t size = SIZE_MAX;
1134   if (mem < size)
1135     size = mem;
1136   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1137     size = rlimit.rlim_cur;
1138 #ifdef RLIMIT_AS
1139   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1140     size = rlimit.rlim_cur;
1141 #endif
1142
1143   /* Leave a large safety margin for the above limits, as failure can
1144      occur when they are exceeded.  */
1145   size /= 2;
1146
1147 #ifdef RLIMIT_RSS
1148   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1149      Exceeding RSS is not fatal, but can be quite slow.  */
1150   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1151     size = rlimit.rlim_cur / 16 * 15;
1152 #endif
1153
1154   /* Use no less than the minimum.  */
1155   return MAX (size, MIN_SORT_SIZE);
1156 }
1157
1158 /* Return the sort buffer size to use with the input files identified
1159    by FPS and FILES, which are alternate names of the same files.
1160    NFILES gives the number of input files; NFPS may be less.  Assume
1161    that each input line requires LINE_BYTES extra bytes' worth of line
1162    information.  Do not exceed the size bound specified by the user
1163    (or a default size bound, if the user does not specify one).  */
1164
1165 static size_t
1166 sort_buffer_size (FILE *const *fps, size_t nfps,
1167                   char *const *files, size_t nfiles,
1168                   size_t line_bytes)
1169 {
1170   /* A bound on the input size.  If zero, the bound hasn't been
1171      determined yet.  */
1172   static size_t size_bound;
1173
1174   /* In the worst case, each input byte is a newline.  */
1175   size_t worst_case_per_input_byte = line_bytes + 1;
1176
1177   /* Keep enough room for one extra input line and an extra byte.
1178      This extra room might be needed when preparing to read EOF.  */
1179   size_t size = worst_case_per_input_byte + 1;
1180
1181   size_t i;
1182
1183   for (i = 0; i < nfiles; i++)
1184     {
1185       struct stat st;
1186       off_t file_size;
1187       size_t worst_case;
1188
1189       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1190            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1191            : stat (files[i], &st))
1192           != 0)
1193         die (_("stat failed"), files[i]);
1194
1195       if (S_ISREG (st.st_mode))
1196         file_size = st.st_size;
1197       else
1198         {
1199           /* The file has unknown size.  If the user specified a sort
1200              buffer size, use that; otherwise, guess the size.  */
1201           if (sort_size)
1202             return sort_size;
1203           file_size = INPUT_FILE_SIZE_GUESS;
1204         }
1205
1206       if (! size_bound)
1207         {
1208           size_bound = sort_size;
1209           if (! size_bound)
1210             size_bound = default_sort_size ();
1211         }
1212
1213       /* Add the amount of memory needed to represent the worst case
1214          where the input consists entirely of newlines followed by a
1215          single non-newline.  Check for overflow.  */
1216       worst_case = file_size * worst_case_per_input_byte + 1;
1217       if (file_size != worst_case / worst_case_per_input_byte
1218           || size_bound - size <= worst_case)
1219         return size_bound;
1220       size += worst_case;
1221     }
1222
1223   return size;
1224 }
1225
1226 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1227    must be at least sizeof (struct line).  Allocate ALLOC bytes
1228    initially.  */
1229
1230 static void
1231 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1232 {
1233   /* Ensure that the line array is properly aligned.  If the desired
1234      size cannot be allocated, repeatedly halve it until allocation
1235      succeeds.  The smaller allocation may hurt overall performance,
1236      but that's better than failing.  */
1237   for (;;)
1238     {
1239       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1240       buf->buf = malloc (alloc);
1241       if (buf->buf)
1242         break;
1243       alloc /= 2;
1244       if (alloc <= line_bytes + 1)
1245         xalloc_die ();
1246     }
1247
1248   buf->line_bytes = line_bytes;
1249   buf->alloc = alloc;
1250   buf->used = buf->left = buf->nlines = 0;
1251   buf->eof = false;
1252 }
1253
1254 /* Return one past the limit of the line array.  */
1255
1256 static inline struct line *
1257 buffer_linelim (struct buffer const *buf)
1258 {
1259   return (struct line *) (buf->buf + buf->alloc);
1260 }
1261
1262 /* Return a pointer to the first character of the field specified
1263    by KEY in LINE. */
1264
1265 static char *
1266 begfield (const struct line *line, const struct keyfield *key)
1267 {
1268   char *ptr = line->text, *lim = ptr + line->length - 1;
1269   size_t sword = key->sword;
1270   size_t schar = key->schar;
1271   size_t remaining_bytes;
1272
1273   /* The leading field separator itself is included in a field when -t
1274      is absent.  */
1275
1276   if (tab != TAB_DEFAULT)
1277     while (ptr < lim && sword--)
1278       {
1279         while (ptr < lim && *ptr != tab)
1280           ++ptr;
1281         if (ptr < lim)
1282           ++ptr;
1283       }
1284   else
1285     while (ptr < lim && sword--)
1286       {
1287         while (ptr < lim && blanks[to_uchar (*ptr)])
1288           ++ptr;
1289         while (ptr < lim && !blanks[to_uchar (*ptr)])
1290           ++ptr;
1291       }
1292
1293   if (key->skipsblanks)
1294     while (ptr < lim && blanks[to_uchar (*ptr)])
1295       ++ptr;
1296
1297   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1298   remaining_bytes = lim - ptr;
1299   if (schar < remaining_bytes)
1300     ptr += schar;
1301   else
1302     ptr = lim;
1303
1304   return ptr;
1305 }
1306
1307 /* Return the limit of (a pointer to the first character after) the field
1308    in LINE specified by KEY. */
1309
1310 static char *
1311 limfield (const struct line *line, const struct keyfield *key)
1312 {
1313   char *ptr = line->text, *lim = ptr + line->length - 1;
1314   size_t eword = key->eword, echar = key->echar;
1315   size_t remaining_bytes;
1316
1317   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1318      whichever comes first.  If there are more than EWORD fields, leave
1319      PTR pointing at the beginning of the field having zero-based index,
1320      EWORD.  If a delimiter character was specified (via -t), then that
1321      `beginning' is the first character following the delimiting TAB.
1322      Otherwise, leave PTR pointing at the first `blank' character after
1323      the preceding field.  */
1324   if (tab != TAB_DEFAULT)
1325     while (ptr < lim && eword--)
1326       {
1327         while (ptr < lim && *ptr != tab)
1328           ++ptr;
1329         if (ptr < lim && (eword | echar))
1330           ++ptr;
1331       }
1332   else
1333     while (ptr < lim && eword--)
1334       {
1335         while (ptr < lim && blanks[to_uchar (*ptr)])
1336           ++ptr;
1337         while (ptr < lim && !blanks[to_uchar (*ptr)])
1338           ++ptr;
1339       }
1340
1341 #ifdef POSIX_UNSPECIFIED
1342   /* The following block of code makes GNU sort incompatible with
1343      standard Unix sort, so it's ifdef'd out for now.
1344      The POSIX spec isn't clear on how to interpret this.
1345      FIXME: request clarification.
1346
1347      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1348      Date: Thu, 30 May 96 12:20:41 -0400
1349      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1350
1351      [...]I believe I've found another bug in `sort'.
1352
1353      $ cat /tmp/sort.in
1354      a b c 2 d
1355      pq rs 1 t
1356      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1357      a b c 2 d
1358      pq rs 1 t
1359      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1360      pq rs 1 t
1361      a b c 2 d
1362
1363      Unix sort produced the answer I expected: sort on the single character
1364      in column 7.  GNU sort produced different results, because it disagrees
1365      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1366      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1367      "skip M-1 fields, then either N-1 characters or the rest of the current
1368      field, whichever comes first".  This extra clause applies only to
1369      key-ends, not key-starts.
1370      */
1371
1372   /* Make LIM point to the end of (one byte past) the current field.  */
1373   if (tab != TAB_DEFAULT)
1374     {
1375       char *newlim;
1376       newlim = memchr (ptr, tab, lim - ptr);
1377       if (newlim)
1378         lim = newlim;
1379     }
1380   else
1381     {
1382       char *newlim;
1383       newlim = ptr;
1384       while (newlim < lim && blanks[to_uchar (*newlim)])
1385         ++newlim;
1386       while (newlim < lim && !blanks[to_uchar (*newlim)])
1387         ++newlim;
1388       lim = newlim;
1389     }
1390 #endif
1391
1392   /* If we're ignoring leading blanks when computing the End
1393      of the field, don't start counting bytes until after skipping
1394      past any leading blanks. */
1395   if (key->skipeblanks)
1396     while (ptr < lim && blanks[to_uchar (*ptr)])
1397       ++ptr;
1398
1399   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1400   remaining_bytes = lim - ptr;
1401   if (echar < remaining_bytes)
1402     ptr += echar;
1403   else
1404     ptr = lim;
1405
1406   return ptr;
1407 }
1408
1409 /* Fill BUF reading from FP, moving buf->left bytes from the end
1410    of buf->buf to the beginning first.  If EOF is reached and the
1411    file wasn't terminated by a newline, supply one.  Set up BUF's line
1412    table too.  FILE is the name of the file corresponding to FP.
1413    Return true if some input was read.  */
1414
1415 static bool
1416 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1417 {
1418   struct keyfield const *key = keylist;
1419   char eol = eolchar;
1420   size_t line_bytes = buf->line_bytes;
1421   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1422
1423   if (buf->eof)
1424     return false;
1425
1426   if (buf->used != buf->left)
1427     {
1428       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1429       buf->used = buf->left;
1430       buf->nlines = 0;
1431     }
1432
1433   for (;;)
1434     {
1435       char *ptr = buf->buf + buf->used;
1436       struct line *linelim = buffer_linelim (buf);
1437       struct line *line = linelim - buf->nlines;
1438       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1439       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1440
1441       while (line_bytes + 1 < avail)
1442         {
1443           /* Read as many bytes as possible, but do not read so many
1444              bytes that there might not be enough room for the
1445              corresponding line array.  The worst case is when the
1446              rest of the input file consists entirely of newlines,
1447              except that the last byte is not a newline.  */
1448           size_t readsize = (avail - 1) / (line_bytes + 1);
1449           size_t bytes_read = fread (ptr, 1, readsize, fp);
1450           char *ptrlim = ptr + bytes_read;
1451           char *p;
1452           avail -= bytes_read;
1453
1454           if (bytes_read != readsize)
1455             {
1456               if (ferror (fp))
1457                 die (_("read failed"), file);
1458               if (feof (fp))
1459                 {
1460                   buf->eof = true;
1461                   if (buf->buf == ptrlim)
1462                     return false;
1463                   if (ptrlim[-1] != eol)
1464                     *ptrlim++ = eol;
1465                 }
1466             }
1467
1468           /* Find and record each line in the just-read input.  */
1469           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1470             {
1471               ptr = p + 1;
1472               line--;
1473               line->text = line_start;
1474               line->length = ptr - line_start;
1475               mergesize = MAX (mergesize, line->length);
1476               avail -= line_bytes;
1477
1478               if (key)
1479                 {
1480                   /* Precompute the position of the first key for
1481                      efficiency.  */
1482                   line->keylim = (key->eword == SIZE_MAX
1483                                   ? p
1484                                   : limfield (line, key));
1485
1486                   if (key->sword != SIZE_MAX)
1487                     line->keybeg = begfield (line, key);
1488                   else
1489                     {
1490                       if (key->skipsblanks)
1491                         while (blanks[to_uchar (*line_start)])
1492                           line_start++;
1493                       line->keybeg = line_start;
1494                     }
1495                 }
1496
1497               line_start = ptr;
1498             }
1499
1500           ptr = ptrlim;
1501           if (buf->eof)
1502             break;
1503         }
1504
1505       buf->used = ptr - buf->buf;
1506       buf->nlines = buffer_linelim (buf) - line;
1507       if (buf->nlines != 0)
1508         {
1509           buf->left = ptr - line_start;
1510           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1511           return true;
1512         }
1513
1514       {
1515         /* The current input line is too long to fit in the buffer.
1516            Double the buffer size and try again, keeping it properly
1517            aligned.  */
1518         size_t line_alloc = buf->alloc / sizeof (struct line);
1519         buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1520         buf->alloc = line_alloc * sizeof (struct line);
1521       }
1522     }
1523 }
1524
1525 /* Compare strings A and B as numbers without explicitly converting them to
1526    machine numbers.  Comparatively slow for short strings, but asymptotically
1527    hideously fast. */
1528
1529 static int
1530 numcompare (const char *a, const char *b)
1531 {
1532   while (blanks[to_uchar (*a)])
1533     a++;
1534   while (blanks[to_uchar (*b)])
1535     b++;
1536
1537   return strnumcmp (a, b, decimal_point, thousands_sep);
1538 }
1539
1540 static int
1541 general_numcompare (const char *sa, const char *sb)
1542 {
1543   /* FIXME: add option to warn about failed conversions.  */
1544   /* FIXME: maybe add option to try expensive FP conversion
1545      only if A and B can't be compared more cheaply/accurately.  */
1546
1547   char *ea;
1548   char *eb;
1549   double a = strtod (sa, &ea);
1550   double b = strtod (sb, &eb);
1551
1552   /* Put conversion errors at the start of the collating sequence.  */
1553   if (sa == ea)
1554     return sb == eb ? 0 : -1;
1555   if (sb == eb)
1556     return 1;
1557
1558   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1559      conversion errors but before numbers; sort them by internal
1560      bit-pattern, for lack of a more portable alternative.  */
1561   return (a < b ? -1
1562           : a > b ? 1
1563           : a == b ? 0
1564           : b == b ? -1
1565           : a == a ? 1
1566           : memcmp ((char *) &a, (char *) &b, sizeof a));
1567 }
1568
1569 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1570    Return 0 if the name in S is not recognized.  */
1571
1572 static int
1573 getmonth (char const *month, size_t len)
1574 {
1575   size_t lo = 0;
1576   size_t hi = MONTHS_PER_YEAR;
1577   char const *monthlim = month + len;
1578
1579   for (;;)
1580     {
1581       if (month == monthlim)
1582         return 0;
1583       if (!blanks[to_uchar (*month)])
1584         break;
1585       ++month;
1586     }
1587
1588   do
1589     {
1590       size_t ix = (lo + hi) / 2;
1591       char const *m = month;
1592       char const *n = monthtab[ix].name;
1593
1594       for (;; m++, n++)
1595         {
1596           if (!*n)
1597             return monthtab[ix].val;
1598           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1599             {
1600               hi = ix;
1601               break;
1602             }
1603           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1604             {
1605               lo = ix + 1;
1606               break;
1607             }
1608         }
1609     }
1610   while (lo < hi);
1611
1612   return 0;
1613 }
1614
1615 /* A source of random data.  */
1616 static struct randread_source *randread_source;
1617
1618 /* Return the Ith randomly-generated state.  The caller must invoke
1619    random_state (H) for all H less than I before invoking random_state
1620    (I).  */
1621
1622 static struct md5_ctx
1623 random_state (size_t i)
1624 {
1625   /* An array of states resulting from the random data, and counts of
1626      its used and allocated members.  */
1627   static struct md5_ctx *state;
1628   static size_t used;
1629   static size_t allocated;
1630
1631   struct md5_ctx *s = &state[i];
1632
1633   if (used <= i)
1634     {
1635       unsigned char buf[MD5_DIGEST_SIZE];
1636
1637       used++;
1638
1639       if (allocated <= i)
1640         {
1641           state = X2NREALLOC (state, &allocated);
1642           s = &state[i];
1643         }
1644
1645       randread (randread_source, buf, sizeof buf);
1646       md5_init_ctx (s);
1647       md5_process_bytes (buf, sizeof buf, s);
1648     }
1649
1650   return *s;
1651 }
1652
1653 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1654    with length LENGTHB.  Return negative if less, zero if equal,
1655    positive if greater.  */
1656
1657 static int
1658 cmp_hashes (char const *texta, size_t lena,
1659             char const *textb, size_t lenb)
1660 {
1661   /* Try random hashes until a pair of hashes disagree.  But if the
1662      first pair of random hashes agree, check whether the keys are
1663      identical and if so report no difference.  */
1664   int diff;
1665   size_t i;
1666   for (i = 0; ; i++)
1667     {
1668       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1669       struct md5_ctx s[2];
1670       s[0] = s[1] = random_state (i);
1671       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1672       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1673       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1674       if (diff != 0)
1675         break;
1676       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1677         break;
1678     }
1679
1680   return diff;
1681 }
1682
1683 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1684    using one or more random hash functions.  */
1685
1686 static int
1687 compare_random (char *restrict texta, size_t lena,
1688                 char *restrict textb, size_t lenb)
1689 {
1690   int diff;
1691
1692   if (! hard_LC_COLLATE)
1693     diff = cmp_hashes (texta, lena, textb, lenb);
1694   else
1695     {
1696       /* Transform the text into the basis of comparison, so that byte
1697          strings that would otherwise considered to be equal are
1698          considered equal here even if their bytes differ.  */
1699
1700       char *buf = NULL;
1701       char stackbuf[4000];
1702       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1703       bool a_fits = tlena <= sizeof stackbuf;
1704       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1705                                (a_fits ? sizeof stackbuf - tlena : 0),
1706                                textb, lenb);
1707
1708       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1709         buf = stackbuf;
1710       else
1711         {
1712           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1713              faster by avoiding the need for an extra buffer copy.  */
1714           buf = xmalloc (tlena + tlenb + 1);
1715           xmemxfrm (buf, tlena + 1, texta, lena);
1716           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1717         }
1718
1719       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1720
1721       if (buf != stackbuf)
1722         free (buf);
1723     }
1724
1725   return diff;
1726 }
1727
1728 /* Compare two lines A and B trying every key in sequence until there
1729    are no more keys or a difference is found. */
1730
1731 static int
1732 keycompare (const struct line *a, const struct line *b)
1733 {
1734   struct keyfield const *key = keylist;
1735
1736   /* For the first iteration only, the key positions have been
1737      precomputed for us. */
1738   char *texta = a->keybeg;
1739   char *textb = b->keybeg;
1740   char *lima = a->keylim;
1741   char *limb = b->keylim;
1742
1743   int diff;
1744
1745   for (;;)
1746     {
1747       char const *translate = key->translate;
1748       bool const *ignore = key->ignore;
1749
1750       /* Find the lengths. */
1751       size_t lena = lima <= texta ? 0 : lima - texta;
1752       size_t lenb = limb <= textb ? 0 : limb - textb;
1753
1754       /* Actually compare the fields. */
1755
1756       if (key->random)
1757         diff = compare_random (texta, lena, textb, lenb);
1758       else if (key->numeric | key->general_numeric)
1759         {
1760           char savea = *lima, saveb = *limb;
1761
1762           *lima = *limb = '\0';
1763           diff = ((key->numeric ? numcompare : general_numcompare)
1764                   (texta, textb));
1765           *lima = savea, *limb = saveb;
1766         }
1767       else if (key->month)
1768         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1769       /* Sorting like this may become slow, so in a simple locale the user
1770          can select a faster sort that is similar to ascii sort.  */
1771       else if (hard_LC_COLLATE)
1772         {
1773           if (ignore || translate)
1774             {
1775               char buf[4000];
1776               size_t size = lena + 1 + lenb + 1;
1777               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1778               char *copy_b = copy_a + lena + 1;
1779               size_t new_len_a, new_len_b, i;
1780
1781               /* Ignore and/or translate chars before comparing.  */
1782               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1783                 {
1784                   if (i < lena)
1785                     {
1786                       copy_a[new_len_a] = (translate
1787                                            ? translate[to_uchar (texta[i])]
1788                                            : texta[i]);
1789                       if (!ignore || !ignore[to_uchar (texta[i])])
1790                         ++new_len_a;
1791                     }
1792                   if (i < lenb)
1793                     {
1794                       copy_b[new_len_b] = (translate
1795                                            ? translate[to_uchar (textb[i])]
1796                                            : textb [i]);
1797                       if (!ignore || !ignore[to_uchar (textb[i])])
1798                         ++new_len_b;
1799                     }
1800                 }
1801
1802               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1803
1804               if (sizeof buf < size)
1805                 free (copy_a);
1806             }
1807           else if (lena == 0)
1808             diff = - NONZERO (lenb);
1809           else if (lenb == 0)
1810             goto greater;
1811           else
1812             diff = xmemcoll (texta, lena, textb, lenb);
1813         }
1814       else if (ignore)
1815         {
1816 #define CMP_WITH_IGNORE(A, B)                                           \
1817   do                                                                    \
1818     {                                                                   \
1819           for (;;)                                                      \
1820             {                                                           \
1821               while (texta < lima && ignore[to_uchar (*texta)])         \
1822                 ++texta;                                                \
1823               while (textb < limb && ignore[to_uchar (*textb)])         \
1824                 ++textb;                                                \
1825               if (! (texta < lima && textb < limb))                     \
1826                 break;                                                  \
1827               diff = to_uchar (A) - to_uchar (B);                       \
1828               if (diff)                                                 \
1829                 goto not_equal;                                         \
1830               ++texta;                                                  \
1831               ++textb;                                                  \
1832             }                                                           \
1833                                                                         \
1834           diff = (texta < lima) - (textb < limb);                       \
1835     }                                                                   \
1836   while (0)
1837
1838           if (translate)
1839             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1840                              translate[to_uchar (*textb)]);
1841           else
1842             CMP_WITH_IGNORE (*texta, *textb);
1843         }
1844       else if (lena == 0)
1845         diff = - NONZERO (lenb);
1846       else if (lenb == 0)
1847         goto greater;
1848       else
1849         {
1850           if (translate)
1851             {
1852               while (texta < lima && textb < limb)
1853                 {
1854                   diff = (to_uchar (translate[to_uchar (*texta++)])
1855                           - to_uchar (translate[to_uchar (*textb++)]));
1856                   if (diff)
1857                     goto not_equal;
1858                 }
1859             }
1860           else
1861             {
1862               diff = memcmp (texta, textb, MIN (lena, lenb));
1863               if (diff)
1864                 goto not_equal;
1865             }
1866           diff = lena < lenb ? -1 : lena != lenb;
1867         }
1868
1869       if (diff)
1870         goto not_equal;
1871
1872       key = key->next;
1873       if (! key)
1874         break;
1875
1876       /* Find the beginning and limit of the next field.  */
1877       if (key->eword != SIZE_MAX)
1878         lima = limfield (a, key), limb = limfield (b, key);
1879       else
1880         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1881
1882       if (key->sword != SIZE_MAX)
1883         texta = begfield (a, key), textb = begfield (b, key);
1884       else
1885         {
1886           texta = a->text, textb = b->text;
1887           if (key->skipsblanks)
1888             {
1889               while (texta < lima && blanks[to_uchar (*texta)])
1890                 ++texta;
1891               while (textb < limb && blanks[to_uchar (*textb)])
1892                 ++textb;
1893             }
1894         }
1895     }
1896
1897   return 0;
1898
1899  greater:
1900   diff = 1;
1901  not_equal:
1902   return key->reverse ? -diff : diff;
1903 }
1904
1905 /* Compare two lines A and B, returning negative, zero, or positive
1906    depending on whether A compares less than, equal to, or greater than B. */
1907
1908 static int
1909 compare (const struct line *a, const struct line *b)
1910 {
1911   int diff;
1912   size_t alen, blen;
1913
1914   /* First try to compare on the specified keys (if any).
1915      The only two cases with no key at all are unadorned sort,
1916      and unadorned sort -r. */
1917   if (keylist)
1918     {
1919       diff = keycompare (a, b);
1920       if (diff | unique | stable)
1921         return diff;
1922     }
1923
1924   /* If the keys all compare equal (or no keys were specified)
1925      fall through to the default comparison.  */
1926   alen = a->length - 1, blen = b->length - 1;
1927
1928   if (alen == 0)
1929     diff = - NONZERO (blen);
1930   else if (blen == 0)
1931     diff = 1;
1932   else if (hard_LC_COLLATE)
1933     diff = xmemcoll (a->text, alen, b->text, blen);
1934   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1935     diff = alen < blen ? -1 : alen != blen;
1936
1937   return reverse ? -diff : diff;
1938 }
1939
1940 /* Check that the lines read from FILE_NAME come in order.  Return
1941    true if they are in order.  If CHECKONLY == 'c', also print a
1942    diagnostic (FILE_NAME, line number, contents of line) to stderr if
1943    they are not in order.  */
1944
1945 static bool
1946 check (char const *file_name, char checkonly)
1947 {
1948   FILE *fp = xfopen (file_name, "r");
1949   struct buffer buf;            /* Input buffer. */
1950   struct line temp;             /* Copy of previous line. */
1951   size_t alloc = 0;
1952   uintmax_t line_number = 0;
1953   struct keyfield const *key = keylist;
1954   bool nonunique = ! unique;
1955   bool ordered = true;
1956
1957   initbuf (&buf, sizeof (struct line),
1958            MAX (merge_buffer_size, sort_size));
1959   temp.text = NULL;
1960
1961   while (fillbuf (&buf, fp, file_name))
1962     {
1963       struct line const *line = buffer_linelim (&buf);
1964       struct line const *linebase = line - buf.nlines;
1965
1966       /* Make sure the line saved from the old buffer contents is
1967          less than or equal to the first line of the new buffer. */
1968       if (alloc && nonunique <= compare (&temp, line - 1))
1969         {
1970         found_disorder:
1971           {
1972             if (checkonly == 'c')
1973               {
1974                 struct line const *disorder_line = line - 1;
1975                 uintmax_t disorder_line_number =
1976                   buffer_linelim (&buf) - disorder_line + line_number;
1977                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1978                 fprintf (stderr, _("%s: %s:%s: disorder: "),
1979                          program_name, file_name,
1980                          umaxtostr (disorder_line_number, hr_buf));
1981                 write_bytes (disorder_line->text, disorder_line->length,
1982                              stderr, _("standard error"));
1983               }
1984
1985             ordered = false;
1986             break;
1987           }
1988         }
1989
1990       /* Compare each line in the buffer with its successor.  */
1991       while (linebase < --line)
1992         if (nonunique <= compare (line, line - 1))
1993           goto found_disorder;
1994
1995       line_number += buf.nlines;
1996
1997       /* Save the last line of the buffer.  */
1998       if (alloc < line->length)
1999         {
2000           do
2001             {
2002               alloc *= 2;
2003               if (! alloc)
2004                 {
2005                   alloc = line->length;
2006                   break;
2007                 }
2008             }
2009           while (alloc < line->length);
2010
2011           temp.text = xrealloc (temp.text, alloc);
2012         }
2013       memcpy (temp.text, line->text, line->length);
2014       temp.length = line->length;
2015       if (key)
2016         {
2017           temp.keybeg = temp.text + (line->keybeg - line->text);
2018           temp.keylim = temp.text + (line->keylim - line->text);
2019         }
2020     }
2021
2022   xfclose (fp, file_name);
2023   free (buf.buf);
2024   free (temp.text);
2025   return ordered;
2026 }
2027
2028 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2029    files (all of which are at the start of the FILES array), and
2030    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2031    Close input and output files before returning.
2032    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2033    the output file is standard output.  If OFP is NULL, the output
2034    file has not been opened yet (or written to, if standard output).  */
2035
2036 static void
2037 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2038           FILE *ofp, char const *output_file)
2039 {
2040   FILE *fps[NMERGE];            /* Input streams for each file.  */
2041   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2042   struct line saved;            /* Saved line storage for unique check. */
2043   struct line const *savedline = NULL;
2044                                 /* &saved if there is a saved line. */
2045   size_t savealloc = 0;         /* Size allocated for the saved line. */
2046   struct line const *cur[NMERGE]; /* Current line in each line table. */
2047   struct line const *base[NMERGE]; /* Base of each line table.  */
2048   size_t ord[NMERGE];           /* Table representing a permutation of fps,
2049                                    such that cur[ord[0]] is the smallest line
2050                                    and will be next output. */
2051   size_t i;
2052   size_t j;
2053   size_t t;
2054   struct keyfield const *key = keylist;
2055   saved.text = NULL;
2056
2057   /* Read initial lines from each input file. */
2058   for (i = 0; i < nfiles; )
2059     {
2060       fps[i] = (files[i].pid
2061                 ? open_temp (files[i].name, files[i].pid)
2062                 : xfopen (files[i].name, "r"));
2063       initbuf (&buffer[i], sizeof (struct line),
2064                MAX (merge_buffer_size, sort_size / nfiles));
2065       if (fillbuf (&buffer[i], fps[i], files[i].name))
2066         {
2067           struct line const *linelim = buffer_linelim (&buffer[i]);
2068           cur[i] = linelim - 1;
2069           base[i] = linelim - buffer[i].nlines;
2070           i++;
2071         }
2072       else
2073         {
2074           /* fps[i] is empty; eliminate it from future consideration.  */
2075           xfclose (fps[i], files[i].name);
2076           if (i < ntemps)
2077             {
2078               ntemps--;
2079               zaptemp (files[i].name);
2080             }
2081           free (buffer[i].buf);
2082           --nfiles;
2083           for (j = i; j < nfiles; ++j)
2084             files[j] = files[j + 1];
2085         }
2086     }
2087
2088   if (! ofp)
2089     ofp = xfopen (output_file, "w");
2090
2091   /* Set up the ord table according to comparisons among input lines.
2092      Since this only reorders two items if one is strictly greater than
2093      the other, it is stable. */
2094   for (i = 0; i < nfiles; ++i)
2095     ord[i] = i;
2096   for (i = 1; i < nfiles; ++i)
2097     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2098       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2099
2100   /* Repeatedly output the smallest line until no input remains. */
2101   while (nfiles)
2102     {
2103       struct line const *smallest = cur[ord[0]];
2104
2105       /* If uniquified output is turned on, output only the first of
2106          an identical series of lines. */
2107       if (unique)
2108         {
2109           if (savedline && compare (savedline, smallest))
2110             {
2111               savedline = NULL;
2112               write_bytes (saved.text, saved.length, ofp, output_file);
2113             }
2114           if (!savedline)
2115             {
2116               savedline = &saved;
2117               if (savealloc < smallest->length)
2118                 {
2119                   do
2120                     if (! savealloc)
2121                       {
2122                         savealloc = smallest->length;
2123                         break;
2124                       }
2125                   while ((savealloc *= 2) < smallest->length);
2126
2127                   saved.text = xrealloc (saved.text, savealloc);
2128                 }
2129               saved.length = smallest->length;
2130               memcpy (saved.text, smallest->text, saved.length);
2131               if (key)
2132                 {
2133                   saved.keybeg =
2134                     saved.text + (smallest->keybeg - smallest->text);
2135                   saved.keylim =
2136                     saved.text + (smallest->keylim - smallest->text);
2137                 }
2138             }
2139         }
2140       else
2141         write_bytes (smallest->text, smallest->length, ofp, output_file);
2142
2143       /* Check if we need to read more lines into core. */
2144       if (base[ord[0]] < smallest)
2145         cur[ord[0]] = smallest - 1;
2146       else
2147         {
2148           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2149             {
2150               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2151               cur[ord[0]] = linelim - 1;
2152               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2153             }
2154           else
2155             {
2156               /* We reached EOF on fps[ord[0]].  */
2157               for (i = 1; i < nfiles; ++i)
2158                 if (ord[i] > ord[0])
2159                   --ord[i];
2160               --nfiles;
2161               xfclose (fps[ord[0]], files[ord[0]].name);
2162               if (ord[0] < ntemps)
2163                 {
2164                   ntemps--;
2165                   zaptemp (files[ord[0]].name);
2166                 }
2167               free (buffer[ord[0]].buf);
2168               for (i = ord[0]; i < nfiles; ++i)
2169                 {
2170                   fps[i] = fps[i + 1];
2171                   files[i] = files[i + 1];
2172                   buffer[i] = buffer[i + 1];
2173                   cur[i] = cur[i + 1];
2174                   base[i] = base[i + 1];
2175                 }
2176               for (i = 0; i < nfiles; ++i)
2177                 ord[i] = ord[i + 1];
2178               continue;
2179             }
2180         }
2181
2182       /* The new line just read in may be larger than other lines
2183          already in main memory; push it back in the queue until we
2184          encounter a line larger than it.  Optimize for the common
2185          case where the new line is smallest.  */
2186       {
2187         size_t lo = 1;
2188         size_t hi = nfiles;
2189         size_t probe = lo;
2190         size_t ord0 = ord[0];
2191         size_t count_of_smaller_lines;
2192
2193         while (lo < hi)
2194           {
2195             int cmp = compare (cur[ord0], cur[ord[probe]]);
2196             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2197               hi = probe;
2198             else
2199               lo = probe + 1;
2200             probe = (lo + hi) / 2;
2201           }
2202
2203         count_of_smaller_lines = lo - 1;
2204         for (j = 0; j < count_of_smaller_lines; j++)
2205           ord[j] = ord[j + 1];
2206         ord[count_of_smaller_lines] = ord0;
2207       }
2208
2209       /* Free up some resources every once in a while.  */
2210       if (MAX_PROCS_BEFORE_REAP < nprocs)
2211         reap_some ();
2212     }
2213
2214   if (unique && savedline)
2215     {
2216       write_bytes (saved.text, saved.length, ofp, output_file);
2217       free (saved.text);
2218     }
2219
2220   xfclose (ofp, output_file);
2221 }
2222
2223 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2224    and HI (with NHI members).  T, LO, and HI point just past their
2225    respective arrays, and the arrays are in reverse order.  NLO and
2226    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2227
2228 static inline void
2229 mergelines (struct line *t,
2230             struct line const *lo, size_t nlo,
2231             struct line const *hi, size_t nhi)
2232 {
2233   for (;;)
2234     if (compare (lo - 1, hi - 1) <= 0)
2235       {
2236         *--t = *--lo;
2237         if (! --nlo)
2238           {
2239             /* HI - NHI equalled T - (NLO + NHI) when this function
2240                began.  Therefore HI must equal T now, and there is no
2241                need to copy from HI to T.  */
2242             return;
2243           }
2244       }
2245     else
2246       {
2247         *--t = *--hi;
2248         if (! --nhi)
2249           {
2250             do
2251               *--t = *--lo;
2252             while (--nlo);
2253
2254             return;
2255           }
2256       }
2257 }
2258
2259 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2260    NLINES must be at least 2.
2261    The input and output arrays are in reverse order, and LINES and
2262    TEMP point just past the end of their respective arrays.
2263
2264    Use a recursive divide-and-conquer algorithm, in the style
2265    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2266    the optimization suggested by exercise 5.2.4-10; this requires room
2267    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2268    writes that this memory optimization was originally published by
2269    D. A. Bell, Comp J. 1 (1958), 75.  */
2270
2271 static void
2272 sortlines (struct line *lines, size_t nlines, struct line *temp)
2273 {
2274   if (nlines == 2)
2275     {
2276       if (0 < compare (&lines[-1], &lines[-2]))
2277         {
2278           struct line tmp = lines[-1];
2279           lines[-1] = lines[-2];
2280           lines[-2] = tmp;
2281         }
2282     }
2283   else
2284     {
2285       size_t nlo = nlines / 2;
2286       size_t nhi = nlines - nlo;
2287       struct line *lo = lines;
2288       struct line *hi = lines - nlo;
2289       struct line *sorted_lo = temp;
2290
2291       sortlines (hi, nhi, temp);
2292       if (1 < nlo)
2293         sortlines_temp (lo, nlo, sorted_lo);
2294       else
2295         sorted_lo[-1] = lo[-1];
2296
2297       mergelines (lines, sorted_lo, nlo, hi, nhi);
2298     }
2299 }
2300
2301 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2302    rather than sorting in place.  */
2303
2304 static void
2305 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2306 {
2307   if (nlines == 2)
2308     {
2309       /* Declare `swap' as int, not bool, to work around a bug
2310          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2311          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2312       int swap = (0 < compare (&lines[-1], &lines[-2]));
2313       temp[-1] = lines[-1 - swap];
2314       temp[-2] = lines[-2 + swap];
2315     }
2316   else
2317     {
2318       size_t nlo = nlines / 2;
2319       size_t nhi = nlines - nlo;
2320       struct line *lo = lines;
2321       struct line *hi = lines - nlo;
2322       struct line *sorted_hi = temp - nlo;
2323
2324       sortlines_temp (hi, nhi, sorted_hi);
2325       if (1 < nlo)
2326         sortlines (lo, nlo, temp);
2327
2328       mergelines (temp, lo, nlo, sorted_hi, nhi);
2329     }
2330 }
2331
2332 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2333    the same as OUTFILE.  If found, merge the found instances (and perhaps
2334    some other files) into a temporary file so that it can in turn be
2335    merged into OUTFILE without destroying OUTFILE before it is completely
2336    read.  Return the new value of NFILES, which differs from the old if
2337    some merging occurred.
2338
2339    This test ensures that an otherwise-erroneous use like
2340    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2341    It's not clear that POSIX requires this nicety.
2342    Detect common error cases, but don't try to catch obscure cases like
2343    "cat ... FILE ... | sort -m -o FILE"
2344    where traditional "sort" doesn't copy the input and where
2345    people should know that they're getting into trouble anyway.
2346    Catching these obscure cases would slow down performance in
2347    common cases.  */
2348
2349 static size_t
2350 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2351                       size_t nfiles, char const *outfile)
2352 {
2353   size_t i;
2354   bool got_outstat = false;
2355   struct stat outstat;
2356
2357   for (i = ntemps; i < nfiles; i++)
2358     {
2359       bool is_stdin = STREQ (files[i].name, "-");
2360       bool same;
2361       struct stat instat;
2362
2363       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2364         same = true;
2365       else
2366         {
2367           if (! got_outstat)
2368             {
2369               if ((outfile
2370                    ? stat (outfile, &outstat)
2371                    : fstat (STDOUT_FILENO, &outstat))
2372                   != 0)
2373                 break;
2374               got_outstat = true;
2375             }
2376
2377           same = (((is_stdin
2378                     ? fstat (STDIN_FILENO, &instat)
2379                     : stat (files[i].name, &instat))
2380                    == 0)
2381                   && SAME_INODE (instat, outstat));
2382         }
2383
2384       if (same)
2385         {
2386           FILE *tftp;
2387           pid_t pid;
2388           char *temp = create_temp (&tftp, &pid);
2389           mergefps (&files[i],0, nfiles - i, tftp, temp);
2390           files[i].name = temp;
2391           files[i].pid = pid;
2392           return i + 1;
2393         }
2394     }
2395
2396   return nfiles;
2397 }
2398
2399 /* Merge the input FILES.  NTEMPS is the number of files at the
2400    start of FILES that are temporary; it is zero at the top level.
2401    NFILES is the total number of files.  Put the output in
2402    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2403
2404 static void
2405 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2406        char const *output_file)
2407 {
2408   while (NMERGE < nfiles)
2409     {
2410       /* Number of input files processed so far.  */
2411       size_t in;
2412
2413       /* Number of output files generated so far.  */
2414       size_t out;
2415
2416       /* nfiles % NMERGE; this counts input files that are left over
2417          after all full-sized merges have been done.  */
2418       size_t remainder;
2419
2420       /* Number of easily-available slots at the next loop iteration.  */
2421       size_t cheap_slots;
2422
2423       /* Do as many NMERGE-size merges as possible.  */
2424       for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2425         {
2426           FILE *tfp;
2427           pid_t pid;
2428           char *temp = create_temp (&tfp, &pid);
2429           size_t nt = MIN (ntemps, NMERGE);
2430           ntemps -= nt;
2431           mergefps (&files[in], nt, NMERGE, tfp, temp);
2432           files[out].name = temp;
2433           files[out].pid = pid;
2434         }
2435
2436       remainder = nfiles - in;
2437       cheap_slots = NMERGE - out % NMERGE;
2438
2439       if (cheap_slots < remainder)
2440         {
2441           /* So many files remain that they can't all be put into the last
2442              NMERGE-sized output window.  Do one more merge.  Merge as few
2443              files as possible, to avoid needless I/O.  */
2444           size_t nshortmerge = remainder - cheap_slots + 1;
2445           FILE *tfp;
2446           pid_t pid;
2447           char *temp = create_temp (&tfp, &pid);
2448           size_t nt = MIN (ntemps, nshortmerge);
2449           ntemps -= nt;
2450           mergefps (&files[in], nt, nshortmerge, tfp, temp);
2451           files[out].name = temp;
2452           files[out++].pid = pid;
2453           in += nshortmerge;
2454         }
2455
2456       /* Put the remaining input files into the last NMERGE-sized output
2457          window, so they will be merged in the next pass.  */
2458       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2459       ntemps += out;
2460       nfiles -= in - out;
2461     }
2462
2463   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2464   mergefps (files, ntemps, nfiles, NULL, output_file);
2465 }
2466
2467 /* Sort NFILES FILES onto OUTPUT_FILE. */
2468
2469 static void
2470 sort (char * const *files, size_t nfiles, char const *output_file)
2471 {
2472   struct buffer buf;
2473   size_t ntemps = 0;
2474   bool output_file_created = false;
2475
2476   buf.alloc = 0;
2477
2478   while (nfiles)
2479     {
2480       char const *temp_output;
2481       char const *file = *files;
2482       FILE *fp = xfopen (file, "r");
2483       FILE *tfp;
2484       size_t bytes_per_line = (2 * sizeof (struct line)
2485                                - sizeof (struct line) / 2);
2486
2487       if (! buf.alloc)
2488         initbuf (&buf, bytes_per_line,
2489                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2490       buf.eof = false;
2491       files++;
2492       nfiles--;
2493
2494       while (fillbuf (&buf, fp, file))
2495         {
2496           struct line *line;
2497           struct line *linebase;
2498
2499           if (buf.eof && nfiles
2500               && (bytes_per_line + 1
2501                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2502             {
2503               /* End of file, but there is more input and buffer room.
2504                  Concatenate the next input file; this is faster in
2505                  the usual case.  */
2506               buf.left = buf.used;
2507               break;
2508             }
2509
2510           line = buffer_linelim (&buf);
2511           linebase = line - buf.nlines;
2512           if (1 < buf.nlines)
2513             sortlines (line, buf.nlines, linebase);
2514           if (buf.eof && !nfiles && !ntemps && !buf.left)
2515             {
2516               xfclose (fp, file);
2517               tfp = xfopen (output_file, "w");
2518               temp_output = output_file;
2519               output_file_created = true;
2520             }
2521           else
2522             {
2523               ++ntemps;
2524               temp_output = create_temp (&tfp, NULL);
2525             }
2526
2527           do
2528             {
2529               line--;
2530               write_bytes (line->text, line->length, tfp, temp_output);
2531               if (unique)
2532                 while (linebase < line && compare (line, line - 1) == 0)
2533                   line--;
2534             }
2535           while (linebase < line);
2536
2537           xfclose (tfp, temp_output);
2538
2539           /* Free up some resources every once in a while.  */
2540           if (MAX_PROCS_BEFORE_REAP < nprocs)
2541             reap_some ();
2542
2543           if (output_file_created)
2544             goto finish;
2545         }
2546       xfclose (fp, file);
2547     }
2548
2549  finish:
2550   free (buf.buf);
2551
2552   if (! output_file_created)
2553     {
2554       size_t i;
2555       struct tempnode *node = temphead;
2556       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2557       for (i = 0; node; i++)
2558         {
2559           tempfiles[i].name = node->name;
2560           tempfiles[i].pid = node->pid;
2561           node = node->next;
2562         }
2563       merge (tempfiles, ntemps, ntemps, output_file);
2564       free (tempfiles);
2565     }
2566 }
2567
2568 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2569
2570 static void
2571 insertkey (struct keyfield *key_arg)
2572 {
2573   struct keyfield **p;
2574   struct keyfield *key = xmemdup (key_arg, sizeof *key);
2575
2576   for (p = &keylist; *p; p = &(*p)->next)
2577     continue;
2578   *p = key;
2579   key->next = NULL;
2580 }
2581
2582 /* Report a bad field specification SPEC, with extra info MSGID.  */
2583
2584 static void badfieldspec (char const *, char const *)
2585      ATTRIBUTE_NORETURN;
2586 static void
2587 badfieldspec (char const *spec, char const *msgid)
2588 {
2589   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2590          _(msgid), quote (spec));
2591   abort ();
2592 }
2593
2594 /* Report incompatible options.  */
2595
2596 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2597 static void
2598 incompatible_options (char const *opts)
2599 {
2600   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2601   abort ();
2602 }
2603
2604 /* Check compatibility of ordering options.  */
2605
2606 static void
2607 check_ordering_compatibility (void)
2608 {
2609   struct keyfield const *key;
2610
2611   for (key = keylist; key; key = key->next)
2612     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2613               + !!key->ignore))
2614         || (key->random && key->translate))
2615       {
2616         char opts[7];
2617         char *p = opts;
2618         if (key->ignore == nondictionary)
2619           *p++ = 'd';
2620         if (key->translate)
2621           *p++ = 'f';
2622         if (key->general_numeric)
2623           *p++ = 'g';
2624         if (key->ignore == nonprinting)
2625           *p++ = 'i';
2626         if (key->month)
2627           *p++ = 'M';
2628         if (key->numeric)
2629           *p++ = 'n';
2630         if (key->random)
2631           *p++ = 'R';
2632         *p = '\0';
2633         incompatible_options (opts);
2634       }
2635 }
2636
2637 /* Parse the leading integer in STRING and store the resulting value
2638    (which must fit into size_t) into *VAL.  Return the address of the
2639    suffix after the integer.  If the value is too large, silently
2640    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
2641    failure; otherwise, report MSGID and exit on failure.  */
2642
2643 static char const *
2644 parse_field_count (char const *string, size_t *val, char const *msgid)
2645 {
2646   char *suffix;
2647   uintmax_t n;
2648
2649   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2650     {
2651     case LONGINT_OK:
2652     case LONGINT_INVALID_SUFFIX_CHAR:
2653       *val = n;
2654       if (*val == n)
2655         break;
2656       /* Fall through.  */
2657     case LONGINT_OVERFLOW:
2658     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2659       *val = SIZE_MAX;
2660       break;
2661
2662     case LONGINT_INVALID:
2663       if (msgid)
2664         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2665                _(msgid), quote (string));
2666       return NULL;
2667     }
2668
2669   return suffix;
2670 }
2671
2672 /* Handle interrupts and hangups. */
2673
2674 static void
2675 sighandler (int sig)
2676 {
2677   if (! SA_NOCLDSTOP)
2678     signal (sig, SIG_IGN);
2679
2680   cleanup ();
2681
2682   signal (sig, SIG_DFL);
2683   raise (sig);
2684 }
2685
2686 /* Set the ordering options for KEY specified in S.
2687    Return the address of the first character in S that
2688    is not a valid ordering option.
2689    BLANKTYPE is the kind of blanks that 'b' should skip. */
2690
2691 static char *
2692 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2693 {
2694   while (*s)
2695     {
2696       switch (*s)
2697         {
2698         case 'b':
2699           if (blanktype == bl_start || blanktype == bl_both)
2700             key->skipsblanks = true;
2701           if (blanktype == bl_end || blanktype == bl_both)
2702             key->skipeblanks = true;
2703           break;
2704         case 'd':
2705           key->ignore = nondictionary;
2706           break;
2707         case 'f':
2708           key->translate = fold_toupper;
2709           break;
2710         case 'g':
2711           key->general_numeric = true;
2712           break;
2713         case 'i':
2714           /* Option order should not matter, so don't let -i override
2715              -d.  -d implies -i, but -i does not imply -d.  */
2716           if (! key->ignore)
2717             key->ignore = nonprinting;
2718           break;
2719         case 'M':
2720           key->month = true;
2721           break;
2722         case 'n':
2723           key->numeric = true;
2724           break;
2725         case 'R':
2726           key->random = true;
2727           break;
2728         case 'r':
2729           key->reverse = true;
2730           break;
2731         default:
2732           return (char *) s;
2733         }
2734       ++s;
2735     }
2736   return (char *) s;
2737 }
2738
2739 static struct keyfield *
2740 key_init (struct keyfield *key)
2741 {
2742   memset (key, 0, sizeof *key);
2743   key->eword = SIZE_MAX;
2744   return key;
2745 }
2746
2747 int
2748 main (int argc, char **argv)
2749 {
2750   struct keyfield *key;
2751   struct keyfield key_buf;
2752   struct keyfield gkey;
2753   char const *s;
2754   int c = 0;
2755   char checkonly = 0;
2756   bool mergeonly = false;
2757   char *random_source = NULL;
2758   bool need_random = false;
2759   size_t nfiles = 0;
2760   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2761   bool obsolete_usage = (posix2_version () < 200112);
2762   char **files;
2763   char *files_from = NULL;
2764   struct Tokens tok;
2765   char const *outfile = NULL;
2766
2767   initialize_main (&argc, &argv);
2768   set_program_name (argv[0]);
2769   setlocale (LC_ALL, "");
2770   bindtextdomain (PACKAGE, LOCALEDIR);
2771   textdomain (PACKAGE);
2772
2773   initialize_exit_failure (SORT_FAILURE);
2774
2775   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2776 #if HAVE_NL_LANGINFO
2777   hard_LC_TIME = hard_locale (LC_TIME);
2778 #endif
2779
2780   /* Get locale's representation of the decimal point.  */
2781   {
2782     struct lconv const *locale = localeconv ();
2783
2784     /* If the locale doesn't define a decimal point, or if the decimal
2785        point is multibyte, use the C locale's decimal point.  FIXME:
2786        add support for multibyte decimal points.  */
2787     decimal_point = to_uchar (locale->decimal_point[0]);
2788     if (! decimal_point || locale->decimal_point[1])
2789       decimal_point = '.';
2790
2791     /* FIXME: add support for multibyte thousands separators.  */
2792     thousands_sep = to_uchar (*locale->thousands_sep);
2793     if (! thousands_sep || locale->thousands_sep[1])
2794       thousands_sep = -1;
2795   }
2796
2797   have_read_stdin = false;
2798   inittables ();
2799
2800   {
2801     size_t i;
2802     static int const sig[] =
2803       {
2804         /* The usual suspects.  */
2805         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2806 #ifdef SIGPOLL
2807         SIGPOLL,
2808 #endif
2809 #ifdef SIGPROF
2810         SIGPROF,
2811 #endif
2812 #ifdef SIGVTALRM
2813         SIGVTALRM,
2814 #endif
2815 #ifdef SIGXCPU
2816         SIGXCPU,
2817 #endif
2818 #ifdef SIGXFSZ
2819         SIGXFSZ,
2820 #endif
2821       };
2822     enum { nsigs = sizeof sig / sizeof sig[0] };
2823
2824 #if SA_NOCLDSTOP
2825     struct sigaction act;
2826
2827     sigemptyset (&caught_signals);
2828     for (i = 0; i < nsigs; i++)
2829       {
2830         sigaction (sig[i], NULL, &act);
2831         if (act.sa_handler != SIG_IGN)
2832           sigaddset (&caught_signals, sig[i]);
2833       }
2834
2835     act.sa_handler = sighandler;
2836     act.sa_mask = caught_signals;
2837     act.sa_flags = 0;
2838
2839     for (i = 0; i < nsigs; i++)
2840       if (sigismember (&caught_signals, sig[i]))
2841         sigaction (sig[i], &act, NULL);
2842 #else
2843     for (i = 0; i < nsigs; i++)
2844       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2845         {
2846           signal (sig[i], sighandler);
2847           siginterrupt (sig[i], 1);
2848         }
2849 #endif
2850   }
2851
2852   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
2853   atexit (exit_cleanup);
2854
2855   gkey.sword = gkey.eword = SIZE_MAX;
2856   gkey.ignore = NULL;
2857   gkey.translate = NULL;
2858   gkey.numeric = gkey.general_numeric = gkey.random = false;
2859   gkey.month = gkey.reverse = false;
2860   gkey.skipsblanks = gkey.skipeblanks = false;
2861
2862   files = xnmalloc (argc, sizeof *files);
2863
2864   for (;;)
2865     {
2866       /* Parse an operand as a file after "--" was seen; or if
2867          pedantic and a file was seen, unless the POSIX version
2868          predates 1003.1-2001 and -c was not seen and the operand is
2869          "-o FILE" or "-oFILE".  */
2870       int oi = -1;
2871
2872       if (c == -1
2873           || (posixly_correct && nfiles != 0
2874               && ! (obsolete_usage
2875                     && ! checkonly
2876                     && optind != argc
2877                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2878                     && (argv[optind][2] || optind + 1 != argc)))
2879           || ((c = getopt_long (argc, argv, short_options,
2880                                 long_options, &oi))
2881               == -1))
2882         {
2883           if (argc <= optind)
2884             break;
2885           files[nfiles++] = argv[optind++];
2886         }
2887       else switch (c)
2888         {
2889         case 1:
2890           key = NULL;
2891           if (optarg[0] == '+')
2892             {
2893               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2894                                       && ISDIGIT (argv[optind][1]));
2895               obsolete_usage |= minus_pos_usage & ~posixly_correct;
2896               if (obsolete_usage)
2897                 {
2898                   /* Treat +POS1 [-POS2] as a key if possible; but silently
2899                      treat an operand as a file if it is not a valid +POS1.  */
2900                   key = key_init (&key_buf);
2901                   s = parse_field_count (optarg + 1, &key->sword, NULL);
2902                   if (s && *s == '.')
2903                     s = parse_field_count (s + 1, &key->schar, NULL);
2904                   if (! (key->sword | key->schar))
2905                     key->sword = SIZE_MAX;
2906                   if (! s || *set_ordering (s, key, bl_start))
2907                     key = NULL;
2908                   else
2909                     {
2910                       if (minus_pos_usage)
2911                         {
2912                           char const *optarg1 = argv[optind++];
2913                           s = parse_field_count (optarg1 + 1, &key->eword,
2914                                              N_("invalid number after `-'"));
2915                           if (*s == '.')
2916                             s = parse_field_count (s + 1, &key->echar,
2917                                                N_("invalid number after `.'"));
2918                           if (*set_ordering (s, key, bl_end))
2919                             badfieldspec (optarg1,
2920                                       N_("stray character in field spec"));
2921                         }
2922                       insertkey (key);
2923                     }
2924                 }
2925             }
2926           if (! key)
2927             files[nfiles++] = optarg;
2928           break;
2929
2930         case SORT_OPTION:
2931           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
2932           /* Fall through. */
2933         case 'b':
2934         case 'd':
2935         case 'f':
2936         case 'g':
2937         case 'i':
2938         case 'M':
2939         case 'n':
2940         case 'r':
2941         case 'R':
2942           {
2943             char str[2];
2944             str[0] = c;
2945             str[1] = '\0';
2946             set_ordering (str, &gkey, bl_both);
2947           }
2948           break;
2949
2950         case CHECK_OPTION:
2951           c = (optarg
2952                ? XARGMATCH ("--check", optarg, check_args, check_types)
2953                : 'c');
2954           /* Fall through.  */
2955         case 'c':
2956         case 'C':
2957           if (checkonly && checkonly != c)
2958             incompatible_options ("cC");
2959           checkonly = c;
2960           break;
2961
2962         case COMPRESS_PROGRAM_OPTION:
2963           if (compress_program && !STREQ (compress_program, optarg))
2964             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2965           compress_program = optarg;
2966           break;
2967
2968         case FILES0_FROM_OPTION:
2969           files_from = optarg;
2970           break;
2971
2972         case 'k':
2973           key = key_init (&key_buf);
2974
2975           /* Get POS1. */
2976           s = parse_field_count (optarg, &key->sword,
2977                                  N_("invalid number at field start"));
2978           if (! key->sword--)
2979             {
2980               /* Provoke with `sort -k0' */
2981               badfieldspec (optarg, N_("field number is zero"));
2982             }
2983           if (*s == '.')
2984             {
2985               s = parse_field_count (s + 1, &key->schar,
2986                                      N_("invalid number after `.'"));
2987               if (! key->schar--)
2988                 {
2989                   /* Provoke with `sort -k1.0' */
2990                   badfieldspec (optarg, N_("character offset is zero"));
2991                 }
2992             }
2993           if (! (key->sword | key->schar))
2994             key->sword = SIZE_MAX;
2995           s = set_ordering (s, key, bl_start);
2996           if (*s != ',')
2997             {
2998               key->eword = SIZE_MAX;
2999               key->echar = 0;
3000             }
3001           else
3002             {
3003               /* Get POS2. */
3004               s = parse_field_count (s + 1, &key->eword,
3005                                      N_("invalid number after `,'"));
3006               if (! key->eword--)
3007                 {
3008                   /* Provoke with `sort -k1,0' */
3009                   badfieldspec (optarg, N_("field number is zero"));
3010                 }
3011               if (*s == '.')
3012                 s = parse_field_count (s + 1, &key->echar,
3013                                        N_("invalid number after `.'"));
3014               else
3015                 {
3016                   /* `-k 2,3' is equivalent to `+1 -3'.  */
3017                   key->eword++;
3018                 }
3019               s = set_ordering (s, key, bl_end);
3020             }
3021           if (*s)
3022             badfieldspec (optarg, N_("stray character in field spec"));
3023           insertkey (key);
3024           break;
3025
3026         case 'm':
3027           mergeonly = true;
3028           break;
3029
3030         case 'o':
3031           if (outfile && !STREQ (outfile, optarg))
3032             error (SORT_FAILURE, 0, _("multiple output files specified"));
3033           outfile = optarg;
3034           break;
3035
3036         case RANDOM_SOURCE_OPTION:
3037           if (random_source && !STREQ (random_source, optarg))
3038             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3039           random_source = optarg;
3040           break;
3041
3042         case 's':
3043           stable = true;
3044           break;
3045
3046         case 'S':
3047           specify_sort_size (oi, c, optarg);
3048           break;
3049
3050         case 't':
3051           {
3052             char newtab = optarg[0];
3053             if (! newtab)
3054               error (SORT_FAILURE, 0, _("empty tab"));
3055             if (optarg[1])
3056               {
3057                 if (STREQ (optarg, "\\0"))
3058                   newtab = '\0';
3059                 else
3060                   {
3061                     /* Provoke with `sort -txx'.  Complain about
3062                        "multi-character tab" instead of "multibyte tab", so
3063                        that the diagnostic's wording does not need to be
3064                        changed once multibyte characters are supported.  */
3065                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3066                            quote (optarg));
3067                   }
3068               }
3069             if (tab != TAB_DEFAULT && tab != newtab)
3070               error (SORT_FAILURE, 0, _("incompatible tabs"));
3071             tab = newtab;
3072           }
3073           break;
3074
3075         case 'T':
3076           add_temp_dir (optarg);
3077           break;
3078
3079         case 'u':
3080           unique = true;
3081           break;
3082
3083         case 'y':
3084           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3085              through Solaris 7.  It is also accepted by many non-Solaris
3086              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3087              -y is marked as obsolete starting with Solaris 8 (1999), but is
3088              still accepted as of Solaris 10 prerelease (2004).
3089
3090              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3091              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3092              and which in general ignores the argument after "-y" if it
3093              consists entirely of digits (it can even be empty).  */
3094           if (optarg == argv[optind - 1])
3095             {
3096               char const *p;
3097               for (p = optarg; ISDIGIT (*p); p++)
3098                 continue;
3099               optind -= (*p != '\0');
3100             }
3101           break;
3102
3103         case 'z':
3104           eolchar = 0;
3105           break;
3106
3107         case_GETOPT_HELP_CHAR;
3108
3109         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3110
3111         default:
3112           usage (SORT_FAILURE);
3113         }
3114     }
3115
3116   if (files_from)
3117     {
3118       FILE *stream;
3119
3120       /* When using --files0-from=F, you may not specify any files
3121          on the command-line.  */
3122       if (nfiles)
3123         {
3124           error (0, 0, _("extra operand %s"), quote (files[0]));
3125           fprintf (stderr, "%s\n",
3126                    _("file operands cannot be combined with --files0-from"));
3127           usage (SORT_FAILURE);
3128         }
3129
3130       if (STREQ (files_from, "-"))
3131         stream = stdin;
3132       else
3133         {
3134           stream = fopen (files_from, "r");
3135           if (stream == NULL)
3136             error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3137                    quote (files_from));
3138         }
3139
3140       readtokens0_init (&tok);
3141
3142       if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3143         error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3144                quote (files_from));
3145
3146       if (tok.n_tok)
3147         {
3148           size_t i;
3149           free (files);
3150           files = tok.tok;
3151           nfiles = tok.n_tok;
3152           for (i = 0; i < nfiles; i++)
3153           {
3154               if (STREQ (files[i], "-"))
3155                 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3156                                           "no file name of %s allowed"),
3157                        quote (files[i]));
3158               else if (files[i][0] == '\0')
3159                 {
3160                   /* Using the standard `filename:line-number:' prefix here is
3161                      not totally appropriate, since NUL is the separator, not NL,
3162                      but it might be better than nothing.  */
3163                   unsigned long int file_number = i + 1;
3164                   error (SORT_FAILURE, 0,
3165                          _("%s:%lu: invalid zero-length file name"),
3166                          quotearg_colon (files_from), file_number);
3167                 }
3168           }
3169         }
3170       else
3171         error (SORT_FAILURE, 0, _("no input from %s"),
3172                quote (files_from));
3173     }
3174
3175   /* Inheritance of global options to individual keys. */
3176   for (key = keylist; key; key = key->next)
3177     {
3178       if (! (key->ignore || key->translate
3179              || (key->skipsblanks | key->reverse
3180                  | key->skipeblanks | key->month | key->numeric
3181                  | key->general_numeric
3182                  | key->random)))
3183         {
3184           key->ignore = gkey.ignore;
3185           key->translate = gkey.translate;
3186           key->skipsblanks = gkey.skipsblanks;
3187           key->skipeblanks = gkey.skipeblanks;
3188           key->month = gkey.month;
3189           key->numeric = gkey.numeric;
3190           key->general_numeric = gkey.general_numeric;
3191           key->random = gkey.random;
3192           key->reverse = gkey.reverse;
3193         }
3194
3195       need_random |= key->random;
3196     }
3197
3198   if (!keylist && (gkey.ignore || gkey.translate
3199                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3200                        | gkey.numeric | gkey.general_numeric
3201                        | gkey.random)))
3202     {
3203       insertkey (&gkey);
3204       need_random |= gkey.random;
3205     }
3206
3207   check_ordering_compatibility ();
3208
3209   reverse = gkey.reverse;
3210
3211   if (need_random)
3212     {
3213       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3214       if (! randread_source)
3215         die (_("open failed"), random_source);
3216     }
3217
3218   if (temp_dir_count == 0)
3219     {
3220       char const *tmp_dir = getenv ("TMPDIR");
3221       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3222     }
3223
3224   if (nfiles == 0)
3225     {
3226       static char *minus = "-";
3227       nfiles = 1;
3228       free (files);
3229       files = &minus;
3230     }
3231
3232   if (checkonly)
3233     {
3234       if (nfiles > 1)
3235         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3236                quote (files[1]), checkonly);
3237
3238       if (outfile)
3239         {
3240           static char opts[] = {0, 'o', 0};
3241           opts[0] = checkonly;
3242           incompatible_options (opts);
3243         }
3244
3245       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3246          input is not properly sorted.  */
3247       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3248     }
3249
3250   if (mergeonly)
3251     {
3252       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3253       size_t i;
3254
3255       for (i = 0; i < nfiles; ++i)
3256         sortfiles[i].name = files[i];
3257
3258       merge (sortfiles, 0, nfiles, outfile);
3259       IF_LINT (free (sortfiles));
3260     }
3261   else
3262     sort (files, nfiles, outfile);
3263
3264   if (have_read_stdin && fclose (stdin) == EOF)
3265     die (_("close failed"), "-");
3266
3267   exit (EXIT_SUCCESS);
3268 }