src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17    Written December 1988 by Mike Haertel.
  18    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  19    or (US mail) as Mike Haertel c/o Free Software Foundation.
  20
  21    Ørn E. Hansen added NLS support in 1997.  */
  22
  23 #include <config.h>
  24
  25 #include <getopt.h>
  26 #include <sys/types.h>
  27 #include <sys/wait.h>
  28 #include <signal.h>
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "error.h"
  32 #include "hard-locale.h"
  33 #include "hash.h"
  34 #include "inttostr.h"
  35 #include "md5.h"
  36 #include "physmem.h"
  37 #include "posixver.h"
  38 #include "quote.h"
  39 #include "randread.h"
  40 #include "stdio--.h"
  41 #include "stdlib--.h"
  42 #include "strnumcmp.h"
  43 #include "xmemcoll.h"
  44 #include "xmemxfrm.h"
  45 #include "xstrtol.h"
  46
  47 #if HAVE_SYS_RESOURCE_H
  48 # include <sys/resource.h>
  49 #endif
  50 #ifndef RLIMIT_DATA
  51 struct rlimit { size_t rlim_cur; };
  52 # define getrlimit(Resource, Rlp) (-1)
  53 #endif
  54
  55 /* The official name of this program (e.g., no `g' prefix).  */
  56 #define PROGRAM_NAME "sort"
  57
  58 #define AUTHORS \
  59   proper_name ("Mike Haertel"), \
  60   proper_name ("Paul Eggert")
  61
  62 #if HAVE_LANGINFO_CODESET
  63 # include <langinfo.h>
  64 #endif
  65
  66 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  67    present.  */
  68 #ifndef SA_NOCLDSTOP
  69 # define SA_NOCLDSTOP 0
  70 /* No sigprocmask.  Always 'return' zero. */
  71 # define sigprocmask(How, Set, Oset) (0)
  72 # define sigset_t int
  73 # if ! HAVE_SIGINTERRUPT
  74 #  define siginterrupt(sig, flag) /* empty */
  75 # endif
  76 #endif
  77
  78 #ifndef STDC_HEADERS
  79 double strtod ();
  80 #endif
  81
  82 #define UCHAR_LIM (UCHAR_MAX + 1)
  83
  84 #ifndef DEFAULT_TMPDIR
  85 # define DEFAULT_TMPDIR "/tmp"
  86 #endif
  87
  88 /* Exit statuses.  */
  89 enum
  90   {
  91     /* POSIX says to exit with status 1 if invoked with -c and the
  92        input is not properly sorted.  */
  93     SORT_OUT_OF_ORDER = 1,
  94
  95     /* POSIX says any other irregular exit must exit with a status
  96        code greater than 1.  */
  97     SORT_FAILURE = 2
  98   };
  99
 100 enum
 101   {
 102     /* The number of times we should try to fork a compression process
 103        (we retry if the fork call fails).  We don't _need_ to compress
 104        temp files, this is just to reduce disk access, so this number
 105        can be small.  */
 106     MAX_FORK_TRIES_COMPRESS = 2,
 107
 108     /* The number of times we should try to fork a decompression process.
 109        If we can't fork a decompression process, we can't sort, so this
 110        number should be big.  */
 111     MAX_FORK_TRIES_DECOMPRESS = 8
 112   };
 113
 114 /* The representation of the decimal point in the current locale.  */
 115 static int decimal_point;
 116
 117 /* Thousands separator; if -1, then there isn't one.  */
 118 static int thousands_sep;
 119
 120 /* Nonzero if the corresponding locales are hard.  */
 121 static bool hard_LC_COLLATE;
 122 #if HAVE_NL_LANGINFO
 123 static bool hard_LC_TIME;
 124 #endif
 125
 126 #define NONZERO(x) ((x) != 0)
 127
 128 /* The kind of blanks for '-b' to skip in various options. */
 129 enum blanktype { bl_start, bl_end, bl_both };
 130
 131 /* The character marking end of line. Default to \n. */
 132 static char eolchar = '\n';
 133
 134 /* Lines are held in core as counted strings. */
 135 struct line
 136 {
 137   char *text;                   /* Text of the line. */
 138   size_t length;                /* Length including final newline. */
 139   char *keybeg;                 /* Start of first key. */
 140   char *keylim;                 /* Limit of first key. */
 141 };
 142
 143 /* Input buffers. */
 144 struct buffer
 145 {
 146   char *buf;                    /* Dynamically allocated buffer,
 147                                    partitioned into 3 regions:
 148                                    - input data;
 149                                    - unused area;
 150                                    - an array of lines, in reverse order.  */
 151   size_t used;                  /* Number of bytes used for input data.  */
 152   size_t nlines;                /* Number of lines in the line array.  */
 153   size_t alloc;                 /* Number of bytes allocated. */
 154   size_t left;                  /* Number of bytes left from previous reads. */
 155   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 156   bool eof;                     /* An EOF has been read.  */
 157 };
 158
 159 struct keyfield
 160 {
 161   size_t sword;                 /* Zero-origin 'word' to start at. */
 162   size_t schar;                 /* Additional characters to skip. */
 163   size_t eword;                 /* Zero-origin first word after field. */
 164   size_t echar;                 /* Additional characters in field. */
 165   bool const *ignore;           /* Boolean array of characters to ignore. */
 166   char const *translate;        /* Translation applied to characters. */
 167   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 168   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 169   bool numeric;                 /* Flag for numeric comparison.  Handle
 170                                    strings of digits with optional decimal
 171                                    point, but no exponential notation. */
 172   bool random;                  /* Sort by random hash of key.  */
 173   bool general_numeric;         /* Flag for general, numeric comparison.
 174                                    Handle numbers in exponential notation. */
 175   bool month;                   /* Flag for comparison by month name. */
 176   bool reverse;                 /* Reverse the sense of comparison. */
 177   struct keyfield *next;        /* Next keyfield to try. */
 178 };
 179
 180 struct month
 181 {
 182   char const *name;
 183   int val;
 184 };
 185
 186 /* FIXME: None of these tables work with multibyte character sets.
 187    Also, there are many other bugs when handling multibyte characters.
 188    One way to fix this is to rewrite `sort' to use wide characters
 189    internally, but doing this with good performance is a bit
 190    tricky.  */
 191
 192 /* Table of blanks.  */
 193 static bool blanks[UCHAR_LIM];
 194
 195 /* Table of non-printing characters. */
 196 static bool nonprinting[UCHAR_LIM];
 197
 198 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 199 static bool nondictionary[UCHAR_LIM];
 200
 201 /* Translation table folding lower case to upper.  */
 202 static char fold_toupper[UCHAR_LIM];
 203
 204 #define MONTHS_PER_YEAR 12
 205
 206 /* Table mapping month names to integers.
 207    Alphabetic order allows binary search. */
 208 static struct month monthtab[] =
 209 {
 210   {"APR", 4},
 211   {"AUG", 8},
 212   {"DEC", 12},
 213   {"FEB", 2},
 214   {"JAN", 1},
 215   {"JUL", 7},
 216   {"JUN", 6},
 217   {"MAR", 3},
 218   {"MAY", 5},
 219   {"NOV", 11},
 220   {"OCT", 10},
 221   {"SEP", 9}
 222 };
 223
 224 /* During the merge phase, the number of files to merge at once. */
 225 #define NMERGE 16
 226
 227 /* Minimum size for a merge or check buffer.  */
 228 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 229
 230 /* Minimum sort size; the code might not work with smaller sizes.  */
 231 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 232
 233 /* The number of bytes needed for a merge or check buffer, which can
 234    function relatively efficiently even if it holds only one line.  If
 235    a longer line is seen, this value is increased.  */
 236 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 237
 238 /* The approximate maximum number of bytes of main memory to use, as
 239    specified by the user.  Zero if the user has not specified a size.  */
 240 static size_t sort_size;
 241
 242 /* The guessed size for non-regular files.  */
 243 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 244
 245 /* Array of directory names in which any temporary files are to be created. */
 246 static char const **temp_dirs;
 247
 248 /* Number of temporary directory names used.  */
 249 static size_t temp_dir_count;
 250
 251 /* Number of allocated slots in temp_dirs.  */
 252 static size_t temp_dir_alloc;
 253
 254 /* Flag to reverse the order of all comparisons. */
 255 static bool reverse;
 256
 257 /* Flag for stable sort.  This turns off the last ditch bytewise
 258    comparison of lines, and instead leaves lines in the same order
 259    they were read if all keys compare equal.  */
 260 static bool stable;
 261
 262 /* If TAB has this value, blanks separate fields.  */
 263 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 264
 265 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 266    separated by the empty string between a non-blank character and a blank
 267    character. */
 268 static int tab = TAB_DEFAULT;
 269
 270 /* Flag to remove consecutive duplicate lines from the output.
 271    Only the last of a sequence of equal lines will be output. */
 272 static bool unique;
 273
 274 /* Nonzero if any of the input files are the standard input. */
 275 static bool have_read_stdin;
 276
 277 /* List of key field comparisons to be tried.  */
 278 static struct keyfield *keylist;
 279
 280 /* Program used to (de)compress temp files.  Must accept -d.  */
 281 static char const *compress_program;
 282
 283 static void sortlines_temp (struct line *, size_t, struct line *);
 284
 285 /* Report MESSAGE for FILE, then clean up and exit.
 286    If FILE is null, it represents standard output.  */
 287
 288 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 289 static void
 290 die (char const *message, char const *file)
 291 {
 292   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 293   exit (SORT_FAILURE);
 294 }
 295
 296 void
 297 usage (int status)
 298 {
 299   if (status != EXIT_SUCCESS)
 300     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 301              program_name);
 302   else
 303     {
 304       printf (_("\
 305 Usage: %s [OPTION]... [FILE]...\n\
 306 "),
 307               program_name);
 308       fputs (_("\
 309 Write sorted concatenation of all FILE(s) to standard output.\n\
 310 \n\
 311 "), stdout);
 312       fputs (_("\
 313 Mandatory arguments to long options are mandatory for short options too.\n\
 314 "), stdout);
 315       fputs (_("\
 316 Ordering options:\n\
 317 \n\
 318 "), stdout);
 319       fputs (_("\
 320   -b, --ignore-leading-blanks  ignore leading blanks\n\
 321   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 322   -f, --ignore-case           fold lower case to upper case characters\n\
 323 "), stdout);
 324       fputs (_("\
 325   -g, --general-numeric-sort  compare according to general numerical value\n\
 326   -i, --ignore-nonprinting    consider only printable characters\n\
 327   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 328   -n, --numeric-sort          compare according to string numerical value\n\
 329   -R, --random-sort           sort by random hash of keys\n\
 330       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
 331       --sort=WORD             sort according to WORD:\n\
 332                                 general-numeric -g, month -M, numeric -n,\n\
 333                                 random -R\n\
 334   -r, --reverse               reverse the result of comparisons\n\
 335 \n\
 336 "), stdout);
 337       fputs (_("\
 338 Other options:\n\
 339 \n\
 340   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 341   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 342       --compress-program=PROG  compress temporaries with PROG;\n\
 343                               decompress them with PROG -d\n\
 344   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
 345   -m, --merge               merge already sorted files; do not sort\n\
 346 "), stdout);
 347       fputs (_("\
 348   -o, --output=FILE         write result to FILE instead of standard output\n\
 349   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 350   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 351 "), stdout);
 352       printf (_("\
 353   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 354   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 355                               multiple options specify multiple directories\n\
 356   -u, --unique              with -c, check for strict ordering;\n\
 357                               without -c, output only the first of an equal run\n\
 358 "), DEFAULT_TMPDIR);
 359       fputs (_("\
 360   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 361 "), stdout);
 362       fputs (HELP_OPTION_DESCRIPTION, stdout);
 363       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 364       fputs (_("\
 365 \n\
 366 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 367 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 368 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 369 one or more single-letter ordering options, which override global ordering\n\
 370 options for that key.  If no key is given, use the entire line as the key.\n\
 371 \n\
 372 SIZE may be followed by the following multiplicative suffixes:\n\
 373 "), stdout);
 374       fputs (_("\
 375 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 376 \n\
 377 With no FILE, or when FILE is -, read standard input.\n\
 378 \n\
 379 *** WARNING ***\n\
 380 The locale specified by the environment affects sort order.\n\
 381 Set LC_ALL=C to get the traditional sort order that uses\n\
 382 native byte values.\n\
 383 "), stdout );
 384       emit_bug_reporting_address ();
 385     }
 386
 387   exit (status);
 388 }
 389
 390 /* For long options that have no equivalent short option, use a
 391    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 392 enum
 393 {
 394   CHECK_OPTION = CHAR_MAX + 1,
 395   COMPRESS_PROGRAM_OPTION,
 396   RANDOM_SOURCE_OPTION,
 397   SORT_OPTION
 398 };
 399
 400 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
 401
 402 static struct option const long_options[] =
 403 {
 404   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 405   {"check", optional_argument, NULL, CHECK_OPTION},
 406   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
 407   {"dictionary-order", no_argument, NULL, 'd'},
 408   {"ignore-case", no_argument, NULL, 'f'},
 409   {"general-numeric-sort", no_argument, NULL, 'g'},
 410   {"ignore-nonprinting", no_argument, NULL, 'i'},
 411   {"key", required_argument, NULL, 'k'},
 412   {"merge", no_argument, NULL, 'm'},
 413   {"month-sort", no_argument, NULL, 'M'},
 414   {"numeric-sort", no_argument, NULL, 'n'},
 415   {"random-sort", no_argument, NULL, 'R'},
 416   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 417   {"sort", required_argument, NULL, SORT_OPTION},
 418   {"output", required_argument, NULL, 'o'},
 419   {"reverse", no_argument, NULL, 'r'},
 420   {"stable", no_argument, NULL, 's'},
 421   {"buffer-size", required_argument, NULL, 'S'},
 422   {"field-separator", required_argument, NULL, 't'},
 423   {"temporary-directory", required_argument, NULL, 'T'},
 424   {"unique", no_argument, NULL, 'u'},
 425   {"zero-terminated", no_argument, NULL, 'z'},
 426   {GETOPT_HELP_OPTION_DECL},
 427   {GETOPT_VERSION_OPTION_DECL},
 428   {NULL, 0, NULL, 0},
 429 };
 430
 431 static char const *const check_args[] =
 432 {
 433   "quiet", "silent", "diagnose-first", NULL
 434 };
 435 static char const check_types[] =
 436 {
 437   'C', 'C', 'c'
 438 };
 439 ARGMATCH_VERIFY (check_args, check_types);
 440
 441 static char const *const sort_args[] =
 442 {
 443   "general-numeric", "month", "numeric", "random", NULL
 444 };
 445 static char const sort_types[] =
 446 {
 447   'g', 'M', 'n', 'R'
 448 };
 449 ARGMATCH_VERIFY (sort_args, sort_types);
 450
 451 /* The set of signals that are caught.  */
 452 static sigset_t caught_signals;
 453
 454 /* Critical section status.  */
 455 struct cs_status
 456 {
 457   bool valid;
 458   sigset_t sigs;
 459 };
 460
 461 /* Enter a critical section.  */
 462 static struct cs_status
 463 cs_enter (void)
 464 {
 465   struct cs_status status;
 466   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 467   return status;
 468 }
 469
 470 /* Leave a critical section.  */
 471 static void
 472 cs_leave (struct cs_status status)
 473 {
 474   if (status.valid)
 475     {
 476       /* Ignore failure when restoring the signal mask. */
 477       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 478     }
 479 }
 480
 481 /* The list of temporary files. */
 482 struct tempnode
 483 {
 484   struct tempnode *volatile next;
 485   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 486   char name[1];  /* Actual size is 1 + file name length.  */
 487 };
 488 static struct tempnode *volatile temphead;
 489 static struct tempnode *volatile *temptail = &temphead;
 490
 491 struct sortfile
 492 {
 493   char const *name;
 494   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 495 };
 496
 497 /* A table where we store compression process states.  We clean up all
 498    processes in a timely manner so as not to exhaust system resources,
 499    so we store the info on whether the process is still running, or has
 500    been reaped here.  */
 501 static Hash_table *proctab;
 502
 503 enum { INIT_PROCTAB_SIZE = 47 };
 504
 505 enum procstate { ALIVE, ZOMBIE };
 506
 507 /* A proctab entry.  The COUNT field is there in case we fork a new
 508    compression process that has the same PID as an old zombie process
 509    that is still in the table (because the process to decompress the
 510    temp file it was associated with hasn't started yet).  */
 511 struct procnode
 512 {
 513   pid_t pid;
 514   enum procstate state;
 515   size_t count;
 516 };
 517
 518 static size_t
 519 proctab_hasher (const void *entry, size_t tabsize)
 520 {
 521   const struct procnode *node = entry;
 522   return node->pid % tabsize;
 523 }
 524
 525 static bool
 526 proctab_comparator (const void *e1, const void *e2)
 527 {
 528   const struct procnode *n1 = e1, *n2 = e2;
 529   return n1->pid == n2->pid;
 530 }
 531
 532 /* The total number of forked processes (compressors and decompressors)
 533    that have not been reaped yet. */
 534 static size_t nprocs;
 535
 536 /* The number of child processes we'll allow before we try to reap some. */
 537 enum { MAX_PROCS_BEFORE_REAP = 2 };
 538
 539 /* If 0 < PID, wait for the child process with that PID to exit.
 540    If PID is -1, clean up a random child process which has finished and
 541    return the process ID of that child.  If PID is -1 and no processes
 542    have quit yet, return 0 without waiting.  */
 543
 544 static pid_t
 545 reap (pid_t pid)
 546 {
 547   int status;
 548   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 549
 550   if (cpid < 0)
 551     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 552            compress_program);
 553   else if (0 < cpid)
 554     {
 555       if (! WIFEXITED (status) || WEXITSTATUS (status))
 556         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 557                compress_program);
 558       --nprocs;
 559     }
 560
 561   return cpid;
 562 }
 563
 564 /* Add the PID of a running compression process to proctab, or update
 565    the entry COUNT and STATE fields if it's already there.  This also
 566    creates the table for us the first time it's called.  */
 567
 568 static void
 569 register_proc (pid_t pid)
 570 {
 571   struct procnode test, *node;
 572
 573   if (! proctab)
 574     {
 575       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 576                                  proctab_hasher,
 577                                  proctab_comparator,
 578                                  free);
 579       if (! proctab)
 580         xalloc_die ();
 581     }
 582
 583   test.pid = pid;
 584   node = hash_lookup (proctab, &test);
 585   if (node)
 586     {
 587       node->state = ALIVE;
 588       ++node->count;
 589     }
 590   else
 591     {
 592       node = xmalloc (sizeof *node);
 593       node->pid = pid;
 594       node->state = ALIVE;
 595       node->count = 1;
 596       hash_insert (proctab, node);
 597     }
 598 }
 599
 600 /* This is called when we reap a random process.  We don't know
 601    whether we have reaped a compression process or a decompression
 602    process until we look in the table.  If there's an ALIVE entry for
 603    it, then we have reaped a compression process, so change the state
 604    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 605
 606 static void
 607 update_proc (pid_t pid)
 608 {
 609   struct procnode test, *node;
 610
 611   test.pid = pid;
 612   node = hash_lookup (proctab, &test);
 613   if (node)
 614     node->state = ZOMBIE;
 615 }
 616
 617 /* This is for when we need to wait for a compression process to exit.
 618    If it has a ZOMBIE entry in the table then it's already dead and has
 619    been reaped.  Note that if there's an ALIVE entry for it, it still may
 620    already have died and been reaped if a second process was created with
 621    the same PID.  This is probably exceedingly rare, but to be on the safe
 622    side we will have to wait for any compression process with this PID.  */
 623
 624 static void
 625 wait_proc (pid_t pid)
 626 {
 627   struct procnode test, *node;
 628
 629   test.pid = pid;
 630   node = hash_lookup (proctab, &test);
 631   if (node->state == ALIVE)
 632     reap (pid);
 633
 634   node->state = ZOMBIE;
 635   if (! --node->count)
 636     {
 637       hash_delete (proctab, node);
 638       free (node);
 639     }
 640 }
 641
 642 /* Keep reaping finished children as long as there are more to reap.
 643    This doesn't block waiting for any of them, it only reaps those
 644    that are already dead.  */
 645
 646 static void
 647 reap_some (void)
 648 {
 649   pid_t pid;
 650
 651   while (0 < nprocs && (pid = reap (-1)))
 652     update_proc (pid);
 653 }
 654
 655 /* Clean up any remaining temporary files.  */
 656
 657 static void
 658 cleanup (void)
 659 {
 660   struct tempnode const *node;
 661
 662   for (node = temphead; node; node = node->next)
 663     unlink (node->name);
 664   temphead = NULL;
 665 }
 666
 667 /* Cleanup actions to take when exiting.  */
 668
 669 static void
 670 exit_cleanup (void)
 671 {
 672   if (temphead)
 673     {
 674       /* Clean up any remaining temporary files in a critical section so
 675          that a signal handler does not try to clean them too.  */
 676       struct cs_status cs = cs_enter ();
 677       cleanup ();
 678       cs_leave (cs);
 679     }
 680
 681   close_stdout ();
 682 }
 683
 684 /* Create a new temporary file, returning its newly allocated tempnode.
 685    Store into *PFD the file descriptor open for writing.  */
 686
 687 static struct tempnode *
 688 create_temp_file (int *pfd)
 689 {
 690   static char const slashbase[] = "/sortXXXXXX";
 691   static size_t temp_dir_index;
 692   int fd;
 693   int saved_errno;
 694   char const *temp_dir = temp_dirs[temp_dir_index];
 695   size_t len = strlen (temp_dir);
 696   struct tempnode *node =
 697     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 698   char *file = node->name;
 699   struct cs_status cs;
 700
 701   memcpy (file, temp_dir, len);
 702   memcpy (file + len, slashbase, sizeof slashbase);
 703   node->next = NULL;
 704   node->pid = 0;
 705   if (++temp_dir_index == temp_dir_count)
 706     temp_dir_index = 0;
 707
 708   /* Create the temporary file in a critical section, to avoid races.  */
 709   cs = cs_enter ();
 710   fd = mkstemp (file);
 711   if (0 <= fd)
 712     {
 713       *temptail = node;
 714       temptail = &node->next;
 715     }
 716   saved_errno = errno;
 717   cs_leave (cs);
 718   errno = saved_errno;
 719
 720   if (fd < 0)
 721     die (_("cannot create temporary file"), file);
 722
 723   *pfd = fd;
 724   return node;
 725 }
 726
 727 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 728    standard output; HOW should be "w".  When opening for input, "-"
 729    means standard input.  To avoid confusion, do not return file
 730    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 731    opening an ordinary FILE.  */
 732
 733 static FILE *
 734 xfopen (const char *file, const char *how)
 735 {
 736   FILE *fp;
 737
 738   if (!file)
 739     fp = stdout;
 740   else if (STREQ (file, "-") && *how == 'r')
 741     {
 742       have_read_stdin = true;
 743       fp = stdin;
 744     }
 745   else
 746     {
 747       fp = fopen (file, how);
 748       if (! fp)
 749         die (_("open failed"), file);
 750     }
 751
 752   return fp;
 753 }
 754
 755 /* Close FP, whose name is FILE, and report any errors.  */
 756
 757 static void
 758 xfclose (FILE *fp, char const *file)
 759 {
 760   switch (fileno (fp))
 761     {
 762     case STDIN_FILENO:
 763       /* Allow reading stdin from tty more than once.  */
 764       if (feof (fp))
 765         clearerr (fp);
 766       break;
 767
 768     case STDOUT_FILENO:
 769       /* Don't close stdout just yet.  close_stdout does that.  */
 770       if (fflush (fp) != 0)
 771         die (_("fflush failed"), file);
 772       break;
 773
 774     default:
 775       if (fclose (fp) != 0)
 776         die (_("close failed"), file);
 777       break;
 778     }
 779 }
 780
 781 static void
 782 dup2_or_die (int oldfd, int newfd)
 783 {
 784   if (dup2 (oldfd, newfd) < 0)
 785     error (SORT_FAILURE, errno, _("dup2 failed"));
 786 }
 787
 788 /* Fork a child process for piping to and do common cleanup.  The
 789    TRIES parameter tells us how many times to try to fork before
 790    giving up.  Return the PID of the child or -1 if fork failed.  */
 791
 792 static pid_t
 793 pipe_fork (int pipefds[2], size_t tries)
 794 {
 795 #if HAVE_WORKING_FORK
 796   struct tempnode *saved_temphead;
 797   int saved_errno;
 798   unsigned int wait_retry = 1;
 799   pid_t pid IF_LINT (= -1);
 800   struct cs_status cs;
 801
 802   if (pipe (pipefds) < 0)
 803     return -1;
 804
 805   while (tries--)
 806     {
 807       /* This is so the child process won't delete our temp files
 808          if it receives a signal before exec-ing.  */
 809       cs = cs_enter ();
 810       saved_temphead = temphead;
 811       temphead = NULL;
 812
 813       pid = fork ();
 814       saved_errno = errno;
 815       if (pid)
 816         temphead = saved_temphead;
 817
 818       cs_leave (cs);
 819       errno = saved_errno;
 820
 821       if (0 <= pid || errno != EAGAIN)
 822         break;
 823       else
 824         {
 825           sleep (wait_retry);
 826           wait_retry *= 2;
 827           reap_some ();
 828         }
 829     }
 830
 831   if (pid < 0)
 832     {
 833       close (pipefds[0]);
 834       close (pipefds[1]);
 835     }
 836   else if (pid == 0)
 837     {
 838       close (STDIN_FILENO);
 839       close (STDOUT_FILENO);
 840     }
 841   else
 842     ++nprocs;
 843
 844   return pid;
 845
 846 #else  /* ! HAVE_WORKING_FORK */
 847   return -1;
 848 #endif
 849 }
 850
 851 /* Create a temporary file and start a compression program to filter output
 852    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 853    set it to the PID of the newly-created process.  */
 854
 855 static char *
 856 create_temp (FILE **pfp, pid_t *ppid)
 857 {
 858   int tempfd;
 859   struct tempnode *node = create_temp_file (&tempfd);
 860   char *name = node->name;
 861
 862   if (compress_program)
 863     {
 864       int pipefds[2];
 865
 866       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 867       if (0 < node->pid)
 868         {
 869           close (tempfd);
 870           close (pipefds[0]);
 871           tempfd = pipefds[1];
 872
 873           register_proc (node->pid);
 874         }
 875       else if (node->pid == 0)
 876         {
 877           close (pipefds[1]);
 878           dup2_or_die (tempfd, STDOUT_FILENO);
 879           close (tempfd);
 880           dup2_or_die (pipefds[0], STDIN_FILENO);
 881           close (pipefds[0]);
 882
 883           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 884             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 885                    compress_program);
 886         }
 887       else
 888         node->pid = 0;
 889     }
 890
 891   *pfp = fdopen (tempfd, "w");
 892   if (! *pfp)
 893     die (_("couldn't create temporary file"), name);
 894
 895   if (ppid)
 896     *ppid = node->pid;
 897
 898   return name;
 899 }
 900
 901 /* Open a compressed temp file and start a decompression process through
 902    which to filter the input.  PID must be the valid processes ID of the
 903    process used to compress the file.  */
 904
 905 static FILE *
 906 open_temp (const char *name, pid_t pid)
 907 {
 908   int tempfd, pipefds[2];
 909   pid_t child_pid;
 910   FILE *fp;
 911
 912   wait_proc (pid);
 913
 914   tempfd = open (name, O_RDONLY);
 915   if (tempfd < 0)
 916     die (_("couldn't open temporary file"), name);
 917
 918   child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
 919   if (0 < child_pid)
 920     {
 921       close (tempfd);
 922       close (pipefds[1]);
 923     }
 924   else if (child_pid == 0)
 925     {
 926       close (pipefds[0]);
 927       dup2_or_die (tempfd, STDIN_FILENO);
 928       close (tempfd);
 929       dup2_or_die (pipefds[1], STDOUT_FILENO);
 930       close (pipefds[1]);
 931
 932       if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
 933         error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
 934                compress_program);
 935     }
 936   else
 937     error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
 938            compress_program);
 939
 940   fp = fdopen (pipefds[0], "r");
 941   if (! fp)
 942     die (_("couldn't create temporary file"), name);
 943
 944   return fp;
 945 }
 946
 947 static void
 948 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 949 {
 950   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 951     die (_("write failed"), output_file);
 952 }
 953
 954 /* Append DIR to the array of temporary directory names.  */
 955 static void
 956 add_temp_dir (char const *dir)
 957 {
 958   if (temp_dir_count == temp_dir_alloc)
 959     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
 960
 961   temp_dirs[temp_dir_count++] = dir;
 962 }
 963
 964 /* Remove NAME from the list of temporary files.  */
 965
 966 static void
 967 zaptemp (const char *name)
 968 {
 969   struct tempnode *volatile *pnode;
 970   struct tempnode *node;
 971   struct tempnode *next;
 972   int unlink_status;
 973   int unlink_errno = 0;
 974   struct cs_status cs;
 975
 976   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
 977     continue;
 978
 979   /* Unlink the temporary file in a critical section to avoid races.  */
 980   next = node->next;
 981   cs = cs_enter ();
 982   unlink_status = unlink (name);
 983   unlink_errno = errno;
 984   *pnode = next;
 985   cs_leave (cs);
 986
 987   if (unlink_status != 0)
 988     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
 989   if (! next)
 990     temptail = pnode;
 991   free (node);
 992 }
 993
 994 #if HAVE_NL_LANGINFO
 995
 996 static int
 997 struct_month_cmp (const void *m1, const void *m2)
 998 {
 999   struct month const *month1 = m1;
1000   struct month const *month2 = m2;
1001   return strcmp (month1->name, month2->name);
1002 }
1003
1004 #endif
1005
1006 /* Initialize the character class tables. */
1007
1008 static void
1009 inittables (void)
1010 {
1011   size_t i;
1012
1013   for (i = 0; i < UCHAR_LIM; ++i)
1014     {
1015       blanks[i] = !! isblank (i);
1016       nonprinting[i] = ! isprint (i);
1017       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1018       fold_toupper[i] = toupper (i);
1019     }
1020
1021 #if HAVE_NL_LANGINFO
1022   /* If we're not in the "C" locale, read different names for months.  */
1023   if (hard_LC_TIME)
1024     {
1025       for (i = 0; i < MONTHS_PER_YEAR; i++)
1026         {
1027           char const *s;
1028           size_t s_len;
1029           size_t j;
1030           char *name;
1031
1032           s = (char *) nl_langinfo (ABMON_1 + i);
1033           s_len = strlen (s);
1034           monthtab[i].name = name = xmalloc (s_len + 1);
1035           monthtab[i].val = i + 1;
1036
1037           for (j = 0; j < s_len; j++)
1038             name[j] = fold_toupper[to_uchar (s[j])];
1039           name[j] = '\0';
1040         }
1041       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1042              sizeof *monthtab, struct_month_cmp);
1043     }
1044 #endif
1045 }
1046
1047 /* Specify the amount of main memory to use when sorting.  */
1048 static void
1049 specify_sort_size (int oi, char c, char const *s)
1050 {
1051   uintmax_t n;
1052   char *suffix;
1053   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1054
1055   /* The default unit is KiB.  */
1056   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1057     {
1058       if (n <= UINTMAX_MAX / 1024)
1059         n *= 1024;
1060       else
1061         e = LONGINT_OVERFLOW;
1062     }
1063
1064   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1065   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1066     switch (suffix[0])
1067       {
1068       case 'b':
1069         e = LONGINT_OK;
1070         break;
1071
1072       case '%':
1073         {
1074           double mem = physmem_total () * n / 100;
1075
1076           /* Use "<", not "<=", to avoid problems with rounding.  */
1077           if (mem < UINTMAX_MAX)
1078             {
1079               n = mem;
1080               e = LONGINT_OK;
1081             }
1082           else
1083             e = LONGINT_OVERFLOW;
1084         }
1085         break;
1086       }
1087
1088   if (e == LONGINT_OK)
1089     {
1090       /* If multiple sort sizes are specified, take the maximum, so
1091          that option order does not matter.  */
1092       if (n < sort_size)
1093         return;
1094
1095       sort_size = n;
1096       if (sort_size == n)
1097         {
1098           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1099           return;
1100         }
1101
1102       e = LONGINT_OVERFLOW;
1103     }
1104
1105   xstrtol_fatal (e, oi, c, long_options, s);
1106 }
1107
1108 /* Return the default sort size.  */
1109 static size_t
1110 default_sort_size (void)
1111 {
1112   /* Let MEM be available memory or 1/8 of total memory, whichever
1113      is greater.  */
1114   double avail = physmem_available ();
1115   double total = physmem_total ();
1116   double mem = MAX (avail, total / 8);
1117   struct rlimit rlimit;
1118
1119   /* Let SIZE be MEM, but no more than the maximum object size or
1120      system resource limits.  Avoid the MIN macro here, as it is not
1121      quite right when only one argument is floating point.  Don't
1122      bother to check for values like RLIM_INFINITY since in practice
1123      they are not much less than SIZE_MAX.  */
1124   size_t size = SIZE_MAX;
1125   if (mem < size)
1126     size = mem;
1127   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1128     size = rlimit.rlim_cur;
1129 #ifdef RLIMIT_AS
1130   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1131     size = rlimit.rlim_cur;
1132 #endif
1133
1134   /* Leave a large safety margin for the above limits, as failure can
1135      occur when they are exceeded.  */
1136   size /= 2;
1137
1138 #ifdef RLIMIT_RSS
1139   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1140      Exceeding RSS is not fatal, but can be quite slow.  */
1141   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1142     size = rlimit.rlim_cur / 16 * 15;
1143 #endif
1144
1145   /* Use no less than the minimum.  */
1146   return MAX (size, MIN_SORT_SIZE);
1147 }
1148
1149 /* Return the sort buffer size to use with the input files identified
1150    by FPS and FILES, which are alternate names of the same files.
1151    NFILES gives the number of input files; NFPS may be less.  Assume
1152    that each input line requires LINE_BYTES extra bytes' worth of line
1153    information.  Do not exceed the size bound specified by the user
1154    (or a default size bound, if the user does not specify one).  */
1155
1156 static size_t
1157 sort_buffer_size (FILE *const *fps, size_t nfps,
1158                   char *const *files, size_t nfiles,
1159                   size_t line_bytes)
1160 {
1161   /* A bound on the input size.  If zero, the bound hasn't been
1162      determined yet.  */
1163   static size_t size_bound;
1164
1165   /* In the worst case, each input byte is a newline.  */
1166   size_t worst_case_per_input_byte = line_bytes + 1;
1167
1168   /* Keep enough room for one extra input line and an extra byte.
1169      This extra room might be needed when preparing to read EOF.  */
1170   size_t size = worst_case_per_input_byte + 1;
1171
1172   size_t i;
1173
1174   for (i = 0; i < nfiles; i++)
1175     {
1176       struct stat st;
1177       off_t file_size;
1178       size_t worst_case;
1179
1180       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1181            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1182            : stat (files[i], &st))
1183           != 0)
1184         die (_("stat failed"), files[i]);
1185
1186       if (S_ISREG (st.st_mode))
1187         file_size = st.st_size;
1188       else
1189         {
1190           /* The file has unknown size.  If the user specified a sort
1191              buffer size, use that; otherwise, guess the size.  */
1192           if (sort_size)
1193             return sort_size;
1194           file_size = INPUT_FILE_SIZE_GUESS;
1195         }
1196
1197       if (! size_bound)
1198         {
1199           size_bound = sort_size;
1200           if (! size_bound)
1201             size_bound = default_sort_size ();
1202         }
1203
1204       /* Add the amount of memory needed to represent the worst case
1205          where the input consists entirely of newlines followed by a
1206          single non-newline.  Check for overflow.  */
1207       worst_case = file_size * worst_case_per_input_byte + 1;
1208       if (file_size != worst_case / worst_case_per_input_byte
1209           || size_bound - size <= worst_case)
1210         return size_bound;
1211       size += worst_case;
1212     }
1213
1214   return size;
1215 }
1216
1217 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1218    must be at least sizeof (struct line).  Allocate ALLOC bytes
1219    initially.  */
1220
1221 static void
1222 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1223 {
1224   /* Ensure that the line array is properly aligned.  If the desired
1225      size cannot be allocated, repeatedly halve it until allocation
1226      succeeds.  The smaller allocation may hurt overall performance,
1227      but that's better than failing.  */
1228   for (;;)
1229     {
1230       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1231       buf->buf = malloc (alloc);
1232       if (buf->buf)
1233         break;
1234       alloc /= 2;
1235       if (alloc <= line_bytes + 1)
1236         xalloc_die ();
1237     }
1238
1239   buf->line_bytes = line_bytes;
1240   buf->alloc = alloc;
1241   buf->used = buf->left = buf->nlines = 0;
1242   buf->eof = false;
1243 }
1244
1245 /* Return one past the limit of the line array.  */
1246
1247 static inline struct line *
1248 buffer_linelim (struct buffer const *buf)
1249 {
1250   return (struct line *) (buf->buf + buf->alloc);
1251 }
1252
1253 /* Return a pointer to the first character of the field specified
1254    by KEY in LINE. */
1255
1256 static char *
1257 begfield (const struct line *line, const struct keyfield *key)
1258 {
1259   char *ptr = line->text, *lim = ptr + line->length - 1;
1260   size_t sword = key->sword;
1261   size_t schar = key->schar;
1262   size_t remaining_bytes;
1263
1264   /* The leading field separator itself is included in a field when -t
1265      is absent.  */
1266
1267   if (tab != TAB_DEFAULT)
1268     while (ptr < lim && sword--)
1269       {
1270         while (ptr < lim && *ptr != tab)
1271           ++ptr;
1272         if (ptr < lim)
1273           ++ptr;
1274       }
1275   else
1276     while (ptr < lim && sword--)
1277       {
1278         while (ptr < lim && blanks[to_uchar (*ptr)])
1279           ++ptr;
1280         while (ptr < lim && !blanks[to_uchar (*ptr)])
1281           ++ptr;
1282       }
1283
1284   if (key->skipsblanks)
1285     while (ptr < lim && blanks[to_uchar (*ptr)])
1286       ++ptr;
1287
1288   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1289   remaining_bytes = lim - ptr;
1290   if (schar < remaining_bytes)
1291     ptr += schar;
1292   else
1293     ptr = lim;
1294
1295   return ptr;
1296 }
1297
1298 /* Return the limit of (a pointer to the first character after) the field
1299    in LINE specified by KEY. */
1300
1301 static char *
1302 limfield (const struct line *line, const struct keyfield *key)
1303 {
1304   char *ptr = line->text, *lim = ptr + line->length - 1;
1305   size_t eword = key->eword, echar = key->echar;
1306   size_t remaining_bytes;
1307
1308   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1309      whichever comes first.  If there are more than EWORD fields, leave
1310      PTR pointing at the beginning of the field having zero-based index,
1311      EWORD.  If a delimiter character was specified (via -t), then that
1312      `beginning' is the first character following the delimiting TAB.
1313      Otherwise, leave PTR pointing at the first `blank' character after
1314      the preceding field.  */
1315   if (tab != TAB_DEFAULT)
1316     while (ptr < lim && eword--)
1317       {
1318         while (ptr < lim && *ptr != tab)
1319           ++ptr;
1320         if (ptr < lim && (eword | echar))
1321           ++ptr;
1322       }
1323   else
1324     while (ptr < lim && eword--)
1325       {
1326         while (ptr < lim && blanks[to_uchar (*ptr)])
1327           ++ptr;
1328         while (ptr < lim && !blanks[to_uchar (*ptr)])
1329           ++ptr;
1330       }
1331
1332 #ifdef POSIX_UNSPECIFIED
1333   /* The following block of code makes GNU sort incompatible with
1334      standard Unix sort, so it's ifdef'd out for now.
1335      The POSIX spec isn't clear on how to interpret this.
1336      FIXME: request clarification.
1337
1338      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1339      Date: Thu, 30 May 96 12:20:41 -0400
1340      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1341
1342      [...]I believe I've found another bug in `sort'.
1343
1344      $ cat /tmp/sort.in
1345      a b c 2 d
1346      pq rs 1 t
1347      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1348      a b c 2 d
1349      pq rs 1 t
1350      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1351      pq rs 1 t
1352      a b c 2 d
1353
1354      Unix sort produced the answer I expected: sort on the single character
1355      in column 7.  GNU sort produced different results, because it disagrees
1356      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1357      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1358      "skip M-1 fields, then either N-1 characters or the rest of the current
1359      field, whichever comes first".  This extra clause applies only to
1360      key-ends, not key-starts.
1361      */
1362
1363   /* Make LIM point to the end of (one byte past) the current field.  */
1364   if (tab != TAB_DEFAULT)
1365     {
1366       char *newlim;
1367       newlim = memchr (ptr, tab, lim - ptr);
1368       if (newlim)
1369         lim = newlim;
1370     }
1371   else
1372     {
1373       char *newlim;
1374       newlim = ptr;
1375       while (newlim < lim && blanks[to_uchar (*newlim)])
1376         ++newlim;
1377       while (newlim < lim && !blanks[to_uchar (*newlim)])
1378         ++newlim;
1379       lim = newlim;
1380     }
1381 #endif
1382
1383   /* If we're ignoring leading blanks when computing the End
1384      of the field, don't start counting bytes until after skipping
1385      past any leading blanks. */
1386   if (key->skipeblanks)
1387     while (ptr < lim && blanks[to_uchar (*ptr)])
1388       ++ptr;
1389
1390   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1391   remaining_bytes = lim - ptr;
1392   if (echar < remaining_bytes)
1393     ptr += echar;
1394   else
1395     ptr = lim;
1396
1397   return ptr;
1398 }
1399
1400 /* Fill BUF reading from FP, moving buf->left bytes from the end
1401    of buf->buf to the beginning first.  If EOF is reached and the
1402    file wasn't terminated by a newline, supply one.  Set up BUF's line
1403    table too.  FILE is the name of the file corresponding to FP.
1404    Return true if some input was read.  */
1405
1406 static bool
1407 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1408 {
1409   struct keyfield const *key = keylist;
1410   char eol = eolchar;
1411   size_t line_bytes = buf->line_bytes;
1412   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1413
1414   if (buf->eof)
1415     return false;
1416
1417   if (buf->used != buf->left)
1418     {
1419       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1420       buf->used = buf->left;
1421       buf->nlines = 0;
1422     }
1423
1424   for (;;)
1425     {
1426       char *ptr = buf->buf + buf->used;
1427       struct line *linelim = buffer_linelim (buf);
1428       struct line *line = linelim - buf->nlines;
1429       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1430       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1431
1432       while (line_bytes + 1 < avail)
1433         {
1434           /* Read as many bytes as possible, but do not read so many
1435              bytes that there might not be enough room for the
1436              corresponding line array.  The worst case is when the
1437              rest of the input file consists entirely of newlines,
1438              except that the last byte is not a newline.  */
1439           size_t readsize = (avail - 1) / (line_bytes + 1);
1440           size_t bytes_read = fread (ptr, 1, readsize, fp);
1441           char *ptrlim = ptr + bytes_read;
1442           char *p;
1443           avail -= bytes_read;
1444
1445           if (bytes_read != readsize)
1446             {
1447               if (ferror (fp))
1448                 die (_("read failed"), file);
1449               if (feof (fp))
1450                 {
1451                   buf->eof = true;
1452                   if (buf->buf == ptrlim)
1453                     return false;
1454                   if (ptrlim[-1] != eol)
1455                     *ptrlim++ = eol;
1456                 }
1457             }
1458
1459           /* Find and record each line in the just-read input.  */
1460           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1461             {
1462               ptr = p + 1;
1463               line--;
1464               line->text = line_start;
1465               line->length = ptr - line_start;
1466               mergesize = MAX (mergesize, line->length);
1467               avail -= line_bytes;
1468
1469               if (key)
1470                 {
1471                   /* Precompute the position of the first key for
1472                      efficiency.  */
1473                   line->keylim = (key->eword == SIZE_MAX
1474                                   ? p
1475                                   : limfield (line, key));
1476
1477                   if (key->sword != SIZE_MAX)
1478                     line->keybeg = begfield (line, key);
1479                   else
1480                     {
1481                       if (key->skipsblanks)
1482                         while (blanks[to_uchar (*line_start)])
1483                           line_start++;
1484                       line->keybeg = line_start;
1485                     }
1486                 }
1487
1488               line_start = ptr;
1489             }
1490
1491           ptr = ptrlim;
1492           if (buf->eof)
1493             break;
1494         }
1495
1496       buf->used = ptr - buf->buf;
1497       buf->nlines = buffer_linelim (buf) - line;
1498       if (buf->nlines != 0)
1499         {
1500           buf->left = ptr - line_start;
1501           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1502           return true;
1503         }
1504
1505       {
1506         /* The current input line is too long to fit in the buffer.
1507            Double the buffer size and try again, keeping it properly
1508            aligned.  */
1509         size_t line_alloc = buf->alloc / sizeof (struct line);
1510         buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1511         buf->alloc = line_alloc * sizeof (struct line);
1512       }
1513     }
1514 }
1515
1516 /* Compare strings A and B as numbers without explicitly converting them to
1517    machine numbers.  Comparatively slow for short strings, but asymptotically
1518    hideously fast. */
1519
1520 static int
1521 numcompare (const char *a, const char *b)
1522 {
1523   while (blanks[to_uchar (*a)])
1524     a++;
1525   while (blanks[to_uchar (*b)])
1526     b++;
1527
1528   return strnumcmp (a, b, decimal_point, thousands_sep);
1529 }
1530
1531 static int
1532 general_numcompare (const char *sa, const char *sb)
1533 {
1534   /* FIXME: add option to warn about failed conversions.  */
1535   /* FIXME: maybe add option to try expensive FP conversion
1536      only if A and B can't be compared more cheaply/accurately.  */
1537
1538   char *ea;
1539   char *eb;
1540   double a = strtod (sa, &ea);
1541   double b = strtod (sb, &eb);
1542
1543   /* Put conversion errors at the start of the collating sequence.  */
1544   if (sa == ea)
1545     return sb == eb ? 0 : -1;
1546   if (sb == eb)
1547     return 1;
1548
1549   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1550      conversion errors but before numbers; sort them by internal
1551      bit-pattern, for lack of a more portable alternative.  */
1552   return (a < b ? -1
1553           : a > b ? 1
1554           : a == b ? 0
1555           : b == b ? -1
1556           : a == a ? 1
1557           : memcmp ((char *) &a, (char *) &b, sizeof a));
1558 }
1559
1560 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1561    Return 0 if the name in S is not recognized.  */
1562
1563 static int
1564 getmonth (char const *month, size_t len)
1565 {
1566   size_t lo = 0;
1567   size_t hi = MONTHS_PER_YEAR;
1568   char const *monthlim = month + len;
1569
1570   for (;;)
1571     {
1572       if (month == monthlim)
1573         return 0;
1574       if (!blanks[to_uchar (*month)])
1575         break;
1576       ++month;
1577     }
1578
1579   do
1580     {
1581       size_t ix = (lo + hi) / 2;
1582       char const *m = month;
1583       char const *n = monthtab[ix].name;
1584
1585       for (;; m++, n++)
1586         {
1587           if (!*n)
1588             return monthtab[ix].val;
1589           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1590             {
1591               hi = ix;
1592               break;
1593             }
1594           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1595             {
1596               lo = ix + 1;
1597               break;
1598             }
1599         }
1600     }
1601   while (lo < hi);
1602
1603   return 0;
1604 }
1605
1606 /* A source of random data.  */
1607 static struct randread_source *randread_source;
1608
1609 /* Return the Ith randomly-generated state.  The caller must invoke
1610    random_state (H) for all H less than I before invoking random_state
1611    (I).  */
1612
1613 static struct md5_ctx
1614 random_state (size_t i)
1615 {
1616   /* An array of states resulting from the random data, and counts of
1617      its used and allocated members.  */
1618   static struct md5_ctx *state;
1619   static size_t used;
1620   static size_t allocated;
1621
1622   struct md5_ctx *s = &state[i];
1623
1624   if (used <= i)
1625     {
1626       unsigned char buf[MD5_DIGEST_SIZE];
1627
1628       used++;
1629
1630       if (allocated <= i)
1631         {
1632           state = X2NREALLOC (state, &allocated);
1633           s = &state[i];
1634         }
1635
1636       randread (randread_source, buf, sizeof buf);
1637       md5_init_ctx (s);
1638       md5_process_bytes (buf, sizeof buf, s);
1639     }
1640
1641   return *s;
1642 }
1643
1644 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1645    with length LENGTHB.  Return negative if less, zero if equal,
1646    positive if greater.  */
1647
1648 static int
1649 cmp_hashes (char const *texta, size_t lena,
1650             char const *textb, size_t lenb)
1651 {
1652   /* Try random hashes until a pair of hashes disagree.  But if the
1653      first pair of random hashes agree, check whether the keys are
1654      identical and if so report no difference.  */
1655   int diff;
1656   size_t i;
1657   for (i = 0; ; i++)
1658     {
1659       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1660       struct md5_ctx s[2];
1661       s[0] = s[1] = random_state (i);
1662       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1663       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1664       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1665       if (diff != 0)
1666         break;
1667       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1668         break;
1669     }
1670
1671   return diff;
1672 }
1673
1674 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1675    using one or more random hash functions.  */
1676
1677 static int
1678 compare_random (char *restrict texta, size_t lena,
1679                 char *restrict textb, size_t lenb)
1680 {
1681   int diff;
1682
1683   if (! hard_LC_COLLATE)
1684     diff = cmp_hashes (texta, lena, textb, lenb);
1685   else
1686     {
1687       /* Transform the text into the basis of comparison, so that byte
1688          strings that would otherwise considered to be equal are
1689          considered equal here even if their bytes differ.  */
1690
1691       char *buf = NULL;
1692       char stackbuf[4000];
1693       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1694       bool a_fits = tlena <= sizeof stackbuf;
1695       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1696                                (a_fits ? sizeof stackbuf - tlena : 0),
1697                                textb, lenb);
1698
1699       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1700         buf = stackbuf;
1701       else
1702         {
1703           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1704              faster by avoiding the need for an extra buffer copy.  */
1705           buf = xmalloc (tlena + tlenb + 1);
1706           xmemxfrm (buf, tlena + 1, texta, lena);
1707           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1708         }
1709
1710       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1711
1712       if (buf != stackbuf)
1713         free (buf);
1714     }
1715
1716   return diff;
1717 }
1718
1719 /* Compare two lines A and B trying every key in sequence until there
1720    are no more keys or a difference is found. */
1721
1722 static int
1723 keycompare (const struct line *a, const struct line *b)
1724 {
1725   struct keyfield const *key = keylist;
1726
1727   /* For the first iteration only, the key positions have been
1728      precomputed for us. */
1729   char *texta = a->keybeg;
1730   char *textb = b->keybeg;
1731   char *lima = a->keylim;
1732   char *limb = b->keylim;
1733
1734   int diff;
1735
1736   for (;;)
1737     {
1738       char const *translate = key->translate;
1739       bool const *ignore = key->ignore;
1740
1741       /* Find the lengths. */
1742       size_t lena = lima <= texta ? 0 : lima - texta;
1743       size_t lenb = limb <= textb ? 0 : limb - textb;
1744
1745       /* Actually compare the fields. */
1746
1747       if (key->random)
1748         diff = compare_random (texta, lena, textb, lenb);
1749       else if (key->numeric | key->general_numeric)
1750         {
1751           char savea = *lima, saveb = *limb;
1752
1753           *lima = *limb = '\0';
1754           diff = ((key->numeric ? numcompare : general_numcompare)
1755                   (texta, textb));
1756           *lima = savea, *limb = saveb;
1757         }
1758       else if (key->month)
1759         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1760       /* Sorting like this may become slow, so in a simple locale the user
1761          can select a faster sort that is similar to ascii sort.  */
1762       else if (hard_LC_COLLATE)
1763         {
1764           if (ignore || translate)
1765             {
1766               char buf[4000];
1767               size_t size = lena + 1 + lenb + 1;
1768               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1769               char *copy_b = copy_a + lena + 1;
1770               size_t new_len_a, new_len_b, i;
1771
1772               /* Ignore and/or translate chars before comparing.  */
1773               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1774                 {
1775                   if (i < lena)
1776                     {
1777                       copy_a[new_len_a] = (translate
1778                                            ? translate[to_uchar (texta[i])]
1779                                            : texta[i]);
1780                       if (!ignore || !ignore[to_uchar (texta[i])])
1781                         ++new_len_a;
1782                     }
1783                   if (i < lenb)
1784                     {
1785                       copy_b[new_len_b] = (translate
1786                                            ? translate[to_uchar (textb[i])]
1787                                            : textb [i]);
1788                       if (!ignore || !ignore[to_uchar (textb[i])])
1789                         ++new_len_b;
1790                     }
1791                 }
1792
1793               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1794
1795               if (sizeof buf < size)
1796                 free (copy_a);
1797             }
1798           else if (lena == 0)
1799             diff = - NONZERO (lenb);
1800           else if (lenb == 0)
1801             goto greater;
1802           else
1803             diff = xmemcoll (texta, lena, textb, lenb);
1804         }
1805       else if (ignore)
1806         {
1807 #define CMP_WITH_IGNORE(A, B)                                           \
1808   do                                                                    \
1809     {                                                                   \
1810           for (;;)                                                      \
1811             {                                                           \
1812               while (texta < lima && ignore[to_uchar (*texta)])         \
1813                 ++texta;                                                \
1814               while (textb < limb && ignore[to_uchar (*textb)])         \
1815                 ++textb;                                                \
1816               if (! (texta < lima && textb < limb))                     \
1817                 break;                                                  \
1818               diff = to_uchar (A) - to_uchar (B);                       \
1819               if (diff)                                                 \
1820                 goto not_equal;                                         \
1821               ++texta;                                                  \
1822               ++textb;                                                  \
1823             }                                                           \
1824                                                                         \
1825           diff = (texta < lima) - (textb < limb);                       \
1826     }                                                                   \
1827   while (0)
1828
1829           if (translate)
1830             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1831                              translate[to_uchar (*textb)]);
1832           else
1833             CMP_WITH_IGNORE (*texta, *textb);
1834         }
1835       else if (lena == 0)
1836         diff = - NONZERO (lenb);
1837       else if (lenb == 0)
1838         goto greater;
1839       else
1840         {
1841           if (translate)
1842             {
1843               while (texta < lima && textb < limb)
1844                 {
1845                   diff = (to_uchar (translate[to_uchar (*texta++)])
1846                           - to_uchar (translate[to_uchar (*textb++)]));
1847                   if (diff)
1848                     goto not_equal;
1849                 }
1850             }
1851           else
1852             {
1853               diff = memcmp (texta, textb, MIN (lena, lenb));
1854               if (diff)
1855                 goto not_equal;
1856             }
1857           diff = lena < lenb ? -1 : lena != lenb;
1858         }
1859
1860       if (diff)
1861         goto not_equal;
1862
1863       key = key->next;
1864       if (! key)
1865         break;
1866
1867       /* Find the beginning and limit of the next field.  */
1868       if (key->eword != SIZE_MAX)
1869         lima = limfield (a, key), limb = limfield (b, key);
1870       else
1871         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1872
1873       if (key->sword != SIZE_MAX)
1874         texta = begfield (a, key), textb = begfield (b, key);
1875       else
1876         {
1877           texta = a->text, textb = b->text;
1878           if (key->skipsblanks)
1879             {
1880               while (texta < lima && blanks[to_uchar (*texta)])
1881                 ++texta;
1882               while (textb < limb && blanks[to_uchar (*textb)])
1883                 ++textb;
1884             }
1885         }
1886     }
1887
1888   return 0;
1889
1890  greater:
1891   diff = 1;
1892  not_equal:
1893   return key->reverse ? -diff : diff;
1894 }
1895
1896 /* Compare two lines A and B, returning negative, zero, or positive
1897    depending on whether A compares less than, equal to, or greater than B. */
1898
1899 static int
1900 compare (const struct line *a, const struct line *b)
1901 {
1902   int diff;
1903   size_t alen, blen;
1904
1905   /* First try to compare on the specified keys (if any).
1906      The only two cases with no key at all are unadorned sort,
1907      and unadorned sort -r. */
1908   if (keylist)
1909     {
1910       diff = keycompare (a, b);
1911       if (diff | unique | stable)
1912         return diff;
1913     }
1914
1915   /* If the keys all compare equal (or no keys were specified)
1916      fall through to the default comparison.  */
1917   alen = a->length - 1, blen = b->length - 1;
1918
1919   if (alen == 0)
1920     diff = - NONZERO (blen);
1921   else if (blen == 0)
1922     diff = 1;
1923   else if (hard_LC_COLLATE)
1924     diff = xmemcoll (a->text, alen, b->text, blen);
1925   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1926     diff = alen < blen ? -1 : alen != blen;
1927
1928   return reverse ? -diff : diff;
1929 }
1930
1931 /* Check that the lines read from FILE_NAME come in order.  Return
1932    true if they are in order.  If CHECKONLY == 'c', also print a
1933    diagnostic (FILE_NAME, line number, contents of line) to stderr if
1934    they are not in order.  */
1935
1936 static bool
1937 check (char const *file_name, char checkonly)
1938 {
1939   FILE *fp = xfopen (file_name, "r");
1940   struct buffer buf;            /* Input buffer. */
1941   struct line temp;             /* Copy of previous line. */
1942   size_t alloc = 0;
1943   uintmax_t line_number = 0;
1944   struct keyfield const *key = keylist;
1945   bool nonunique = ! unique;
1946   bool ordered = true;
1947
1948   initbuf (&buf, sizeof (struct line),
1949            MAX (merge_buffer_size, sort_size));
1950   temp.text = NULL;
1951
1952   while (fillbuf (&buf, fp, file_name))
1953     {
1954       struct line const *line = buffer_linelim (&buf);
1955       struct line const *linebase = line - buf.nlines;
1956
1957       /* Make sure the line saved from the old buffer contents is
1958          less than or equal to the first line of the new buffer. */
1959       if (alloc && nonunique <= compare (&temp, line - 1))
1960         {
1961         found_disorder:
1962           {
1963             if (checkonly == 'c')
1964               {
1965                 struct line const *disorder_line = line - 1;
1966                 uintmax_t disorder_line_number =
1967                   buffer_linelim (&buf) - disorder_line + line_number;
1968                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1969                 fprintf (stderr, _("%s: %s:%s: disorder: "),
1970                          program_name, file_name,
1971                          umaxtostr (disorder_line_number, hr_buf));
1972                 write_bytes (disorder_line->text, disorder_line->length,
1973                              stderr, _("standard error"));
1974               }
1975
1976             ordered = false;
1977             break;
1978           }
1979         }
1980
1981       /* Compare each line in the buffer with its successor.  */
1982       while (linebase < --line)
1983         if (nonunique <= compare (line, line - 1))
1984           goto found_disorder;
1985
1986       line_number += buf.nlines;
1987
1988       /* Save the last line of the buffer.  */
1989       if (alloc < line->length)
1990         {
1991           do
1992             {
1993               alloc *= 2;
1994               if (! alloc)
1995                 {
1996                   alloc = line->length;
1997                   break;
1998                 }
1999             }
2000           while (alloc < line->length);
2001
2002           temp.text = xrealloc (temp.text, alloc);
2003         }
2004       memcpy (temp.text, line->text, line->length);
2005       temp.length = line->length;
2006       if (key)
2007         {
2008           temp.keybeg = temp.text + (line->keybeg - line->text);
2009           temp.keylim = temp.text + (line->keylim - line->text);
2010         }
2011     }
2012
2013   xfclose (fp, file_name);
2014   free (buf.buf);
2015   free (temp.text);
2016   return ordered;
2017 }
2018
2019 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2020    files (all of which are at the start of the FILES array), and
2021    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2022    Close input and output files before returning.
2023    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2024    the output file is standard output.  If OFP is NULL, the output
2025    file has not been opened yet (or written to, if standard output).  */
2026
2027 static void
2028 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2029           FILE *ofp, char const *output_file)
2030 {
2031   FILE *fps[NMERGE];            /* Input streams for each file.  */
2032   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2033   struct line saved;            /* Saved line storage for unique check. */
2034   struct line const *savedline = NULL;
2035                                 /* &saved if there is a saved line. */
2036   size_t savealloc = 0;         /* Size allocated for the saved line. */
2037   struct line const *cur[NMERGE]; /* Current line in each line table. */
2038   struct line const *base[NMERGE]; /* Base of each line table.  */
2039   size_t ord[NMERGE];           /* Table representing a permutation of fps,
2040                                    such that cur[ord[0]] is the smallest line
2041                                    and will be next output. */
2042   size_t i;
2043   size_t j;
2044   size_t t;
2045   struct keyfield const *key = keylist;
2046   saved.text = NULL;
2047
2048   /* Read initial lines from each input file. */
2049   for (i = 0; i < nfiles; )
2050     {
2051       fps[i] = (files[i].pid
2052                 ? open_temp (files[i].name, files[i].pid)
2053                 : xfopen (files[i].name, "r"));
2054       initbuf (&buffer[i], sizeof (struct line),
2055                MAX (merge_buffer_size, sort_size / nfiles));
2056       if (fillbuf (&buffer[i], fps[i], files[i].name))
2057         {
2058           struct line const *linelim = buffer_linelim (&buffer[i]);
2059           cur[i] = linelim - 1;
2060           base[i] = linelim - buffer[i].nlines;
2061           i++;
2062         }
2063       else
2064         {
2065           /* fps[i] is empty; eliminate it from future consideration.  */
2066           xfclose (fps[i], files[i].name);
2067           if (i < ntemps)
2068             {
2069               ntemps--;
2070               zaptemp (files[i].name);
2071             }
2072           free (buffer[i].buf);
2073           --nfiles;
2074           for (j = i; j < nfiles; ++j)
2075             files[j] = files[j + 1];
2076         }
2077     }
2078
2079   if (! ofp)
2080     ofp = xfopen (output_file, "w");
2081
2082   /* Set up the ord table according to comparisons among input lines.
2083      Since this only reorders two items if one is strictly greater than
2084      the other, it is stable. */
2085   for (i = 0; i < nfiles; ++i)
2086     ord[i] = i;
2087   for (i = 1; i < nfiles; ++i)
2088     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2089       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2090
2091   /* Repeatedly output the smallest line until no input remains. */
2092   while (nfiles)
2093     {
2094       struct line const *smallest = cur[ord[0]];
2095
2096       /* If uniquified output is turned on, output only the first of
2097          an identical series of lines. */
2098       if (unique)
2099         {
2100           if (savedline && compare (savedline, smallest))
2101             {
2102               savedline = NULL;
2103               write_bytes (saved.text, saved.length, ofp, output_file);
2104             }
2105           if (!savedline)
2106             {
2107               savedline = &saved;
2108               if (savealloc < smallest->length)
2109                 {
2110                   do
2111                     if (! savealloc)
2112                       {
2113                         savealloc = smallest->length;
2114                         break;
2115                       }
2116                   while ((savealloc *= 2) < smallest->length);
2117
2118                   saved.text = xrealloc (saved.text, savealloc);
2119                 }
2120               saved.length = smallest->length;
2121               memcpy (saved.text, smallest->text, saved.length);
2122               if (key)
2123                 {
2124                   saved.keybeg =
2125                     saved.text + (smallest->keybeg - smallest->text);
2126                   saved.keylim =
2127                     saved.text + (smallest->keylim - smallest->text);
2128                 }
2129             }
2130         }
2131       else
2132         write_bytes (smallest->text, smallest->length, ofp, output_file);
2133
2134       /* Check if we need to read more lines into core. */
2135       if (base[ord[0]] < smallest)
2136         cur[ord[0]] = smallest - 1;
2137       else
2138         {
2139           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2140             {
2141               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2142               cur[ord[0]] = linelim - 1;
2143               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2144             }
2145           else
2146             {
2147               /* We reached EOF on fps[ord[0]].  */
2148               for (i = 1; i < nfiles; ++i)
2149                 if (ord[i] > ord[0])
2150                   --ord[i];
2151               --nfiles;
2152               xfclose (fps[ord[0]], files[ord[0]].name);
2153               if (ord[0] < ntemps)
2154                 {
2155                   ntemps--;
2156                   zaptemp (files[ord[0]].name);
2157                 }
2158               free (buffer[ord[0]].buf);
2159               for (i = ord[0]; i < nfiles; ++i)
2160                 {
2161                   fps[i] = fps[i + 1];
2162                   files[i] = files[i + 1];
2163                   buffer[i] = buffer[i + 1];
2164                   cur[i] = cur[i + 1];
2165                   base[i] = base[i + 1];
2166                 }
2167               for (i = 0; i < nfiles; ++i)
2168                 ord[i] = ord[i + 1];
2169               continue;
2170             }
2171         }
2172
2173       /* The new line just read in may be larger than other lines
2174          already in main memory; push it back in the queue until we
2175          encounter a line larger than it.  Optimize for the common
2176          case where the new line is smallest.  */
2177       {
2178         size_t lo = 1;
2179         size_t hi = nfiles;
2180         size_t probe = lo;
2181         size_t ord0 = ord[0];
2182         size_t count_of_smaller_lines;
2183
2184         while (lo < hi)
2185           {
2186             int cmp = compare (cur[ord0], cur[ord[probe]]);
2187             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2188               hi = probe;
2189             else
2190               lo = probe + 1;
2191             probe = (lo + hi) / 2;
2192           }
2193
2194         count_of_smaller_lines = lo - 1;
2195         for (j = 0; j < count_of_smaller_lines; j++)
2196           ord[j] = ord[j + 1];
2197         ord[count_of_smaller_lines] = ord0;
2198       }
2199
2200       /* Free up some resources every once in a while.  */
2201       if (MAX_PROCS_BEFORE_REAP < nprocs)
2202         reap_some ();
2203     }
2204
2205   if (unique && savedline)
2206     {
2207       write_bytes (saved.text, saved.length, ofp, output_file);
2208       free (saved.text);
2209     }
2210
2211   xfclose (ofp, output_file);
2212 }
2213
2214 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2215    and HI (with NHI members).  T, LO, and HI point just past their
2216    respective arrays, and the arrays are in reverse order.  NLO and
2217    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2218
2219 static inline void
2220 mergelines (struct line *t,
2221             struct line const *lo, size_t nlo,
2222             struct line const *hi, size_t nhi)
2223 {
2224   for (;;)
2225     if (compare (lo - 1, hi - 1) <= 0)
2226       {
2227         *--t = *--lo;
2228         if (! --nlo)
2229           {
2230             /* HI - NHI equalled T - (NLO + NHI) when this function
2231                began.  Therefore HI must equal T now, and there is no
2232                need to copy from HI to T.  */
2233             return;
2234           }
2235       }
2236     else
2237       {
2238         *--t = *--hi;
2239         if (! --nhi)
2240           {
2241             do
2242               *--t = *--lo;
2243             while (--nlo);
2244
2245             return;
2246           }
2247       }
2248 }
2249
2250 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2251    NLINES must be at least 2.
2252    The input and output arrays are in reverse order, and LINES and
2253    TEMP point just past the end of their respective arrays.
2254
2255    Use a recursive divide-and-conquer algorithm, in the style
2256    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2257    the optimization suggested by exercise 5.2.4-10; this requires room
2258    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2259    writes that this memory optimization was originally published by
2260    D. A. Bell, Comp J. 1 (1958), 75.  */
2261
2262 static void
2263 sortlines (struct line *lines, size_t nlines, struct line *temp)
2264 {
2265   if (nlines == 2)
2266     {
2267       if (0 < compare (&lines[-1], &lines[-2]))
2268         {
2269           struct line tmp = lines[-1];
2270           lines[-1] = lines[-2];
2271           lines[-2] = tmp;
2272         }
2273     }
2274   else
2275     {
2276       size_t nlo = nlines / 2;
2277       size_t nhi = nlines - nlo;
2278       struct line *lo = lines;
2279       struct line *hi = lines - nlo;
2280       struct line *sorted_lo = temp;
2281
2282       sortlines (hi, nhi, temp);
2283       if (1 < nlo)
2284         sortlines_temp (lo, nlo, sorted_lo);
2285       else
2286         sorted_lo[-1] = lo[-1];
2287
2288       mergelines (lines, sorted_lo, nlo, hi, nhi);
2289     }
2290 }
2291
2292 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2293    rather than sorting in place.  */
2294
2295 static void
2296 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2297 {
2298   if (nlines == 2)
2299     {
2300       /* Declare `swap' as int, not bool, to work around a bug
2301          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2302          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2303       int swap = (0 < compare (&lines[-1], &lines[-2]));
2304       temp[-1] = lines[-1 - swap];
2305       temp[-2] = lines[-2 + swap];
2306     }
2307   else
2308     {
2309       size_t nlo = nlines / 2;
2310       size_t nhi = nlines - nlo;
2311       struct line *lo = lines;
2312       struct line *hi = lines - nlo;
2313       struct line *sorted_hi = temp - nlo;
2314
2315       sortlines_temp (hi, nhi, sorted_hi);
2316       if (1 < nlo)
2317         sortlines (lo, nlo, temp);
2318
2319       mergelines (temp, lo, nlo, sorted_hi, nhi);
2320     }
2321 }
2322
2323 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2324    the same as OUTFILE.  If found, merge the found instances (and perhaps
2325    some other files) into a temporary file so that it can in turn be
2326    merged into OUTFILE without destroying OUTFILE before it is completely
2327    read.  Return the new value of NFILES, which differs from the old if
2328    some merging occurred.
2329
2330    This test ensures that an otherwise-erroneous use like
2331    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2332    It's not clear that POSIX requires this nicety.
2333    Detect common error cases, but don't try to catch obscure cases like
2334    "cat ... FILE ... | sort -m -o FILE"
2335    where traditional "sort" doesn't copy the input and where
2336    people should know that they're getting into trouble anyway.
2337    Catching these obscure cases would slow down performance in
2338    common cases.  */
2339
2340 static size_t
2341 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2342                       size_t nfiles, char const *outfile)
2343 {
2344   size_t i;
2345   bool got_outstat = false;
2346   struct stat outstat;
2347
2348   for (i = ntemps; i < nfiles; i++)
2349     {
2350       bool is_stdin = STREQ (files[i].name, "-");
2351       bool same;
2352       struct stat instat;
2353
2354       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2355         same = true;
2356       else
2357         {
2358           if (! got_outstat)
2359             {
2360               if ((outfile
2361                    ? stat (outfile, &outstat)
2362                    : fstat (STDOUT_FILENO, &outstat))
2363                   != 0)
2364                 break;
2365               got_outstat = true;
2366             }
2367
2368           same = (((is_stdin
2369                     ? fstat (STDIN_FILENO, &instat)
2370                     : stat (files[i].name, &instat))
2371                    == 0)
2372                   && SAME_INODE (instat, outstat));
2373         }
2374
2375       if (same)
2376         {
2377           FILE *tftp;
2378           pid_t pid;
2379           char *temp = create_temp (&tftp, &pid);
2380           mergefps (&files[i],0, nfiles - i, tftp, temp);
2381           files[i].name = temp;
2382           files[i].pid = pid;
2383           return i + 1;
2384         }
2385     }
2386
2387   return nfiles;
2388 }
2389
2390 /* Merge the input FILES.  NTEMPS is the number of files at the
2391    start of FILES that are temporary; it is zero at the top level.
2392    NFILES is the total number of files.  Put the output in
2393    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2394
2395 static void
2396 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2397        char const *output_file)
2398 {
2399   while (NMERGE < nfiles)
2400     {
2401       /* Number of input files processed so far.  */
2402       size_t in;
2403
2404       /* Number of output files generated so far.  */
2405       size_t out;
2406
2407       /* nfiles % NMERGE; this counts input files that are left over
2408          after all full-sized merges have been done.  */
2409       size_t remainder;
2410
2411       /* Number of easily-available slots at the next loop iteration.  */
2412       size_t cheap_slots;
2413
2414       /* Do as many NMERGE-size merges as possible.  */
2415       for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2416         {
2417           FILE *tfp;
2418           pid_t pid;
2419           char *temp = create_temp (&tfp, &pid);
2420           size_t nt = MIN (ntemps, NMERGE);
2421           ntemps -= nt;
2422           mergefps (&files[in], nt, NMERGE, tfp, temp);
2423           files[out].name = temp;
2424           files[out].pid = pid;
2425         }
2426
2427       remainder = nfiles - in;
2428       cheap_slots = NMERGE - out % NMERGE;
2429
2430       if (cheap_slots < remainder)
2431         {
2432           /* So many files remain that they can't all be put into the last
2433              NMERGE-sized output window.  Do one more merge.  Merge as few
2434              files as possible, to avoid needless I/O.  */
2435           size_t nshortmerge = remainder - cheap_slots + 1;
2436           FILE *tfp;
2437           pid_t pid;
2438           char *temp = create_temp (&tfp, &pid);
2439           size_t nt = MIN (ntemps, nshortmerge);
2440           ntemps -= nt;
2441           mergefps (&files[in], nt, nshortmerge, tfp, temp);
2442           files[out].name = temp;
2443           files[out++].pid = pid;
2444           in += nshortmerge;
2445         }
2446
2447       /* Put the remaining input files into the last NMERGE-sized output
2448          window, so they will be merged in the next pass.  */
2449       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2450       ntemps += out;
2451       nfiles -= in - out;
2452     }
2453
2454   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2455   mergefps (files, ntemps, nfiles, NULL, output_file);
2456 }
2457
2458 /* Sort NFILES FILES onto OUTPUT_FILE. */
2459
2460 static void
2461 sort (char * const *files, size_t nfiles, char const *output_file)
2462 {
2463   struct buffer buf;
2464   size_t ntemps = 0;
2465   bool output_file_created = false;
2466
2467   buf.alloc = 0;
2468
2469   while (nfiles)
2470     {
2471       char const *temp_output;
2472       char const *file = *files;
2473       FILE *fp = xfopen (file, "r");
2474       FILE *tfp;
2475       size_t bytes_per_line = (2 * sizeof (struct line)
2476                                - sizeof (struct line) / 2);
2477
2478       if (! buf.alloc)
2479         initbuf (&buf, bytes_per_line,
2480                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2481       buf.eof = false;
2482       files++;
2483       nfiles--;
2484
2485       while (fillbuf (&buf, fp, file))
2486         {
2487           struct line *line;
2488           struct line *linebase;
2489
2490           if (buf.eof && nfiles
2491               && (bytes_per_line + 1
2492                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2493             {
2494               /* End of file, but there is more input and buffer room.
2495                  Concatenate the next input file; this is faster in
2496                  the usual case.  */
2497               buf.left = buf.used;
2498               break;
2499             }
2500
2501           line = buffer_linelim (&buf);
2502           linebase = line - buf.nlines;
2503           if (1 < buf.nlines)
2504             sortlines (line, buf.nlines, linebase);
2505           if (buf.eof && !nfiles && !ntemps && !buf.left)
2506             {
2507               xfclose (fp, file);
2508               tfp = xfopen (output_file, "w");
2509               temp_output = output_file;
2510               output_file_created = true;
2511             }
2512           else
2513             {
2514               ++ntemps;
2515               temp_output = create_temp (&tfp, NULL);
2516             }
2517
2518           do
2519             {
2520               line--;
2521               write_bytes (line->text, line->length, tfp, temp_output);
2522               if (unique)
2523                 while (linebase < line && compare (line, line - 1) == 0)
2524                   line--;
2525             }
2526           while (linebase < line);
2527
2528           xfclose (tfp, temp_output);
2529
2530           /* Free up some resources every once in a while.  */
2531           if (MAX_PROCS_BEFORE_REAP < nprocs)
2532             reap_some ();
2533
2534           if (output_file_created)
2535             goto finish;
2536         }
2537       xfclose (fp, file);
2538     }
2539
2540  finish:
2541   free (buf.buf);
2542
2543   if (! output_file_created)
2544     {
2545       size_t i;
2546       struct tempnode *node = temphead;
2547       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2548       for (i = 0; node; i++)
2549         {
2550           tempfiles[i].name = node->name;
2551           tempfiles[i].pid = node->pid;
2552           node = node->next;
2553         }
2554       merge (tempfiles, ntemps, ntemps, output_file);
2555       free (tempfiles);
2556     }
2557 }
2558
2559 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2560
2561 static void
2562 insertkey (struct keyfield *key_arg)
2563 {
2564   struct keyfield **p;
2565   struct keyfield *key = xmemdup (key_arg, sizeof *key);
2566
2567   for (p = &keylist; *p; p = &(*p)->next)
2568     continue;
2569   *p = key;
2570   key->next = NULL;
2571 }
2572
2573 /* Report a bad field specification SPEC, with extra info MSGID.  */
2574
2575 static void badfieldspec (char const *, char const *)
2576      ATTRIBUTE_NORETURN;
2577 static void
2578 badfieldspec (char const *spec, char const *msgid)
2579 {
2580   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2581          _(msgid), quote (spec));
2582   abort ();
2583 }
2584
2585 /* Report incompatible options.  */
2586
2587 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2588 static void
2589 incompatible_options (char const *opts)
2590 {
2591   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2592   abort ();
2593 }
2594
2595 /* Check compatibility of ordering options.  */
2596
2597 static void
2598 check_ordering_compatibility (void)
2599 {
2600   struct keyfield const *key;
2601
2602   for (key = keylist; key; key = key->next)
2603     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2604               + !!key->ignore))
2605         || (key->random && key->translate))
2606       {
2607         char opts[7];
2608         char *p = opts;
2609         if (key->ignore == nondictionary)
2610           *p++ = 'd';
2611         if (key->translate)
2612           *p++ = 'f';
2613         if (key->general_numeric)
2614           *p++ = 'g';
2615         if (key->ignore == nonprinting)
2616           *p++ = 'i';
2617         if (key->month)
2618           *p++ = 'M';
2619         if (key->numeric)
2620           *p++ = 'n';
2621         if (key->random)
2622           *p++ = 'R';
2623         *p = '\0';
2624         incompatible_options (opts);
2625       }
2626 }
2627
2628 /* Parse the leading integer in STRING and store the resulting value
2629    (which must fit into size_t) into *VAL.  Return the address of the
2630    suffix after the integer.  If the value is too large, silently
2631    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
2632    failure; otherwise, report MSGID and exit on failure.  */
2633
2634 static char const *
2635 parse_field_count (char const *string, size_t *val, char const *msgid)
2636 {
2637   char *suffix;
2638   uintmax_t n;
2639
2640   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2641     {
2642     case LONGINT_OK:
2643     case LONGINT_INVALID_SUFFIX_CHAR:
2644       *val = n;
2645       if (*val == n)
2646         break;
2647       /* Fall through.  */
2648     case LONGINT_OVERFLOW:
2649     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2650       *val = SIZE_MAX;
2651       break;
2652
2653     case LONGINT_INVALID:
2654       if (msgid)
2655         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2656                _(msgid), quote (string));
2657       return NULL;
2658     }
2659
2660   return suffix;
2661 }
2662
2663 /* Handle interrupts and hangups. */
2664
2665 static void
2666 sighandler (int sig)
2667 {
2668   if (! SA_NOCLDSTOP)
2669     signal (sig, SIG_IGN);
2670
2671   cleanup ();
2672
2673   signal (sig, SIG_DFL);
2674   raise (sig);
2675 }
2676
2677 /* Set the ordering options for KEY specified in S.
2678    Return the address of the first character in S that
2679    is not a valid ordering option.
2680    BLANKTYPE is the kind of blanks that 'b' should skip. */
2681
2682 static char *
2683 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2684 {
2685   while (*s)
2686     {
2687       switch (*s)
2688         {
2689         case 'b':
2690           if (blanktype == bl_start || blanktype == bl_both)
2691             key->skipsblanks = true;
2692           if (blanktype == bl_end || blanktype == bl_both)
2693             key->skipeblanks = true;
2694           break;
2695         case 'd':
2696           key->ignore = nondictionary;
2697           break;
2698         case 'f':
2699           key->translate = fold_toupper;
2700           break;
2701         case 'g':
2702           key->general_numeric = true;
2703           break;
2704         case 'i':
2705           /* Option order should not matter, so don't let -i override
2706              -d.  -d implies -i, but -i does not imply -d.  */
2707           if (! key->ignore)
2708             key->ignore = nonprinting;
2709           break;
2710         case 'M':
2711           key->month = true;
2712           break;
2713         case 'n':
2714           key->numeric = true;
2715           break;
2716         case 'R':
2717           key->random = true;
2718           break;
2719         case 'r':
2720           key->reverse = true;
2721           break;
2722         default:
2723           return (char *) s;
2724         }
2725       ++s;
2726     }
2727   return (char *) s;
2728 }
2729
2730 static struct keyfield *
2731 key_init (struct keyfield *key)
2732 {
2733   memset (key, 0, sizeof *key);
2734   key->eword = SIZE_MAX;
2735   return key;
2736 }
2737
2738 int
2739 main (int argc, char **argv)
2740 {
2741   struct keyfield *key;
2742   struct keyfield key_buf;
2743   struct keyfield gkey;
2744   char const *s;
2745   int c = 0;
2746   char checkonly = 0;
2747   bool mergeonly = false;
2748   char *random_source = NULL;
2749   bool need_random = false;
2750   size_t nfiles = 0;
2751   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2752   bool obsolete_usage = (posix2_version () < 200112);
2753   char **files;
2754   char const *outfile = NULL;
2755
2756   initialize_main (&argc, &argv);
2757   set_program_name (argv[0]);
2758   setlocale (LC_ALL, "");
2759   bindtextdomain (PACKAGE, LOCALEDIR);
2760   textdomain (PACKAGE);
2761
2762   initialize_exit_failure (SORT_FAILURE);
2763
2764   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2765 #if HAVE_NL_LANGINFO
2766   hard_LC_TIME = hard_locale (LC_TIME);
2767 #endif
2768
2769   /* Get locale's representation of the decimal point.  */
2770   {
2771     struct lconv const *locale = localeconv ();
2772
2773     /* If the locale doesn't define a decimal point, or if the decimal
2774        point is multibyte, use the C locale's decimal point.  FIXME:
2775        add support for multibyte decimal points.  */
2776     decimal_point = to_uchar (locale->decimal_point[0]);
2777     if (! decimal_point || locale->decimal_point[1])
2778       decimal_point = '.';
2779
2780     /* FIXME: add support for multibyte thousands separators.  */
2781     thousands_sep = to_uchar (*locale->thousands_sep);
2782     if (! thousands_sep || locale->thousands_sep[1])
2783       thousands_sep = -1;
2784   }
2785
2786   have_read_stdin = false;
2787   inittables ();
2788
2789   {
2790     size_t i;
2791     static int const sig[] =
2792       {
2793         /* The usual suspects.  */
2794         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2795 #ifdef SIGPOLL
2796         SIGPOLL,
2797 #endif
2798 #ifdef SIGPROF
2799         SIGPROF,
2800 #endif
2801 #ifdef SIGVTALRM
2802         SIGVTALRM,
2803 #endif
2804 #ifdef SIGXCPU
2805         SIGXCPU,
2806 #endif
2807 #ifdef SIGXFSZ
2808         SIGXFSZ,
2809 #endif
2810       };
2811     enum { nsigs = sizeof sig / sizeof sig[0] };
2812
2813 #if SA_NOCLDSTOP
2814     struct sigaction act;
2815
2816     sigemptyset (&caught_signals);
2817     for (i = 0; i < nsigs; i++)
2818       {
2819         sigaction (sig[i], NULL, &act);
2820         if (act.sa_handler != SIG_IGN)
2821           sigaddset (&caught_signals, sig[i]);
2822       }
2823
2824     act.sa_handler = sighandler;
2825     act.sa_mask = caught_signals;
2826     act.sa_flags = 0;
2827
2828     for (i = 0; i < nsigs; i++)
2829       if (sigismember (&caught_signals, sig[i]))
2830         sigaction (sig[i], &act, NULL);
2831 #else
2832     for (i = 0; i < nsigs; i++)
2833       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2834         {
2835           signal (sig[i], sighandler);
2836           siginterrupt (sig[i], 1);
2837         }
2838 #endif
2839   }
2840
2841   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
2842   atexit (exit_cleanup);
2843
2844   gkey.sword = gkey.eword = SIZE_MAX;
2845   gkey.ignore = NULL;
2846   gkey.translate = NULL;
2847   gkey.numeric = gkey.general_numeric = gkey.random = false;
2848   gkey.month = gkey.reverse = false;
2849   gkey.skipsblanks = gkey.skipeblanks = false;
2850
2851   files = xnmalloc (argc, sizeof *files);
2852
2853   for (;;)
2854     {
2855       /* Parse an operand as a file after "--" was seen; or if
2856          pedantic and a file was seen, unless the POSIX version
2857          predates 1003.1-2001 and -c was not seen and the operand is
2858          "-o FILE" or "-oFILE".  */
2859       int oi = -1;
2860
2861       if (c == -1
2862           || (posixly_correct && nfiles != 0
2863               && ! (obsolete_usage
2864                     && ! checkonly
2865                     && optind != argc
2866                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2867                     && (argv[optind][2] || optind + 1 != argc)))
2868           || ((c = getopt_long (argc, argv, short_options,
2869                                 long_options, &oi))
2870               == -1))
2871         {
2872           if (argc <= optind)
2873             break;
2874           files[nfiles++] = argv[optind++];
2875         }
2876       else switch (c)
2877         {
2878         case 1:
2879           key = NULL;
2880           if (optarg[0] == '+')
2881             {
2882               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2883                                       && ISDIGIT (argv[optind][1]));
2884               obsolete_usage |= minus_pos_usage & ~posixly_correct;
2885               if (obsolete_usage)
2886                 {
2887                   /* Treat +POS1 [-POS2] as a key if possible; but silently
2888                      treat an operand as a file if it is not a valid +POS1.  */
2889                   key = key_init (&key_buf);
2890                   s = parse_field_count (optarg + 1, &key->sword, NULL);
2891                   if (s && *s == '.')
2892                     s = parse_field_count (s + 1, &key->schar, NULL);
2893                   if (! (key->sword | key->schar))
2894                     key->sword = SIZE_MAX;
2895                   if (! s || *set_ordering (s, key, bl_start))
2896                     key = NULL;
2897                   else
2898                     {
2899                       if (minus_pos_usage)
2900                         {
2901                           char const *optarg1 = argv[optind++];
2902                           s = parse_field_count (optarg1 + 1, &key->eword,
2903                                              N_("invalid number after `-'"));
2904                           if (*s == '.')
2905                             s = parse_field_count (s + 1, &key->echar,
2906                                                N_("invalid number after `.'"));
2907                           if (*set_ordering (s, key, bl_end))
2908                             badfieldspec (optarg1,
2909                                       N_("stray character in field spec"));
2910                         }
2911                       insertkey (key);
2912                     }
2913                 }
2914             }
2915           if (! key)
2916             files[nfiles++] = optarg;
2917           break;
2918
2919         case SORT_OPTION:
2920           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
2921           /* Fall through. */
2922         case 'b':
2923         case 'd':
2924         case 'f':
2925         case 'g':
2926         case 'i':
2927         case 'M':
2928         case 'n':
2929         case 'r':
2930         case 'R':
2931           {
2932             char str[2];
2933             str[0] = c;
2934             str[1] = '\0';
2935             set_ordering (str, &gkey, bl_both);
2936           }
2937           break;
2938
2939         case CHECK_OPTION:
2940           c = (optarg
2941                ? XARGMATCH ("--check", optarg, check_args, check_types)
2942                : 'c');
2943           /* Fall through.  */
2944         case 'c':
2945         case 'C':
2946           if (checkonly && checkonly != c)
2947             incompatible_options ("cC");
2948           checkonly = c;
2949           break;
2950
2951         case COMPRESS_PROGRAM_OPTION:
2952           if (compress_program && !STREQ (compress_program, optarg))
2953             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2954           compress_program = optarg;
2955           break;
2956
2957         case 'k':
2958           key = key_init (&key_buf);
2959
2960           /* Get POS1. */
2961           s = parse_field_count (optarg, &key->sword,
2962                                  N_("invalid number at field start"));
2963           if (! key->sword--)
2964             {
2965               /* Provoke with `sort -k0' */
2966               badfieldspec (optarg, N_("field number is zero"));
2967             }
2968           if (*s == '.')
2969             {
2970               s = parse_field_count (s + 1, &key->schar,
2971                                      N_("invalid number after `.'"));
2972               if (! key->schar--)
2973                 {
2974                   /* Provoke with `sort -k1.0' */
2975                   badfieldspec (optarg, N_("character offset is zero"));
2976                 }
2977             }
2978           if (! (key->sword | key->schar))
2979             key->sword = SIZE_MAX;
2980           s = set_ordering (s, key, bl_start);
2981           if (*s != ',')
2982             {
2983               key->eword = SIZE_MAX;
2984               key->echar = 0;
2985             }
2986           else
2987             {
2988               /* Get POS2. */
2989               s = parse_field_count (s + 1, &key->eword,
2990                                      N_("invalid number after `,'"));
2991               if (! key->eword--)
2992                 {
2993                   /* Provoke with `sort -k1,0' */
2994                   badfieldspec (optarg, N_("field number is zero"));
2995                 }
2996               if (*s == '.')
2997                 s = parse_field_count (s + 1, &key->echar,
2998                                        N_("invalid number after `.'"));
2999               else
3000                 {
3001                   /* `-k 2,3' is equivalent to `+1 -3'.  */
3002                   key->eword++;
3003                 }
3004               s = set_ordering (s, key, bl_end);
3005             }
3006           if (*s)
3007             badfieldspec (optarg, N_("stray character in field spec"));
3008           insertkey (key);
3009           break;
3010
3011         case 'm':
3012           mergeonly = true;
3013           break;
3014
3015         case 'o':
3016           if (outfile && !STREQ (outfile, optarg))
3017             error (SORT_FAILURE, 0, _("multiple output files specified"));
3018           outfile = optarg;
3019           break;
3020
3021         case RANDOM_SOURCE_OPTION:
3022           if (random_source && !STREQ (random_source, optarg))
3023             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3024           random_source = optarg;
3025           break;
3026
3027         case 's':
3028           stable = true;
3029           break;
3030
3031         case 'S':
3032           specify_sort_size (oi, c, optarg);
3033           break;
3034
3035         case 't':
3036           {
3037             char newtab = optarg[0];
3038             if (! newtab)
3039               error (SORT_FAILURE, 0, _("empty tab"));
3040             if (optarg[1])
3041               {
3042                 if (STREQ (optarg, "\\0"))
3043                   newtab = '\0';
3044                 else
3045                   {
3046                     /* Provoke with `sort -txx'.  Complain about
3047                        "multi-character tab" instead of "multibyte tab", so
3048                        that the diagnostic's wording does not need to be
3049                        changed once multibyte characters are supported.  */
3050                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3051                            quote (optarg));
3052                   }
3053               }
3054             if (tab != TAB_DEFAULT && tab != newtab)
3055               error (SORT_FAILURE, 0, _("incompatible tabs"));
3056             tab = newtab;
3057           }
3058           break;
3059
3060         case 'T':
3061           add_temp_dir (optarg);
3062           break;
3063
3064         case 'u':
3065           unique = true;
3066           break;
3067
3068         case 'y':
3069           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3070              through Solaris 7.  It is also accepted by many non-Solaris
3071              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3072              -y is marked as obsolete starting with Solaris 8 (1999), but is
3073              still accepted as of Solaris 10 prerelease (2004).
3074
3075              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3076              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3077              and which in general ignores the argument after "-y" if it
3078              consists entirely of digits (it can even be empty).  */
3079           if (optarg == argv[optind - 1])
3080             {
3081               char const *p;
3082               for (p = optarg; ISDIGIT (*p); p++)
3083                 continue;
3084               optind -= (*p != '\0');
3085             }
3086           break;
3087
3088         case 'z':
3089           eolchar = 0;
3090           break;
3091
3092         case_GETOPT_HELP_CHAR;
3093
3094         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3095
3096         default:
3097           usage (SORT_FAILURE);
3098         }
3099     }
3100
3101   /* Inheritance of global options to individual keys. */
3102   for (key = keylist; key; key = key->next)
3103     {
3104       if (! (key->ignore || key->translate
3105              || (key->skipsblanks | key->reverse
3106                  | key->skipeblanks | key->month | key->numeric
3107                  | key->general_numeric
3108                  | key->random)))
3109         {
3110           key->ignore = gkey.ignore;
3111           key->translate = gkey.translate;
3112           key->skipsblanks = gkey.skipsblanks;
3113           key->skipeblanks = gkey.skipeblanks;
3114           key->month = gkey.month;
3115           key->numeric = gkey.numeric;
3116           key->general_numeric = gkey.general_numeric;
3117           key->random = gkey.random;
3118           key->reverse = gkey.reverse;
3119         }
3120
3121       need_random |= key->random;
3122     }
3123
3124   if (!keylist && (gkey.ignore || gkey.translate
3125                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3126                        | gkey.numeric | gkey.general_numeric
3127                        | gkey.random)))
3128     {
3129       insertkey (&gkey);
3130       need_random |= gkey.random;
3131     }
3132
3133   check_ordering_compatibility ();
3134
3135   reverse = gkey.reverse;
3136
3137   if (need_random)
3138     {
3139       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3140       if (! randread_source)
3141         die (_("open failed"), random_source);
3142     }
3143
3144   if (temp_dir_count == 0)
3145     {
3146       char const *tmp_dir = getenv ("TMPDIR");
3147       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3148     }
3149
3150   if (nfiles == 0)
3151     {
3152       static char *minus = "-";
3153       nfiles = 1;
3154       free (files);
3155       files = &minus;
3156     }
3157
3158   if (checkonly)
3159     {
3160       if (nfiles > 1)
3161         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3162                quote (files[1]), checkonly);
3163
3164       if (outfile)
3165         {
3166           static char opts[] = {0, 'o', 0};
3167           opts[0] = checkonly;
3168           incompatible_options (opts);
3169         }
3170
3171       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3172          input is not properly sorted.  */
3173       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3174     }
3175
3176   if (mergeonly)
3177     {
3178       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3179       size_t i;
3180
3181       for (i = 0; i < nfiles; ++i)
3182         sortfiles[i].name = files[i];
3183
3184       merge (sortfiles, 0, nfiles, outfile);
3185       IF_LINT (free (sortfiles));
3186     }
3187   else
3188     sort (files, nfiles, outfile);
3189
3190   if (have_read_stdin && fclose (stdin) == EOF)
3191     die (_("close failed"), "-");
3192
3193   exit (EXIT_SUCCESS);
3194 }