src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-2004 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <signal.h>
  29 #include <stdio.h>
  30 #include "system.h"
  31 #include "error.h"
  32 #include "hard-locale.h"
  33 #include "inttostr.h"
  34 #include "physmem.h"
  35 #include "posixver.h"
  36 #include "quote.h"
  37 #include "stdio-safer.h"
  38 #include "xmemcoll.h"
  39 #include "xstrtol.h"
  40
  41 #if HAVE_SYS_RESOURCE_H
  42 # include <sys/resource.h>
  43 #endif
  44 #ifndef RLIMIT_DATA
  45 struct rlimit { size_t rlim_cur; };
  46 # define getrlimit(Resource, Rlp) (-1)
  47 #endif
  48
  49 /* The official name of this program (e.g., no `g' prefix).  */
  50 #define PROGRAM_NAME "sort"
  51
  52 #define AUTHORS "Mike Haertel", "Paul Eggert"
  53
  54 #if HAVE_LANGINFO_CODESET
  55 # include <langinfo.h>
  56 #endif
  57
  58 #ifndef SA_NOCLDSTOP
  59 # define sigprocmask(How, Set, Oset) /* empty */
  60 # define sigset_t int
  61 #endif
  62
  63 #ifndef STDC_HEADERS
  64 double strtod ();
  65 #endif
  66
  67 #define UCHAR_LIM (UCHAR_MAX + 1)
  68
  69 #ifndef DEFAULT_TMPDIR
  70 # define DEFAULT_TMPDIR "/tmp"
  71 #endif
  72
  73 /* Exit statuses.  */
  74 enum
  75   {
  76     /* POSIX says to exit with status 1 if invoked with -c and the
  77        input is not properly sorted.  */
  78     SORT_OUT_OF_ORDER = 1,
  79
  80     /* POSIX says any other irregular exit must exit with a status
  81        code greater than 1.  */
  82     SORT_FAILURE = 2
  83   };
  84
  85 #define C_DECIMAL_POINT '.'
  86 #define NEGATION_SIGN   '-'
  87 #define NUMERIC_ZERO    '0'
  88
  89 #if HAVE_SETLOCALE
  90
  91 static char decimal_point;
  92 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
  93
  94 /* Nonzero if the corresponding locales are hard.  */
  95 static bool hard_LC_COLLATE;
  96 # if HAVE_NL_LANGINFO
  97 static bool hard_LC_TIME;
  98 # endif
  99
 100 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 101
 102 #else
 103
 104 # define decimal_point C_DECIMAL_POINT
 105 # define IS_THOUSANDS_SEP(x) false
 106
 107 #endif
 108
 109 #define NONZERO(x) (x != 0)
 110
 111 /* The kind of blanks for '-b' to skip in various options. */
 112 enum blanktype { bl_start, bl_end, bl_both };
 113
 114 /* The character marking end of line. Default to \n. */
 115 static char eolchar = '\n';
 116
 117 /* Lines are held in core as counted strings. */
 118 struct line
 119 {
 120   char *text;                   /* Text of the line. */
 121   size_t length;                /* Length including final newline. */
 122   char *keybeg;                 /* Start of first key. */
 123   char *keylim;                 /* Limit of first key. */
 124 };
 125
 126 /* Input buffers. */
 127 struct buffer
 128 {
 129   char *buf;                    /* Dynamically allocated buffer,
 130                                    partitioned into 3 regions:
 131                                    - input data;
 132                                    - unused area;
 133                                    - an array of lines, in reverse order.  */
 134   size_t used;                  /* Number of bytes used for input data.  */
 135   size_t nlines;                /* Number of lines in the line array.  */
 136   size_t alloc;                 /* Number of bytes allocated. */
 137   size_t left;                  /* Number of bytes left from previous reads. */
 138   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 139   bool eof;                     /* An EOF has been read.  */
 140 };
 141
 142 struct keyfield
 143 {
 144   size_t sword;                 /* Zero-origin 'word' to start at. */
 145   size_t schar;                 /* Additional characters to skip. */
 146   size_t eword;                 /* Zero-origin first word after field. */
 147   size_t echar;                 /* Additional characters in field. */
 148   bool const *ignore;           /* Boolean array of characters to ignore. */
 149   char const *translate;        /* Translation applied to characters. */
 150   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 151   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 152   bool numeric;                 /* Flag for numeric comparison.  Handle
 153                                    strings of digits with optional decimal
 154                                    point, but no exponential notation. */
 155   bool general_numeric;         /* Flag for general, numeric comparison.
 156                                    Handle numbers in exponential notation. */
 157   bool month;                   /* Flag for comparison by month name. */
 158   bool reverse;                 /* Reverse the sense of comparison. */
 159   struct keyfield *next;        /* Next keyfield to try. */
 160 };
 161
 162 struct month
 163 {
 164   char const *name;
 165   int val;
 166 };
 167
 168 /* The name this program was run with. */
 169 char *program_name;
 170
 171 /* FIXME: None of these tables work with multibyte character sets.
 172    Also, there are many other bugs when handling multibyte characters.
 173    One way to fix this is to rewrite `sort' to use wide characters
 174    internally, but doing this with good performance is a bit
 175    tricky.  */
 176
 177 /* Table of blanks.  */
 178 static bool blanks[UCHAR_LIM];
 179
 180 /* Table of non-printing characters. */
 181 static bool nonprinting[UCHAR_LIM];
 182
 183 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 184 static bool nondictionary[UCHAR_LIM];
 185
 186 /* Translation table folding lower case to upper.  */
 187 static char fold_toupper[UCHAR_LIM];
 188
 189 #define MONTHS_PER_YEAR 12
 190
 191 /* Table mapping month names to integers.
 192    Alphabetic order allows binary search. */
 193 static struct month monthtab[] =
 194 {
 195   {"APR", 4},
 196   {"AUG", 8},
 197   {"DEC", 12},
 198   {"FEB", 2},
 199   {"JAN", 1},
 200   {"JUL", 7},
 201   {"JUN", 6},
 202   {"MAR", 3},
 203   {"MAY", 5},
 204   {"NOV", 11},
 205   {"OCT", 10},
 206   {"SEP", 9}
 207 };
 208
 209 /* During the merge phase, the number of files to merge at once. */
 210 #define NMERGE 16
 211
 212 /* Minimum size for a merge or check buffer.  */
 213 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 214
 215 /* Minimum sort size; the code might not work with smaller sizes.  */
 216 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 217
 218 /* The number of bytes needed for a merge or check buffer, which can
 219    function relatively efficiently even if it holds only one line.  If
 220    a longer line is seen, this value is increased.  */
 221 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 222
 223 /* The approximate maximum number of bytes of main memory to use, as
 224    specified by the user.  Zero if the user has not specified a size.  */
 225 static size_t sort_size;
 226
 227 /* The guessed size for non-regular files.  */
 228 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 229
 230 /* Array of directory names in which any temporary files are to be created. */
 231 static char const **temp_dirs;
 232
 233 /* Number of temporary directory names used.  */
 234 static size_t temp_dir_count;
 235
 236 /* Number of allocated slots in temp_dirs.  */
 237 static size_t temp_dir_alloc;
 238
 239 /* Flag to reverse the order of all comparisons. */
 240 static bool reverse;
 241
 242 /* Flag for stable sort.  This turns off the last ditch bytewise
 243    comparison of lines, and instead leaves lines in the same order
 244    they were read if all keys compare equal.  */
 245 static bool stable;
 246
 247 /* If TAB has this value, blanks separate fields.  */
 248 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 249
 250 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 251    separated by the empty string between a non-blank character and a blank
 252    character. */
 253 static int tab = TAB_DEFAULT;
 254
 255 /* Flag to remove consecutive duplicate lines from the output.
 256    Only the last of a sequence of equal lines will be output. */
 257 static bool unique;
 258
 259 /* Nonzero if any of the input files are the standard input. */
 260 static bool have_read_stdin;
 261
 262 /* List of key field comparisons to be tried.  */
 263 static struct keyfield *keylist;
 264
 265 static void sortlines_temp (struct line *, size_t, struct line *);
 266
 267 /* Report MESSAGE for FILE, then clean up and exit.
 268    If FILE is null, it represents standard output.  */
 269
 270 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 271 static void
 272 die (char const *message, char const *file)
 273 {
 274   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 275   exit (SORT_FAILURE);
 276 }
 277
 278 void
 279 usage (int status)
 280 {
 281   if (status != EXIT_SUCCESS)
 282     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 283              program_name);
 284   else
 285     {
 286       printf (_("\
 287 Usage: %s [OPTION]... [FILE]...\n\
 288 "),
 289               program_name);
 290       fputs (_("\
 291 Write sorted concatenation of all FILE(s) to standard output.\n\
 292 \n\
 293 Ordering options:\n\
 294 \n\
 295 "), stdout);
 296       fputs (_("\
 297 Mandatory arguments to long options are mandatory for short options too.\n\
 298 "), stdout);
 299       fputs (_("\
 300   -b, --ignore-leading-blanks ignore leading blanks\n\
 301   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 302   -f, --ignore-case           fold lower case to upper case characters\n\
 303 "), stdout);
 304       fputs (_("\
 305   -g, --general-numeric-sort  compare according to general numerical value\n\
 306   -i, --ignore-nonprinting    consider only printable characters\n\
 307   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 308   -n, --numeric-sort          compare according to string numerical value\n\
 309   -r, --reverse               reverse the result of comparisons\n\
 310 \n\
 311 "), stdout);
 312       fputs (_("\
 313 Other options:\n\
 314 \n\
 315   -c, --check               check whether input is sorted; do not sort\n\
 316   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 317   -m, --merge               merge already sorted files; do not sort\n\
 318   -o, --output=FILE         write result to FILE instead of standard output\n\
 319   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 320   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 321 "), stdout);
 322       printf (_("\
 323   -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
 324   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 325                               multiple options specify multiple directories\n\
 326   -u, --unique              with -c, check for strict ordering;\n\
 327                               without -c, output only the first of an equal run\n\
 328 "), DEFAULT_TMPDIR);
 329       fputs (_("\
 330   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 331 "), stdout);
 332       fputs (HELP_OPTION_DESCRIPTION, stdout);
 333       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 334       fputs (_("\
 335 \n\
 336 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 337 in the field.  OPTS is one or more single-letter ordering options, which\n\
 338 override global ordering options for that key.  If no key is given, use the\n\
 339 entire line as the key.\n\
 340 \n\
 341 SIZE may be followed by the following multiplicative suffixes:\n\
 342 "), stdout);
 343       fputs (_("\
 344 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 345 \n\
 346 With no FILE, or when FILE is -, read standard input.\n\
 347 \n\
 348 *** WARNING ***\n\
 349 The locale specified by the environment affects sort order.\n\
 350 Set LC_ALL=C to get the traditional sort order that uses\n\
 351 native byte values.\n\
 352 "), stdout );
 353       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 354     }
 355
 356   exit (status);
 357 }
 358
 359 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
 360
 361 static struct option const long_options[] =
 362 {
 363   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 364   {"check", no_argument, NULL, 'c'},
 365   {"dictionary-order", no_argument, NULL, 'd'},
 366   {"ignore-case", no_argument, NULL, 'f'},
 367   {"general-numeric-sort", no_argument, NULL, 'g'},
 368   {"ignore-nonprinting", no_argument, NULL, 'i'},
 369   {"key", required_argument, NULL, 'k'},
 370   {"merge", no_argument, NULL, 'm'},
 371   {"month-sort", no_argument, NULL, 'M'},
 372   {"numeric-sort", no_argument, NULL, 'n'},
 373   {"output", required_argument, NULL, 'o'},
 374   {"reverse", no_argument, NULL, 'r'},
 375   {"stable", no_argument, NULL, 's'},
 376   {"buffer-size", required_argument, NULL, 'S'},
 377   {"field-separator", required_argument, NULL, 't'},
 378   {"temporary-directory", required_argument, NULL, 'T'},
 379   {"unique", no_argument, NULL, 'u'},
 380   {"zero-terminated", no_argument, NULL, 'z'},
 381   {GETOPT_HELP_OPTION_DECL},
 382   {GETOPT_VERSION_OPTION_DECL},
 383   {0, 0, 0, 0},
 384 };
 385
 386 /* The set of signals that are caught.  */
 387 static sigset_t caught_signals;
 388
 389 /* The list of temporary files. */
 390 struct tempnode
 391 {
 392   struct tempnode *volatile next;
 393   char name[1];  /* Actual size is 1 + file name length.  */
 394 };
 395 static struct tempnode *volatile temphead;
 396 static struct tempnode *volatile *temptail = &temphead;
 397
 398 /* Clean up any remaining temporary files.  */
 399
 400 static void
 401 cleanup (void)
 402 {
 403   struct tempnode const *node;
 404
 405   for (node = temphead; node; node = node->next)
 406     unlink (node->name);
 407 }
 408
 409 /* Create a new temporary file, returning its newly allocated name.
 410    Store into *PFP a stream open for writing.  */
 411
 412 static char *
 413 create_temp_file (FILE **pfp)
 414 {
 415   static char const slashbase[] = "/sortXXXXXX";
 416   static size_t temp_dir_index;
 417   sigset_t oldset;
 418   int fd;
 419   int saved_errno;
 420   char const *temp_dir = temp_dirs[temp_dir_index];
 421   size_t len = strlen (temp_dir);
 422   struct tempnode *node =
 423     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 424   char *file = node->name;
 425
 426   memcpy (file, temp_dir, len);
 427   memcpy (file + len, slashbase, sizeof slashbase);
 428   node->next = NULL;
 429   if (++temp_dir_index == temp_dir_count)
 430     temp_dir_index = 0;
 431
 432   /* Create the temporary file in a critical section, to avoid races.  */
 433   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 434   fd = mkstemp (file);
 435   if (0 <= fd)
 436     {
 437       *temptail = node;
 438       temptail = &node->next;
 439     }
 440   saved_errno = errno;
 441   sigprocmask (SIG_SETMASK, &oldset, NULL);
 442   errno = saved_errno;
 443
 444   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 445     die (_("cannot create temporary file"), file);
 446
 447   return file;
 448 }
 449
 450 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 451    standard output; HOW should be "w".  When opening for input, "-"
 452    means standard input.  To avoid confusion, do not return file
 453    descriptors 0, 1, or 2.  */
 454
 455 static FILE *
 456 xfopen (const char *file, const char *how)
 457 {
 458   FILE *fp;
 459
 460   if (!file)
 461     fp = stdout;
 462   else if (STREQ (file, "-") && *how == 'r')
 463     {
 464       have_read_stdin = true;
 465       fp = stdin;
 466     }
 467   else
 468     {
 469       if ((fp = fopen_safer (file, how)) == NULL)
 470         die (_("open failed"), file);
 471     }
 472
 473   return fp;
 474 }
 475
 476 /* Close FP, whose name is FILE, and report any errors.  */
 477
 478 static void
 479 xfclose (FILE *fp, char const *file)
 480 {
 481   if (fp == stdin)
 482     {
 483       /* Allow reading stdin from tty more than once. */
 484       if (feof (fp))
 485         clearerr (fp);
 486     }
 487   else if (fp == stdout)
 488     {
 489       /* Don't close stdout just yet.  close_stdout does that.  */
 490       if (fflush (fp) != 0)
 491         die (_("fflush failed"), file);
 492     }
 493   else
 494     {
 495       if (fclose (fp) != 0)
 496         die (_("close failed"), file);
 497     }
 498 }
 499
 500 static void
 501 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 502 {
 503   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 504     die (_("write failed"), output_file);
 505 }
 506
 507 /* Append DIR to the array of temporary directory names.  */
 508 static void
 509 add_temp_dir (char const *dir)
 510 {
 511   if (temp_dir_count == temp_dir_alloc)
 512     temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs);
 513
 514   temp_dirs[temp_dir_count++] = dir;
 515 }
 516
 517 /* Search through the list of temporary files for NAME;
 518    remove it if it is found on the list. */
 519
 520 static void
 521 zaptemp (const char *name)
 522 {
 523   struct tempnode *volatile *pnode;
 524   struct tempnode *node;
 525   sigset_t oldset;
 526
 527   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 528     if (node->name == name)
 529       {
 530         /* Unlink the temporary file in a critical section, to avoid races.  */
 531         struct tempnode *t = node->next;
 532         sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 533         unlink (name);
 534         *pnode = t;
 535         sigprocmask (SIG_SETMASK, &oldset, NULL);
 536         if (! t)
 537           temptail = pnode;
 538         free (node);
 539         break;
 540       }
 541 }
 542
 543 #if HAVE_NL_LANGINFO
 544
 545 static int
 546 struct_month_cmp (const void *m1, const void *m2)
 547 {
 548   struct month const *month1 = m1;
 549   struct month const *month2 = m2;
 550   return strcmp (month1->name, month2->name);
 551 }
 552
 553 #endif
 554
 555 /* Initialize the character class tables. */
 556
 557 static void
 558 inittables (void)
 559 {
 560   size_t i;
 561
 562   for (i = 0; i < UCHAR_LIM; ++i)
 563     {
 564       blanks[i] = !!ISBLANK (i);
 565       nonprinting[i] = !ISPRINT (i);
 566       nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
 567       fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
 568     }
 569
 570 #if HAVE_NL_LANGINFO
 571   /* If we're not in the "C" locale, read different names for months.  */
 572   if (hard_LC_TIME)
 573     {
 574       for (i = 0; i < MONTHS_PER_YEAR; i++)
 575         {
 576           char const *s;
 577           size_t s_len;
 578           size_t j;
 579           char *name;
 580
 581           s = (char *) nl_langinfo (ABMON_1 + i);
 582           s_len = strlen (s);
 583           monthtab[i].name = name = xmalloc (s_len + 1);
 584           monthtab[i].val = i + 1;
 585
 586           for (j = 0; j < s_len; j++)
 587             name[j] = fold_toupper[to_uchar (s[j])];
 588           name[j] = '\0';
 589         }
 590       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 591              sizeof *monthtab, struct_month_cmp);
 592     }
 593 #endif
 594 }
 595
 596 /* Specify the amount of main memory to use when sorting.  */
 597 static void
 598 specify_sort_size (char const *s)
 599 {
 600   uintmax_t n;
 601   char *suffix;
 602   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
 603
 604   /* The default unit is KiB.  */
 605   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 606     {
 607       if (n <= UINTMAX_MAX / 1024)
 608         n *= 1024;
 609       else
 610         e = LONGINT_OVERFLOW;
 611     }
 612
 613   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 614   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 615     switch (suffix[0])
 616       {
 617       case 'b':
 618         e = LONGINT_OK;
 619         break;
 620
 621       case '%':
 622         {
 623           double mem = physmem_total () * n / 100;
 624
 625           /* Use "<", not "<=", to avoid problems with rounding.  */
 626           if (mem < UINTMAX_MAX)
 627             {
 628               n = mem;
 629               e = LONGINT_OK;
 630             }
 631           else
 632             e = LONGINT_OVERFLOW;
 633         }
 634         break;
 635       }
 636
 637   if (e == LONGINT_OK)
 638     {
 639       /* If multiple sort sizes are specified, take the maximum, so
 640          that option order does not matter.  */
 641       if (n < sort_size)
 642         return;
 643
 644       sort_size = n;
 645       if (sort_size == n)
 646         {
 647           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 648           return;
 649         }
 650
 651       e = LONGINT_OVERFLOW;
 652     }
 653
 654   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 655 }
 656
 657 /* Return the default sort size.  */
 658 static size_t
 659 default_sort_size (void)
 660 {
 661   /* Let MEM be available memory or 1/8 of total memory, whichever
 662      is greater.  */
 663   double avail = physmem_available ();
 664   double total = physmem_total ();
 665   double mem = MAX (avail, total / 8);
 666   struct rlimit rlimit;
 667
 668   /* Let SIZE be MEM, but no more than the maximum object size or
 669      system resource limits.  Avoid the MIN macro here, as it is not
 670      quite right when only one argument is floating point.  Don't
 671      bother to check for values like RLIM_INFINITY since in practice
 672      they are not much less than SIZE_MAX.  */
 673   size_t size = SIZE_MAX;
 674   if (mem < size)
 675     size = mem;
 676   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 677     size = rlimit.rlim_cur;
 678 #ifdef RLIMIT_AS
 679   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 680     size = rlimit.rlim_cur;
 681 #endif
 682
 683   /* Leave a large safety margin for the above limits, as failure can
 684      occur when they are exceeded.  */
 685   size /= 2;
 686
 687 #ifdef RLIMIT_RSS
 688   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 689      Exceeding RSS is not fatal, but can be quite slow.  */
 690   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 691     size = rlimit.rlim_cur / 16 * 15;
 692 #endif
 693
 694   /* Use no less than the minimum.  */
 695   return MAX (size, MIN_SORT_SIZE);
 696 }
 697
 698 /* Return the sort buffer size to use with the input files identified
 699    by FPS and FILES, which are alternate paths to the same files.
 700    NFILES gives the number of input files; NFPS may be less.  Assume
 701    that each input line requires LINE_BYTES extra bytes' worth of line
 702    information.  Do not exceed a bound on the size: if the bound is
 703    not specified by the user, use a default.  */
 704
 705 static size_t
 706 sort_buffer_size (FILE *const *fps, size_t nfps,
 707                   char *const *files, size_t nfiles,
 708                   size_t line_bytes)
 709 {
 710   /* A bound on the input size.  If zero, the bound hasn't been
 711      determined yet.  */
 712   static size_t size_bound;
 713
 714   /* In the worst case, each input byte is a newline.  */
 715   size_t worst_case_per_input_byte = line_bytes + 1;
 716
 717   /* Keep enough room for one extra input line and an extra byte.
 718      This extra room might be needed when preparing to read EOF.  */
 719   size_t size = worst_case_per_input_byte + 1;
 720
 721   size_t i;
 722
 723   for (i = 0; i < nfiles; i++)
 724     {
 725       struct stat st;
 726       off_t file_size;
 727       size_t worst_case;
 728
 729       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 730            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
 731            : stat (files[i], &st))
 732           != 0)
 733         die (_("stat failed"), files[i]);
 734
 735       if (S_ISREG (st.st_mode))
 736         file_size = st.st_size;
 737       else
 738         {
 739           /* The file has unknown size.  If the user specified a sort
 740              buffer size, use that; otherwise, guess the size.  */
 741           if (sort_size)
 742             return sort_size;
 743           file_size = INPUT_FILE_SIZE_GUESS;
 744         }
 745
 746       if (! size_bound)
 747         {
 748           size_bound = sort_size;
 749           if (! size_bound)
 750             size_bound = default_sort_size ();
 751         }
 752
 753       /* Add the amount of memory needed to represent the worst case
 754          where the input consists entirely of newlines followed by a
 755          single non-newline.  Check for overflow.  */
 756       worst_case = file_size * worst_case_per_input_byte + 1;
 757       if (file_size != worst_case / worst_case_per_input_byte
 758           || size_bound - size <= worst_case)
 759         return size_bound;
 760       size += worst_case;
 761     }
 762
 763   return size;
 764 }
 765
 766 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 767    must be at least sizeof (struct line).  Allocate ALLOC bytes
 768    initially.  */
 769
 770 static void
 771 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 772 {
 773   /* Ensure that the line array is properly aligned.  If the desired
 774      size cannot be allocated, repeatedly halve it until allocation
 775      succeeds.  The smaller allocation may hurt overall performance,
 776      but that's better than failing.  */
 777   for (;;)
 778     {
 779       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 780       buf->buf = malloc (alloc);
 781       if (buf->buf)
 782         break;
 783       alloc /= 2;
 784       if (alloc <= line_bytes + 1)
 785         xalloc_die ();
 786     }
 787
 788   buf->line_bytes = line_bytes;
 789   buf->alloc = alloc;
 790   buf->used = buf->left = buf->nlines = 0;
 791   buf->eof = false;
 792 }
 793
 794 /* Return one past the limit of the line array.  */
 795
 796 static inline struct line *
 797 buffer_linelim (struct buffer const *buf)
 798 {
 799   return (struct line *) (buf->buf + buf->alloc);
 800 }
 801
 802 /* Return a pointer to the first character of the field specified
 803    by KEY in LINE. */
 804
 805 static char *
 806 begfield (const struct line *line, const struct keyfield *key)
 807 {
 808   register char *ptr = line->text, *lim = ptr + line->length - 1;
 809   register size_t sword = key->sword;
 810   register size_t schar = key->schar;
 811   register size_t remaining_bytes;
 812
 813   /* The leading field separator itself is included in a field when -t
 814      is absent.  */
 815
 816   if (tab != TAB_DEFAULT)
 817     while (ptr < lim && sword--)
 818       {
 819         while (ptr < lim && *ptr != tab)
 820           ++ptr;
 821         if (ptr < lim)
 822           ++ptr;
 823       }
 824   else
 825     while (ptr < lim && sword--)
 826       {
 827         while (ptr < lim && blanks[to_uchar (*ptr)])
 828           ++ptr;
 829         while (ptr < lim && !blanks[to_uchar (*ptr)])
 830           ++ptr;
 831       }
 832
 833   if (key->skipsblanks)
 834     while (ptr < lim && blanks[to_uchar (*ptr)])
 835       ++ptr;
 836
 837   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
 838   remaining_bytes = lim - ptr;
 839   if (schar < remaining_bytes)
 840     ptr += schar;
 841   else
 842     ptr = lim;
 843
 844   return ptr;
 845 }
 846
 847 /* Return the limit of (a pointer to the first character after) the field
 848    in LINE specified by KEY. */
 849
 850 static char *
 851 limfield (const struct line *line, const struct keyfield *key)
 852 {
 853   register char *ptr = line->text, *lim = ptr + line->length - 1;
 854   register size_t eword = key->eword, echar = key->echar;
 855   register size_t remaining_bytes;
 856
 857   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 858      whichever comes first.  If there are more than EWORD fields, leave
 859      PTR pointing at the beginning of the field having zero-based index,
 860      EWORD.  If a delimiter character was specified (via -t), then that
 861      `beginning' is the first character following the delimiting TAB.
 862      Otherwise, leave PTR pointing at the first `blank' character after
 863      the preceding field.  */
 864   if (tab != TAB_DEFAULT)
 865     while (ptr < lim && eword--)
 866       {
 867         while (ptr < lim && *ptr != tab)
 868           ++ptr;
 869         if (ptr < lim && (eword | echar))
 870           ++ptr;
 871       }
 872   else
 873     while (ptr < lim && eword--)
 874       {
 875         while (ptr < lim && blanks[to_uchar (*ptr)])
 876           ++ptr;
 877         while (ptr < lim && !blanks[to_uchar (*ptr)])
 878           ++ptr;
 879       }
 880
 881 #ifdef POSIX_UNSPECIFIED
 882   /* The following block of code makes GNU sort incompatible with
 883      standard Unix sort, so it's ifdef'd out for now.
 884      The POSIX spec isn't clear on how to interpret this.
 885      FIXME: request clarification.
 886
 887      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 888      Date: Thu, 30 May 96 12:20:41 -0400
 889      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
 890
 891      [...]I believe I've found another bug in `sort'.
 892
 893      $ cat /tmp/sort.in
 894      a b c 2 d
 895      pq rs 1 t
 896      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
 897      a b c 2 d
 898      pq rs 1 t
 899      $ /bin/sort -k1.7,1.7 </tmp/sort.in
 900      pq rs 1 t
 901      a b c 2 d
 902
 903      Unix sort produced the answer I expected: sort on the single character
 904      in column 7.  GNU sort produced different results, because it disagrees
 905      on the interpretation of the key-end spec "M.N".  Unix sort reads this
 906      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
 907      "skip M-1 fields, then either N-1 characters or the rest of the current
 908      field, whichever comes first".  This extra clause applies only to
 909      key-ends, not key-starts.
 910      */
 911
 912   /* Make LIM point to the end of (one byte past) the current field.  */
 913   if (tab != TAB_DEFAULT)
 914     {
 915       char *newlim;
 916       newlim = memchr (ptr, tab, lim - ptr);
 917       if (newlim)
 918         lim = newlim;
 919     }
 920   else
 921     {
 922       char *newlim;
 923       newlim = ptr;
 924       while (newlim < lim && blanks[to_uchar (*newlim)])
 925         ++newlim;
 926       while (newlim < lim && !blanks[to_uchar (*newlim)])
 927         ++newlim;
 928       lim = newlim;
 929     }
 930 #endif
 931
 932   /* If we're ignoring leading blanks when computing the End
 933      of the field, don't start counting bytes until after skipping
 934      past any leading blanks. */
 935   if (key->skipeblanks)
 936     while (ptr < lim && blanks[to_uchar (*ptr)])
 937       ++ptr;
 938
 939   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 940   remaining_bytes = lim - ptr;
 941   if (echar < remaining_bytes)
 942     ptr += echar;
 943   else
 944     ptr = lim;
 945
 946   return ptr;
 947 }
 948
 949 /* Fill BUF reading from FP, moving buf->left bytes from the end
 950    of buf->buf to the beginning first.  If EOF is reached and the
 951    file wasn't terminated by a newline, supply one.  Set up BUF's line
 952    table too.  FILE is the name of the file corresponding to FP.
 953    Return true if some input was read.  */
 954
 955 static bool
 956 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
 957 {
 958   struct keyfield const *key = keylist;
 959   char eol = eolchar;
 960   size_t line_bytes = buf->line_bytes;
 961   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
 962
 963   if (buf->eof)
 964     return false;
 965
 966   if (buf->used != buf->left)
 967     {
 968       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 969       buf->used = buf->left;
 970       buf->nlines = 0;
 971     }
 972
 973   for (;;)
 974     {
 975       char *ptr = buf->buf + buf->used;
 976       struct line *linelim = buffer_linelim (buf);
 977       struct line *line = linelim - buf->nlines;
 978       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
 979       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
 980
 981       while (line_bytes + 1 < avail)
 982         {
 983           /* Read as many bytes as possible, but do not read so many
 984              bytes that there might not be enough room for the
 985              corresponding line array.  The worst case is when the
 986              rest of the input file consists entirely of newlines,
 987              except that the last byte is not a newline.  */
 988           size_t readsize = (avail - 1) / (line_bytes + 1);
 989           size_t bytes_read = fread (ptr, 1, readsize, fp);
 990           char *ptrlim = ptr + bytes_read;
 991           char *p;
 992           avail -= bytes_read;
 993
 994           if (bytes_read != readsize)
 995             {
 996               if (ferror (fp))
 997                 die (_("read failed"), file);
 998               if (feof (fp))
 999                 {
1000                   buf->eof = true;
1001                   if (buf->buf == ptrlim)
1002                     return false;
1003                   if (ptrlim[-1] != eol)
1004                     *ptrlim++ = eol;
1005                 }
1006             }
1007
1008           /* Find and record each line in the just-read input.  */
1009           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1010             {
1011               ptr = p + 1;
1012               line--;
1013               line->text = line_start;
1014               line->length = ptr - line_start;
1015               mergesize = MAX (mergesize, line->length);
1016               avail -= line_bytes;
1017
1018               if (key)
1019                 {
1020                   /* Precompute the position of the first key for
1021                      efficiency.  */
1022                   line->keylim = (key->eword == SIZE_MAX
1023                                   ? p
1024                                   : limfield (line, key));
1025
1026                   if (key->sword != SIZE_MAX)
1027                     line->keybeg = begfield (line, key);
1028                   else
1029                     {
1030                       if (key->skipsblanks)
1031                         while (blanks[to_uchar (*line_start)])
1032                           line_start++;
1033                       line->keybeg = line_start;
1034                     }
1035                 }
1036
1037               line_start = ptr;
1038             }
1039
1040           ptr = ptrlim;
1041           if (buf->eof)
1042             break;
1043         }
1044
1045       buf->used = ptr - buf->buf;
1046       buf->nlines = buffer_linelim (buf) - line;
1047       if (buf->nlines != 0)
1048         {
1049           buf->left = ptr - line_start;
1050           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1051           return true;
1052         }
1053
1054       /* The current input line is too long to fit in the buffer.
1055          Double the buffer size and try again.  */
1056       buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf));
1057     }
1058 }
1059
1060 /* Compare strings A and B containing decimal fractions < 1.  Each string
1061    should begin with a decimal point followed immediately by the digits
1062    of the fraction.  Strings not of this form are considered to be zero. */
1063
1064 /* The goal here, is to take two numbers a and b... compare these
1065    in parallel.  Instead of converting each, and then comparing the
1066    outcome.  Most likely stopping the comparison before the conversion
1067    is complete.  The algorithm used, in the old sort:
1068
1069    Algorithm: fraccompare
1070    Action   : compare two decimal fractions
1071    accepts  : char *a, char *b
1072    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1073    implement:
1074
1075    if *a == decimal_point AND *b == decimal_point
1076      find first character different in a and b.
1077      if both are digits, return the difference *a - *b.
1078      if *a is a digit
1079        skip past zeros
1080        if digit return 1, else 0
1081      if *b is a digit
1082        skip past zeros
1083        if digit return -1, else 0
1084    if *a is a decimal_point
1085      skip past decimal_point and zeros
1086      if digit return 1, else 0
1087    if *b is a decimal_point
1088      skip past decimal_point and zeros
1089      if digit return -1, else 0
1090    return 0 */
1091
1092 static int
1093 fraccompare (register const char *a, register const char *b)
1094 {
1095   if (*a == decimal_point && *b == decimal_point)
1096     {
1097       while (*++a == *++b)
1098         if (! ISDIGIT (*a))
1099           return 0;
1100       if (ISDIGIT (*a) && ISDIGIT (*b))
1101         return *a - *b;
1102       if (ISDIGIT (*a))
1103         goto a_trailing_nonzero;
1104       if (ISDIGIT (*b))
1105         goto b_trailing_nonzero;
1106       return 0;
1107     }
1108   else if (*a++ == decimal_point)
1109     {
1110     a_trailing_nonzero:
1111       while (*a == NUMERIC_ZERO)
1112         a++;
1113       return ISDIGIT (*a);
1114     }
1115   else if (*b++ == decimal_point)
1116     {
1117     b_trailing_nonzero:
1118       while (*b == NUMERIC_ZERO)
1119         b++;
1120       return - ISDIGIT (*b);
1121     }
1122   return 0;
1123 }
1124
1125 /* Compare strings A and B as numbers without explicitly converting them to
1126    machine numbers.  Comparatively slow for short strings, but asymptotically
1127    hideously fast. */
1128
1129 static int
1130 numcompare (register const char *a, register const char *b)
1131 {
1132   char tmpa;
1133   char tmpb;
1134   int tmp;
1135   size_t log_a;
1136   size_t log_b;
1137
1138   tmpa = *a;
1139   tmpb = *b;
1140
1141   while (blanks[to_uchar (tmpa)])
1142     tmpa = *++a;
1143   while (blanks[to_uchar (tmpb)])
1144     tmpb = *++b;
1145
1146   if (tmpa == NEGATION_SIGN)
1147     {
1148       do
1149         tmpa = *++a;
1150       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1151       if (tmpb != NEGATION_SIGN)
1152         {
1153           if (tmpa == decimal_point)
1154             do
1155               tmpa = *++a;
1156             while (tmpa == NUMERIC_ZERO);
1157           if (ISDIGIT (tmpa))
1158             return -1;
1159           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1160             tmpb = *++b;
1161           if (tmpb == decimal_point)
1162             do
1163               tmpb = *++b;
1164             while (tmpb == NUMERIC_ZERO);
1165           if (ISDIGIT (tmpb))
1166             return -1;
1167           return 0;
1168         }
1169       do
1170         tmpb = *++b;
1171       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1172
1173       while (tmpa == tmpb && ISDIGIT (tmpa))
1174         {
1175           do
1176             tmpa = *++a;
1177           while (IS_THOUSANDS_SEP (tmpa));
1178           do
1179             tmpb = *++b;
1180           while (IS_THOUSANDS_SEP (tmpb));
1181         }
1182
1183       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1184           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1185         return -fraccompare (a, b);
1186
1187       tmp = tmpb - tmpa;
1188
1189       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1190         do
1191           tmpa = *++a;
1192         while (IS_THOUSANDS_SEP (tmpa));
1193
1194       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1195         do
1196           tmpb = *++b;
1197         while (IS_THOUSANDS_SEP (tmpb));
1198
1199       if (log_a != log_b)
1200         return log_a < log_b ? 1 : -1;
1201
1202       if (!log_a)
1203         return 0;
1204
1205       return tmp;
1206     }
1207   else if (tmpb == NEGATION_SIGN)
1208     {
1209       do
1210         tmpb = *++b;
1211       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1212       if (tmpb == decimal_point)
1213         do
1214           tmpb = *++b;
1215         while (tmpb == NUMERIC_ZERO);
1216       if (ISDIGIT (tmpb))
1217         return 1;
1218       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1219         tmpa = *++a;
1220       if (tmpa == decimal_point)
1221         do
1222           tmpa = *++a;
1223         while (tmpa == NUMERIC_ZERO);
1224       if (ISDIGIT (tmpa))
1225         return 1;
1226       return 0;
1227     }
1228   else
1229     {
1230       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1231         tmpa = *++a;
1232       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1233         tmpb = *++b;
1234
1235       while (tmpa == tmpb && ISDIGIT (tmpa))
1236         {
1237           do
1238             tmpa = *++a;
1239           while (IS_THOUSANDS_SEP (tmpa));
1240           do
1241             tmpb = *++b;
1242           while (IS_THOUSANDS_SEP (tmpb));
1243         }
1244
1245       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1246           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1247         return fraccompare (a, b);
1248
1249       tmp = tmpa - tmpb;
1250
1251       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1252         do
1253           tmpa = *++a;
1254         while (IS_THOUSANDS_SEP (tmpa));
1255
1256       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1257         do
1258           tmpb = *++b;
1259         while (IS_THOUSANDS_SEP (tmpb));
1260
1261       if (log_a != log_b)
1262         return log_a < log_b ? -1 : 1;
1263
1264       if (!log_a)
1265         return 0;
1266
1267       return tmp;
1268     }
1269 }
1270
1271 static int
1272 general_numcompare (const char *sa, const char *sb)
1273 {
1274   /* FIXME: add option to warn about failed conversions.  */
1275   /* FIXME: maybe add option to try expensive FP conversion
1276      only if A and B can't be compared more cheaply/accurately.  */
1277
1278   char *ea;
1279   char *eb;
1280   double a = strtod (sa, &ea);
1281   double b = strtod (sb, &eb);
1282
1283   /* Put conversion errors at the start of the collating sequence.  */
1284   if (sa == ea)
1285     return sb == eb ? 0 : -1;
1286   if (sb == eb)
1287     return 1;
1288
1289   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1290      conversion errors but before numbers; sort them by internal
1291      bit-pattern, for lack of a more portable alternative.  */
1292   return (a < b ? -1
1293           : a > b ? 1
1294           : a == b ? 0
1295           : b == b ? -1
1296           : a == a ? 1
1297           : memcmp ((char *) &a, (char *) &b, sizeof a));
1298 }
1299
1300 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1301    Return 0 if the name in S is not recognized.  */
1302
1303 static int
1304 getmonth (char const *month, size_t len)
1305 {
1306   size_t lo = 0;
1307   size_t hi = MONTHS_PER_YEAR;
1308   char const *monthlim = month + len;
1309
1310   for (;;)
1311     {
1312       if (month == monthlim)
1313         return 0;
1314       if (!blanks[to_uchar (*month)])
1315         break;
1316       ++month;
1317     }
1318
1319   do
1320     {
1321       size_t ix = (lo + hi) / 2;
1322       char const *m = month;
1323       char const *n = monthtab[ix].name;
1324
1325       for (;; m++, n++)
1326         {
1327           if (!*n)
1328             return monthtab[ix].val;
1329           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1330             {
1331               hi = ix;
1332               break;
1333             }
1334           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1335             {
1336               lo = ix + 1;
1337               break;
1338             }
1339         }
1340     }
1341   while (lo < hi);
1342
1343   return 0;
1344 }
1345
1346 /* Compare two lines A and B trying every key in sequence until there
1347    are no more keys or a difference is found. */
1348
1349 static int
1350 keycompare (const struct line *a, const struct line *b)
1351 {
1352   struct keyfield const *key = keylist;
1353
1354   /* For the first iteration only, the key positions have been
1355      precomputed for us. */
1356   register char *texta = a->keybeg;
1357   register char *textb = b->keybeg;
1358   register char *lima = a->keylim;
1359   register char *limb = b->keylim;
1360
1361   int diff;
1362
1363   for (;;)
1364     {
1365       register char const *translate = key->translate;
1366       register bool const *ignore = key->ignore;
1367
1368       /* Find the lengths. */
1369       size_t lena = lima <= texta ? 0 : lima - texta;
1370       size_t lenb = limb <= textb ? 0 : limb - textb;
1371
1372       /* Actually compare the fields. */
1373       if (key->numeric | key->general_numeric)
1374         {
1375           char savea = *lima, saveb = *limb;
1376
1377           *lima = *limb = '\0';
1378           diff = ((key->numeric ? numcompare : general_numcompare)
1379                   (texta, textb));
1380           *lima = savea, *limb = saveb;
1381         }
1382       else if (key->month)
1383         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1384       /* Sorting like this may become slow, so in a simple locale the user
1385          can select a faster sort that is similar to ascii sort.  */
1386       else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1387         {
1388           if (ignore || translate)
1389             {
1390               char buf[4000];
1391               size_t size = lena + 1 + lenb + 1;
1392               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1393               char *copy_b = copy_a + lena + 1;
1394               size_t new_len_a, new_len_b, i;
1395
1396               /* Ignore and/or translate chars before comparing.  */
1397               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1398                 {
1399                   if (i < lena)
1400                     {
1401                       copy_a[new_len_a] = (translate
1402                                            ? translate[to_uchar (texta[i])]
1403                                            : texta[i]);
1404                       if (!ignore || !ignore[to_uchar (texta[i])])
1405                         ++new_len_a;
1406                     }
1407                   if (i < lenb)
1408                     {
1409                       copy_b[new_len_b] = (translate
1410                                            ? translate[to_uchar (textb[i])]
1411                                            : textb [i]);
1412                       if (!ignore || !ignore[to_uchar (textb[i])])
1413                         ++new_len_b;
1414                     }
1415                 }
1416
1417               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1418
1419               if (sizeof buf < size)
1420                 free (copy_a);
1421             }
1422           else if (lena == 0)
1423             diff = - NONZERO (lenb);
1424           else if (lenb == 0)
1425             goto greater;
1426           else
1427             diff = xmemcoll (texta, lena, textb, lenb);
1428         }
1429       else if (ignore)
1430         {
1431 #define CMP_WITH_IGNORE(A, B)                                           \
1432   do                                                                    \
1433     {                                                                   \
1434           for (;;)                                                      \
1435             {                                                           \
1436               while (texta < lima && ignore[to_uchar (*texta)])         \
1437                 ++texta;                                                \
1438               while (textb < limb && ignore[to_uchar (*textb)])         \
1439                 ++textb;                                                \
1440               if (! (texta < lima && textb < limb))                     \
1441                 break;                                                  \
1442               diff = to_uchar (A) - to_uchar (B);                       \
1443               if (diff)                                                 \
1444                 goto not_equal;                                         \
1445               ++texta;                                                  \
1446               ++textb;                                                  \
1447             }                                                           \
1448                                                                         \
1449           diff = (texta < lima) - (textb < limb);                       \
1450     }                                                                   \
1451   while (0)
1452
1453           if (translate)
1454             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1455                              translate[to_uchar (*textb)]);
1456           else
1457             CMP_WITH_IGNORE (*texta, *textb);
1458         }
1459       else if (lena == 0)
1460         diff = - NONZERO (lenb);
1461       else if (lenb == 0)
1462         goto greater;
1463       else
1464         {
1465           if (translate)
1466             {
1467               while (texta < lima && textb < limb)
1468                 {
1469                   diff = (to_uchar (translate[to_uchar (*texta++)])
1470                           - to_uchar (translate[to_uchar (*textb++)]));
1471                   if (diff)
1472                     goto not_equal;
1473                 }
1474             }
1475           else
1476             {
1477               diff = memcmp (texta, textb, MIN (lena, lenb));
1478               if (diff)
1479                 goto not_equal;
1480             }
1481           diff = lena < lenb ? -1 : lena != lenb;
1482         }
1483
1484       if (diff)
1485         goto not_equal;
1486
1487       key = key->next;
1488       if (! key)
1489         break;
1490
1491       /* Find the beginning and limit of the next field.  */
1492       if (key->eword != SIZE_MAX)
1493         lima = limfield (a, key), limb = limfield (b, key);
1494       else
1495         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1496
1497       if (key->sword != SIZE_MAX)
1498         texta = begfield (a, key), textb = begfield (b, key);
1499       else
1500         {
1501           texta = a->text, textb = b->text;
1502           if (key->skipsblanks)
1503             {
1504               while (texta < lima && blanks[to_uchar (*texta)])
1505                 ++texta;
1506               while (textb < limb && blanks[to_uchar (*textb)])
1507                 ++textb;
1508             }
1509         }
1510     }
1511
1512   return 0;
1513
1514  greater:
1515   diff = 1;
1516  not_equal:
1517   return key->reverse ? -diff : diff;
1518 }
1519
1520 /* Compare two lines A and B, returning negative, zero, or positive
1521    depending on whether A compares less than, equal to, or greater than B. */
1522
1523 static int
1524 compare (register const struct line *a, register const struct line *b)
1525 {
1526   int diff;
1527   size_t alen, blen;
1528
1529   /* First try to compare on the specified keys (if any).
1530      The only two cases with no key at all are unadorned sort,
1531      and unadorned sort -r. */
1532   if (keylist)
1533     {
1534       diff = keycompare (a, b);
1535       if (diff | unique | stable)
1536         return diff;
1537     }
1538
1539   /* If the keys all compare equal (or no keys were specified)
1540      fall through to the default comparison.  */
1541   alen = a->length - 1, blen = b->length - 1;
1542
1543   if (alen == 0)
1544     diff = - NONZERO (blen);
1545   else if (blen == 0)
1546     diff = 1;
1547   else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1548     diff = xmemcoll (a->text, alen, b->text, blen);
1549   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1550     diff = alen < blen ? -1 : alen != blen;
1551
1552   return reverse ? -diff : diff;
1553 }
1554
1555 /* Check that the lines read from FILE_NAME come in order.  Print a
1556    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1557    false if they are not in order.  Otherwise, print no diagnostic
1558    and return true.  */
1559
1560 static bool
1561 check (char const *file_name)
1562 {
1563   FILE *fp = xfopen (file_name, "r");
1564   struct buffer buf;            /* Input buffer. */
1565   struct line temp;             /* Copy of previous line. */
1566   size_t alloc = 0;
1567   uintmax_t line_number = 0;
1568   struct keyfield const *key = keylist;
1569   bool nonunique = ! unique;
1570   bool ordered = true;
1571
1572   initbuf (&buf, sizeof (struct line),
1573            MAX (merge_buffer_size, sort_size));
1574   temp.text = NULL;
1575
1576   while (fillbuf (&buf, fp, file_name))
1577     {
1578       struct line const *line = buffer_linelim (&buf);
1579       struct line const *linebase = line - buf.nlines;
1580
1581       /* Make sure the line saved from the old buffer contents is
1582          less than or equal to the first line of the new buffer. */
1583       if (alloc && nonunique <= compare (&temp, line - 1))
1584         {
1585         found_disorder:
1586           {
1587             struct line const *disorder_line = line - 1;
1588             uintmax_t disorder_line_number =
1589               buffer_linelim (&buf) - disorder_line + line_number;
1590             char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1591             fprintf (stderr, _("%s: %s:%s: disorder: "),
1592                      program_name, file_name,
1593                      umaxtostr (disorder_line_number, hr_buf));
1594             write_bytes (disorder_line->text, disorder_line->length, stderr,
1595                          _("standard error"));
1596             ordered = false;
1597             break;
1598           }
1599         }
1600
1601       /* Compare each line in the buffer with its successor.  */
1602       while (linebase < --line)
1603         if (nonunique <= compare (line, line - 1))
1604           goto found_disorder;
1605
1606       line_number += buf.nlines;
1607
1608       /* Save the last line of the buffer.  */
1609       if (alloc < line->length)
1610         {
1611           do
1612             {
1613               alloc *= 2;
1614               if (! alloc)
1615                 {
1616                   alloc = line->length;
1617                   break;
1618                 }
1619             }
1620           while (alloc < line->length);
1621
1622           temp.text = xrealloc (temp.text, alloc);
1623         }
1624       memcpy (temp.text, line->text, line->length);
1625       temp.length = line->length;
1626       if (key)
1627         {
1628           temp.keybeg = temp.text + (line->keybeg - line->text);
1629           temp.keylim = temp.text + (line->keylim - line->text);
1630         }
1631     }
1632
1633   xfclose (fp, file_name);
1634   free (buf.buf);
1635   if (temp.text)
1636     free (temp.text);
1637   return ordered;
1638 }
1639
1640 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
1641    files (all of which are at the start of the FILES array), and
1642    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
1643    Close input and output files before returning.
1644    OUTPUT_FILE gives the name of the output file.  If it is NULL,
1645    the output file is standard output.  If OFP is NULL, the output
1646    file has not been opened yet (or written to, if standard output).  */
1647
1648 static void
1649 mergefps (char **files, size_t ntemps, size_t nfiles,
1650           FILE *ofp, char const *output_file)
1651 {
1652   FILE *fps[NMERGE];            /* Input streams for each file.  */
1653   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1654   struct line saved;            /* Saved line storage for unique check. */
1655   struct line const *savedline = NULL;
1656                                 /* &saved if there is a saved line. */
1657   size_t savealloc = 0;         /* Size allocated for the saved line. */
1658   struct line const *cur[NMERGE]; /* Current line in each line table. */
1659   struct line const *base[NMERGE]; /* Base of each line table.  */
1660   size_t ord[NMERGE];           /* Table representing a permutation of fps,
1661                                    such that cur[ord[0]] is the smallest line
1662                                    and will be next output. */
1663   size_t i;
1664   size_t j;
1665   size_t t;
1666   struct keyfield const *key = keylist;
1667   saved.text = NULL;
1668
1669   /* Read initial lines from each input file. */
1670   for (i = 0; i < nfiles; )
1671     {
1672       fps[i] = xfopen (files[i], "r");
1673       initbuf (&buffer[i], sizeof (struct line),
1674                MAX (merge_buffer_size, sort_size / nfiles));
1675       if (fillbuf (&buffer[i], fps[i], files[i]))
1676         {
1677           struct line const *linelim = buffer_linelim (&buffer[i]);
1678           cur[i] = linelim - 1;
1679           base[i] = linelim - buffer[i].nlines;
1680           i++;
1681         }
1682       else
1683         {
1684           /* fps[i] is empty; eliminate it from future consideration.  */
1685           xfclose (fps[i], files[i]);
1686           if (i < ntemps)
1687             {
1688               ntemps--;
1689               zaptemp (files[i]);
1690             }
1691           free (buffer[i].buf);
1692           --nfiles;
1693           for (j = i; j < nfiles; ++j)
1694             files[j] = files[j + 1];
1695         }
1696     }
1697
1698   if (! ofp)
1699     ofp = xfopen (output_file, "w");
1700
1701   /* Set up the ord table according to comparisons among input lines.
1702      Since this only reorders two items if one is strictly greater than
1703      the other, it is stable. */
1704   for (i = 0; i < nfiles; ++i)
1705     ord[i] = i;
1706   for (i = 1; i < nfiles; ++i)
1707     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1708       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1709
1710   /* Repeatedly output the smallest line until no input remains. */
1711   while (nfiles)
1712     {
1713       struct line const *smallest = cur[ord[0]];
1714
1715       /* If uniquified output is turned on, output only the first of
1716          an identical series of lines. */
1717       if (unique)
1718         {
1719           if (savedline && compare (savedline, smallest))
1720             {
1721               savedline = 0;
1722               write_bytes (saved.text, saved.length, ofp, output_file);
1723             }
1724           if (!savedline)
1725             {
1726               savedline = &saved;
1727               if (savealloc < smallest->length)
1728                 {
1729                   do
1730                     if (! savealloc)
1731                       {
1732                         savealloc = smallest->length;
1733                         break;
1734                       }
1735                   while ((savealloc *= 2) < smallest->length);
1736
1737                   saved.text = xrealloc (saved.text, savealloc);
1738                 }
1739               saved.length = smallest->length;
1740               memcpy (saved.text, smallest->text, saved.length);
1741               if (key)
1742                 {
1743                   saved.keybeg =
1744                     saved.text + (smallest->keybeg - smallest->text);
1745                   saved.keylim =
1746                     saved.text + (smallest->keylim - smallest->text);
1747                 }
1748             }
1749         }
1750       else
1751         write_bytes (smallest->text, smallest->length, ofp, output_file);
1752
1753       /* Check if we need to read more lines into core. */
1754       if (base[ord[0]] < smallest)
1755         cur[ord[0]] = smallest - 1;
1756       else
1757         {
1758           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1759             {
1760               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1761               cur[ord[0]] = linelim - 1;
1762               base[ord[0]] = linelim - buffer[ord[0]].nlines;
1763             }
1764           else
1765             {
1766               /* We reached EOF on fps[ord[0]].  */
1767               for (i = 1; i < nfiles; ++i)
1768                 if (ord[i] > ord[0])
1769                   --ord[i];
1770               --nfiles;
1771               xfclose (fps[ord[0]], files[ord[0]]);
1772               if (ord[0] < ntemps)
1773                 {
1774                   ntemps--;
1775                   zaptemp (files[ord[0]]);
1776                 }
1777               free (buffer[ord[0]].buf);
1778               for (i = ord[0]; i < nfiles; ++i)
1779                 {
1780                   fps[i] = fps[i + 1];
1781                   files[i] = files[i + 1];
1782                   buffer[i] = buffer[i + 1];
1783                   cur[i] = cur[i + 1];
1784                   base[i] = base[i + 1];
1785                 }
1786               for (i = 0; i < nfiles; ++i)
1787                 ord[i] = ord[i + 1];
1788               continue;
1789             }
1790         }
1791
1792       /* The new line just read in may be larger than other lines
1793          already in core; push it back in the queue until we encounter
1794          a line larger than it. */
1795       for (i = 1; i < nfiles; ++i)
1796         {
1797           int cmp = compare (cur[ord[0]], cur[ord[i]]);
1798           if (cmp < 0 || (cmp == 0 && ord[0] < ord[i]))
1799             break;
1800         }
1801       t = ord[0];
1802       for (j = 1; j < i; ++j)
1803         ord[j - 1] = ord[j];
1804       ord[i - 1] = t;
1805     }
1806
1807   if (unique && savedline)
1808     {
1809       write_bytes (saved.text, saved.length, ofp, output_file);
1810       free (saved.text);
1811     }
1812
1813   xfclose (ofp, output_file);
1814 }
1815
1816 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1817    and HI (with NHI members).  T, LO, and HI point just past their
1818    respective arrays, and the arrays are in reverse order.  NLO and
1819    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
1820
1821 static inline void
1822 mergelines (struct line *t,
1823             struct line const *lo, size_t nlo,
1824             struct line const *hi, size_t nhi)
1825 {
1826   for (;;)
1827     if (compare (lo - 1, hi - 1) <= 0)
1828       {
1829         *--t = *--lo;
1830         if (! --nlo)
1831           {
1832             /* HI - NHI equalled T - (NLO + NHI) when this function
1833                began.  Therefore HI must equal T now, and there is no
1834                need to copy from HI to T.  */
1835             return;
1836           }
1837       }
1838     else
1839       {
1840         *--t = *--hi;
1841         if (! --nhi)
1842           {
1843             do
1844               *--t = *--lo;
1845             while (--nlo);
1846
1847             return;
1848           }
1849       }
1850 }
1851
1852 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1853    NLINES must be at least 2.
1854    The input and output arrays are in reverse order, and LINES and
1855    TEMP point just past the end of their respective arrays.
1856
1857    Use a recursive divide-and-conquer algorithm, in the style
1858    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
1859    the optimization suggested by exercise 5.2.4-10; this requires room
1860    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
1861    writes that this memory optimization was originally published by
1862    D. A. Bell, Comp J. 1 (1958), 75.  */
1863
1864 static void
1865 sortlines (struct line *lines, size_t nlines, struct line *temp)
1866 {
1867   if (nlines == 2)
1868     {
1869       if (0 < compare (&lines[-1], &lines[-2]))
1870         {
1871           struct line tmp = lines[-1];
1872           lines[-1] = lines[-2];
1873           lines[-2] = tmp;
1874         }
1875     }
1876   else
1877     {
1878       size_t nlo = nlines / 2;
1879       size_t nhi = nlines - nlo;
1880       struct line *lo = lines;
1881       struct line *hi = lines - nlo;
1882       struct line *sorted_lo = temp;
1883
1884       sortlines (hi, nhi, temp);
1885       if (1 < nlo)
1886         sortlines_temp (lo, nlo, sorted_lo);
1887       else
1888         sorted_lo[-1] = lo[-1];
1889
1890       mergelines (lines, sorted_lo, nlo, hi, nhi);
1891     }
1892 }
1893
1894 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1895    rather than sorting in place.  */
1896
1897 static void
1898 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1899 {
1900   if (nlines == 2)
1901     {
1902       bool swap = (0 < compare (&lines[-1], &lines[-2]));
1903       temp[-1] = lines[-1 - swap];
1904       temp[-2] = lines[-2 + swap];
1905     }
1906   else
1907     {
1908       size_t nlo = nlines / 2;
1909       size_t nhi = nlines - nlo;
1910       struct line *lo = lines;
1911       struct line *hi = lines - nlo;
1912       struct line *sorted_hi = temp - nlo;
1913
1914       sortlines_temp (hi, nhi, sorted_hi);
1915       if (1 < nlo)
1916         sortlines (lo, nlo, temp);
1917
1918       mergelines (temp, lo, nlo, sorted_hi, nhi);
1919     }
1920 }
1921
1922 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
1923    the same as OUTFILE.  If found, merge the found instances (and perhaps
1924    some other files) into a temporary file so that it can in turn be
1925    merged into OUTFILE without destroying OUTFILE before it is completely
1926    read.  Return the new value of NFILES, which differs from the old if
1927    some merging occurred.
1928
1929    This test ensures that an otherwise-erroneous use like
1930    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
1931    It's not clear that POSIX requires this nicety.
1932    Detect common error cases, but don't try to catch obscure cases like
1933    "cat ... FILE ... | sort -m -o FILE"
1934    where traditional "sort" doesn't copy the input and where
1935    people should know that they're getting into trouble anyway.
1936    Catching these obscure cases would slow down performance in
1937    common cases.  */
1938
1939 static size_t
1940 avoid_trashing_input (char **files, size_t ntemps, size_t nfiles,
1941                       char const *outfile)
1942 {
1943   size_t i;
1944   bool got_outstat = false;
1945   struct stat outstat;
1946
1947   for (i = ntemps; i < nfiles; i++)
1948     {
1949       bool standard_input = STREQ (files[i], "-");
1950       bool same;
1951       struct stat instat;
1952
1953       if (outfile && STREQ (outfile, files[i]) && ! standard_input)
1954         same = true;
1955       else
1956         {
1957           if (! got_outstat)
1958             {
1959               if ((outfile
1960                    ? stat (outfile, &outstat)
1961                    : fstat (STDOUT_FILENO, &outstat))
1962                   != 0)
1963                 break;
1964               got_outstat = true;
1965             }
1966
1967           same = (((standard_input
1968                     ? fstat (STDIN_FILENO, &instat)
1969                     : stat (files[i], &instat))
1970                    == 0)
1971                   && SAME_INODE (instat, outstat));
1972         }
1973
1974       if (same)
1975         {
1976           FILE *tftp;
1977           char *temp = create_temp_file (&tftp);
1978           mergefps (&files[i], 0, nfiles - i, tftp, temp);
1979           files[i] = temp;
1980           return i + 1;
1981         }
1982     }
1983
1984   return nfiles;
1985 }
1986
1987 /* Merge the input FILES.  NTEMPS is the number of files at the
1988    start of FILES that are temporary; it is zero at the top level.
1989    NFILES is the total number of files.  Put the output in
1990    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
1991
1992 static void
1993 merge (char **files, size_t ntemps, size_t nfiles, char const *output_file)
1994 {
1995   while (NMERGE < nfiles)
1996     {
1997       /* Number of input files processed so far.  */
1998       size_t in;
1999
2000       /* Number of output files generated so far.  */
2001       size_t out;
2002
2003       /* nfiles % NMERGE; this counts input files that are left over
2004          after all full-sized merges have been done.  */
2005       size_t remainder;
2006
2007       /* Number of easily-available slots at the next loop iteration.  */
2008       size_t cheap_slots;
2009
2010       /* Do as many NMERGE-size merges as possible.  */
2011       for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2012         {
2013           FILE *tfp;
2014           char *temp = create_temp_file (&tfp);
2015           size_t nt = MIN (ntemps, NMERGE);
2016           ntemps -= nt;
2017           mergefps (&files[in], nt, NMERGE, tfp, temp);
2018           files[out] = temp;
2019         }
2020
2021       remainder = nfiles - in;
2022       cheap_slots = NMERGE - out % NMERGE;
2023
2024       if (cheap_slots < remainder)
2025         {
2026           /* So many files remain that they can't all be put into the last
2027              NMERGE-sized output window.  Do one more merge.  Merge as few
2028              files as possible, to avoid needless I/O.  */
2029           size_t nshortmerge = remainder - cheap_slots + 1;
2030           FILE *tfp;
2031           char *temp = create_temp_file (&tfp);
2032           size_t nt = MIN (ntemps, nshortmerge);
2033           ntemps -= nt;
2034           mergefps (&files[in], nt, nshortmerge, tfp, temp);
2035           files[out++] = temp;
2036           in += nshortmerge;
2037         }
2038
2039       /* Put the remaining input files into the last NMERGE-sized output
2040          window, so they will be merged in the next pass.  */
2041       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2042       ntemps += out;
2043       nfiles -= in - out;
2044     }
2045
2046   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2047   mergefps (files, ntemps, nfiles, NULL, output_file);
2048 }
2049
2050 /* Sort NFILES FILES onto OUTPUT_FILE. */
2051
2052 static void
2053 sort (char * const *files, size_t nfiles, char const *output_file)
2054 {
2055   struct buffer buf;
2056   size_t ntemps = 0;
2057   bool output_file_created = false;
2058
2059   buf.alloc = 0;
2060
2061   while (nfiles)
2062     {
2063       char const *temp_output;
2064       char const *file = *files;
2065       FILE *fp = xfopen (file, "r");
2066       FILE *tfp;
2067       size_t bytes_per_line = (2 * sizeof (struct line)
2068                                - sizeof (struct line) / 2);
2069
2070       if (! buf.alloc)
2071         initbuf (&buf, bytes_per_line,
2072                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2073       buf.eof = false;
2074       files++;
2075       nfiles--;
2076
2077       while (fillbuf (&buf, fp, file))
2078         {
2079           struct line *line;
2080           struct line *linebase;
2081
2082           if (buf.eof && nfiles
2083               && (bytes_per_line + 1
2084                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2085             {
2086               /* End of file, but there is more input and buffer room.
2087                  Concatenate the next input file; this is faster in
2088                  the usual case.  */
2089               buf.left = buf.used;
2090               break;
2091             }
2092
2093           line = buffer_linelim (&buf);
2094           linebase = line - buf.nlines;
2095           if (1 < buf.nlines)
2096             sortlines (line, buf.nlines, linebase);
2097           if (buf.eof && !nfiles && !ntemps && !buf.left)
2098             {
2099               xfclose (fp, file);
2100               tfp = xfopen (output_file, "w");
2101               temp_output = output_file;
2102               output_file_created = true;
2103             }
2104           else
2105             {
2106               ++ntemps;
2107               temp_output = create_temp_file (&tfp);
2108             }
2109
2110           do
2111             {
2112               line--;
2113               write_bytes (line->text, line->length, tfp, temp_output);
2114               if (unique)
2115                 while (linebase < line && compare (line, line - 1) == 0)
2116                   line--;
2117             }
2118           while (linebase < line);
2119
2120           xfclose (tfp, temp_output);
2121
2122           if (output_file_created)
2123             goto finish;
2124         }
2125       xfclose (fp, file);
2126     }
2127
2128  finish:
2129   free (buf.buf);
2130
2131   if (! output_file_created)
2132     {
2133       size_t i;
2134       struct tempnode *node = temphead;
2135       char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2136       for (i = 0; node; i++)
2137         {
2138           tempfiles[i] = node->name;
2139           node = node->next;
2140         }
2141       merge (tempfiles, ntemps, ntemps, output_file);
2142       free (tempfiles);
2143     }
2144 }
2145
2146 /* Insert key KEY at the end of the key list.  */
2147
2148 static void
2149 insertkey (struct keyfield *key)
2150 {
2151   struct keyfield **p;
2152
2153   for (p = &keylist; *p; p = &(*p)->next)
2154     continue;
2155   *p = key;
2156   key->next = NULL;
2157 }
2158
2159 /* Report a bad field specification SPEC, with extra info MSGID.  */
2160
2161 static void badfieldspec (char const *, char const *)
2162      ATTRIBUTE_NORETURN;
2163 static void
2164 badfieldspec (char const *spec, char const *msgid)
2165 {
2166   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2167          _(msgid), spec);
2168   abort ();
2169 }
2170
2171 /* Parse the leading integer in STRING and store the resulting value
2172    (which must fit into size_t) into *VAL.  Return the address of the
2173    suffix after the integer.  If MSGID is NULL, return NULL after
2174    failure; otherwise, report MSGID and exit on failure.  */
2175
2176 static char const *
2177 parse_field_count (char const *string, size_t *val, char const *msgid)
2178 {
2179   char *suffix;
2180   uintmax_t n;
2181
2182   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2183     {
2184     case LONGINT_OK:
2185     case LONGINT_INVALID_SUFFIX_CHAR:
2186       *val = n;
2187       if (*val == n)
2188         break;
2189       /* Fall through.  */
2190     case LONGINT_OVERFLOW:
2191     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2192       if (msgid)
2193         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2194                _(msgid), (int) (suffix - string), string);
2195       return NULL;
2196
2197     case LONGINT_INVALID:
2198       if (msgid)
2199         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2200                _(msgid), string);
2201       return NULL;
2202     }
2203
2204   return suffix;
2205 }
2206
2207 /* Handle interrupts and hangups. */
2208
2209 static void
2210 sighandler (int sig)
2211 {
2212 #ifndef SA_NOCLDSTOP
2213   signal (sig, SIG_IGN);
2214 #endif
2215
2216   cleanup ();
2217
2218   signal (sig, SIG_DFL);
2219   raise (sig);
2220 }
2221
2222 /* Set the ordering options for KEY specified in S.
2223    Return the address of the first character in S that
2224    is not a valid ordering option.
2225    BLANKTYPE is the kind of blanks that 'b' should skip. */
2226
2227 static char *
2228 set_ordering (register const char *s, struct keyfield *key,
2229               enum blanktype blanktype)
2230 {
2231   while (*s)
2232     {
2233       switch (*s)
2234         {
2235         case 'b':
2236           if (blanktype == bl_start || blanktype == bl_both)
2237             key->skipsblanks = true;
2238           if (blanktype == bl_end || blanktype == bl_both)
2239             key->skipeblanks = true;
2240           break;
2241         case 'd':
2242           key->ignore = nondictionary;
2243           break;
2244         case 'f':
2245           key->translate = fold_toupper;
2246           break;
2247         case 'g':
2248           key->general_numeric = true;
2249           break;
2250         case 'i':
2251           /* Option order should not matter, so don't let -i override
2252              -d.  -d implies -i, but -i does not imply -d.  */
2253           if (! key->ignore)
2254             key->ignore = nonprinting;
2255           break;
2256         case 'M':
2257           key->month = true;
2258           break;
2259         case 'n':
2260           key->numeric = true;
2261           break;
2262         case 'r':
2263           key->reverse = true;
2264           break;
2265         default:
2266           return (char *) s;
2267         }
2268       ++s;
2269     }
2270   return (char *) s;
2271 }
2272
2273 static struct keyfield *
2274 new_key (void)
2275 {
2276   struct keyfield *key = xzalloc (sizeof *key);
2277   key->eword = SIZE_MAX;
2278   return key;
2279 }
2280
2281 int
2282 main (int argc, char **argv)
2283 {
2284   struct keyfield *key;
2285   struct keyfield gkey;
2286   char const *s;
2287   int c = 0;
2288   bool checkonly = false;
2289   bool mergeonly = false;
2290   size_t nfiles = 0;
2291   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2292   bool obsolete_usage = (posix2_version () < 200112);
2293   char const *short_options = (obsolete_usage
2294                                ? COMMON_SHORT_OPTIONS "y::"
2295                                : COMMON_SHORT_OPTIONS "y:");
2296   char *minus = "-", **files;
2297   char const *outfile = NULL;
2298
2299   initialize_main (&argc, &argv);
2300   program_name = argv[0];
2301   setlocale (LC_ALL, "");
2302   bindtextdomain (PACKAGE, LOCALEDIR);
2303   textdomain (PACKAGE);
2304
2305   atexit (cleanup);
2306
2307   initialize_exit_failure (SORT_FAILURE);
2308   atexit (close_stdout);
2309
2310   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2311 #if HAVE_NL_LANGINFO
2312   hard_LC_TIME = hard_locale (LC_TIME);
2313 #endif
2314
2315 #if HAVE_SETLOCALE
2316   /* Let's get locale's representation of the decimal point */
2317   {
2318     struct lconv const *lconvp = localeconv ();
2319
2320     /* If the locale doesn't define a decimal point, or if the decimal
2321        point is multibyte, use the C decimal point.  We don't support
2322        multibyte decimal points yet.  */
2323     decimal_point = *lconvp->decimal_point;
2324     if (! decimal_point || lconvp->decimal_point[1])
2325       decimal_point = C_DECIMAL_POINT;
2326
2327     /* We don't support multibyte thousands separators yet.  */
2328     th_sep = *lconvp->thousands_sep;
2329     if (! th_sep || lconvp->thousands_sep[1])
2330       th_sep = CHAR_MAX + 1;
2331   }
2332 #endif
2333
2334   have_read_stdin = false;
2335   inittables ();
2336
2337   {
2338     size_t i;
2339     static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2340     enum { nsigs = sizeof sig / sizeof sig[0] };
2341
2342 #ifdef SA_NOCLDSTOP
2343     struct sigaction act;
2344
2345     sigemptyset (&caught_signals);
2346     for (i = 0; i < nsigs; i++)
2347       {
2348         sigaction (sig[i], NULL, &act);
2349         if (act.sa_handler != SIG_IGN)
2350           sigaddset (&caught_signals, sig[i]);
2351       }
2352
2353     act.sa_handler = sighandler;
2354     act.sa_mask = caught_signals;
2355     act.sa_flags = 0;
2356
2357     for (i = 0; i < nsigs; i++)
2358       if (sigismember (&caught_signals, sig[i]))
2359         sigaction (sig[i], &act, NULL);
2360 #else
2361     for (i = 0; i < nsigs; i++)
2362       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2363         signal (sig[i], sighandler);
2364 #endif
2365   }
2366
2367   gkey.sword = gkey.eword = SIZE_MAX;
2368   gkey.ignore = NULL;
2369   gkey.translate = NULL;
2370   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2371   gkey.skipsblanks = gkey.skipeblanks = false;
2372
2373   files = xnmalloc (argc, sizeof *files);
2374
2375   for (;;)
2376     {
2377       /* Parse an operand as a file after "--" was seen; or if
2378          pedantic and a file was seen, unless the POSIX version
2379          predates 1003.1-2001 and -c was not seen and the operand is
2380          "-o FILE" or "-oFILE".  */
2381
2382       if (c == -1
2383           || (posixly_correct && nfiles != 0
2384               && ! (obsolete_usage
2385                     && ! checkonly
2386                     && optind != argc
2387                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2388                     && (argv[optind][2] || optind + 1 != argc)))
2389           || ((c = getopt_long (argc, argv, short_options,
2390                                 long_options, NULL))
2391               == -1))
2392         {
2393           if (argc <= optind)
2394             break;
2395           files[nfiles++] = argv[optind++];
2396         }
2397       else switch (c)
2398         {
2399         case 1:
2400           key = NULL;
2401           if (obsolete_usage && optarg[0] == '+')
2402             {
2403               /* Treat +POS1 [-POS2] as a key if possible; but silently
2404                  treat an operand as a file if it is not a valid +POS1.  */
2405               key = new_key ();
2406               s = parse_field_count (optarg + 1, &key->sword, NULL);
2407               if (s && *s == '.')
2408                 s = parse_field_count (s + 1, &key->schar, NULL);
2409               if (! (key->sword | key->schar))
2410                 key->sword = SIZE_MAX;
2411               if (! s || *set_ordering (s, key, bl_start))
2412                 {
2413                   free (key);
2414                   key = NULL;
2415                 }
2416               else
2417                 {
2418                   if (optind != argc && argv[optind][0] == '-'
2419                       && ISDIGIT (argv[optind][1]))
2420                     {
2421                       char const *optarg1 = argv[optind++];
2422                       s = parse_field_count (optarg1 + 1, &key->eword,
2423                                              N_("invalid number after `-'"));
2424                       if (*s == '.')
2425                         s = parse_field_count (s + 1, &key->echar,
2426                                                N_("invalid number after `.'"));
2427                       if (*set_ordering (s, key, bl_end))
2428                         badfieldspec (optarg1,
2429                                       N_("stray character in field spec"));
2430                     }
2431                   insertkey (key);
2432                 }
2433             }
2434           if (! key)
2435             files[nfiles++] = optarg;
2436           break;
2437
2438         case 'b':
2439         case 'd':
2440         case 'f':
2441         case 'g':
2442         case 'i':
2443         case 'M':
2444         case 'n':
2445         case 'r':
2446           {
2447             char str[2];
2448             str[0] = c;
2449             str[1] = '\0';
2450             set_ordering (str, &gkey, bl_both);
2451           }
2452           break;
2453
2454         case 'c':
2455           checkonly = true;
2456           break;
2457
2458         case 'k':
2459           key = new_key ();
2460
2461           /* Get POS1. */
2462           s = parse_field_count (optarg, &key->sword,
2463                                  N_("invalid number at field start"));
2464           if (! key->sword--)
2465             {
2466               /* Provoke with `sort -k0' */
2467               badfieldspec (optarg, N_("field number is zero"));
2468             }
2469           if (*s == '.')
2470             {
2471               s = parse_field_count (s + 1, &key->schar,
2472                                      N_("invalid number after `.'"));
2473               if (! key->schar--)
2474                 {
2475                   /* Provoke with `sort -k1.0' */
2476                   badfieldspec (optarg, N_("character offset is zero"));
2477                 }
2478             }
2479           if (! (key->sword | key->schar))
2480             key->sword = SIZE_MAX;
2481           s = set_ordering (s, key, bl_start);
2482           if (*s != ',')
2483             {
2484               key->eword = SIZE_MAX;
2485               key->echar = 0;
2486             }
2487           else
2488             {
2489               /* Get POS2. */
2490               s = parse_field_count (s + 1, &key->eword,
2491                                      N_("invalid number after `,'"));
2492               if (! key->eword--)
2493                 {
2494                   /* Provoke with `sort -k1,0' */
2495                   badfieldspec (optarg, N_("field number is zero"));
2496                 }
2497               if (*s == '.')
2498                 s = parse_field_count (s + 1, &key->echar,
2499                                        N_("invalid number after `.'"));
2500               else
2501                 {
2502                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2503                   key->eword++;
2504                 }
2505               s = set_ordering (s, key, bl_end);
2506             }
2507           if (*s)
2508             badfieldspec (optarg, N_("stray character in field spec"));
2509           insertkey (key);
2510           break;
2511
2512         case 'm':
2513           mergeonly = true;
2514           break;
2515
2516         case 'o':
2517           if (outfile && !STREQ (outfile, optarg))
2518             error (SORT_FAILURE, 0, _("multiple output files specified"));
2519           outfile = optarg;
2520           break;
2521
2522         case 's':
2523           stable = true;
2524           break;
2525
2526         case 'S':
2527           specify_sort_size (optarg);
2528           break;
2529
2530         case 't':
2531           {
2532             char newtab = optarg[0];
2533             if (! newtab)
2534               error (SORT_FAILURE, 0, _("empty tab"));
2535             if (optarg[1])
2536               {
2537                 if (STREQ (optarg, "\\0"))
2538                   newtab = '\0';
2539                 else
2540                   {
2541                     /* Provoke with `sort -txx'.  Complain about
2542                        "multi-character tab" instead of "multibyte tab", so
2543                        that the diagnostic's wording does not need to be
2544                        changed once multibyte characters are supported.  */
2545                     error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
2546                            optarg);
2547                   }
2548               }
2549             if (tab != TAB_DEFAULT && tab != newtab)
2550               error (SORT_FAILURE, 0, _("incompatible tabs"));
2551             tab = newtab;
2552           }
2553           break;
2554
2555         case 'T':
2556           add_temp_dir (optarg);
2557           break;
2558
2559         case 'u':
2560           unique = true;
2561           break;
2562
2563         case 'y':
2564           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
2565              through Solaris 7.  It is also accepted by many non-Solaris
2566              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
2567              -y is marked as obsolete starting with Solaris 8 (1999), but is
2568              still accepted as of Solaris 10 prerelease (2004).
2569
2570              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
2571              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
2572              and which in general ignores the argument after "-y" if it
2573              consists entirely of digits (it can even be empty).  */
2574           if (!optarg && optind != argc)
2575             {
2576               char const *p;
2577               for (p = argv[optind]; ISDIGIT (*p); p++)
2578                 continue;
2579               optind += !*p;
2580             }
2581           break;
2582
2583         case 'z':
2584           eolchar = 0;
2585           break;
2586
2587         case_GETOPT_HELP_CHAR;
2588
2589         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2590
2591         default:
2592           usage (SORT_FAILURE);
2593         }
2594     }
2595
2596   /* Inheritance of global options to individual keys. */
2597   for (key = keylist; key; key = key->next)
2598     if (! (key->ignore || key->translate
2599            || (key->skipsblanks | key->reverse
2600                | key->skipeblanks | key->month | key->numeric
2601                | key->general_numeric)))
2602       {
2603         key->ignore = gkey.ignore;
2604         key->translate = gkey.translate;
2605         key->skipsblanks = gkey.skipsblanks;
2606         key->skipeblanks = gkey.skipeblanks;
2607         key->month = gkey.month;
2608         key->numeric = gkey.numeric;
2609         key->general_numeric = gkey.general_numeric;
2610         key->reverse = gkey.reverse;
2611       }
2612
2613   if (!keylist && (gkey.ignore || gkey.translate
2614                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2615                        | gkey.numeric | gkey.general_numeric)))
2616     insertkey (&gkey);
2617   reverse = gkey.reverse;
2618
2619   if (temp_dir_count == 0)
2620     {
2621       char const *tmp_dir = getenv ("TMPDIR");
2622       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2623     }
2624
2625   if (nfiles == 0)
2626     {
2627       nfiles = 1;
2628       files = &minus;
2629     }
2630
2631   if (checkonly)
2632     {
2633       if (nfiles > 1)
2634         {
2635           error (0, 0, _("extra operand %s not allowed with -c"),
2636                  quote (files[1]));
2637           usage (SORT_FAILURE);
2638         }
2639
2640       /* POSIX requires that sort return 1 IFF invoked with -c and the
2641          input is not properly sorted.  */
2642       exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2643     }
2644
2645   if (mergeonly)
2646     merge (files, 0, nfiles, outfile);
2647   else
2648     sort (files, nfiles, outfile);
2649
2650   if (have_read_stdin && fclose (stdin) == EOF)
2651     die (_("close failed"), "-");
2652
2653   exit (EXIT_SUCCESS);
2654 }