src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-2003 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <signal.h>
  29 #include <stdio.h>
  30 #include <assert.h>
  31 #include "system.h"
  32 #include "error.h"
  33 #include "exitfail.h"
  34 #include "hard-locale.h"
  35 #include "inttostr.h"
  36 #include "long-options.h"
  37 #include "physmem.h"
  38 #include "posixver.h"
  39 #include "stdio-safer.h"
  40 #include "xmemcoll.h"
  41 #include "xstrtol.h"
  42
  43 #if HAVE_SYS_RESOURCE_H
  44 # include <sys/resource.h>
  45 #endif
  46 #ifndef RLIMIT_DATA
  47 struct rlimit { size_t rlim_cur; };
  48 # define getrlimit(Resource, Rlp) (-1)
  49 #endif
  50
  51 /* The official name of this program (e.g., no `g' prefix).  */
  52 #define PROGRAM_NAME "sort"
  53
  54 #define AUTHORS "Mike Haertel", "Paul Eggert"
  55
  56 #if HAVE_LANGINFO_CODESET
  57 # include <langinfo.h>
  58 #endif
  59
  60 #ifndef SA_NOCLDSTOP
  61 # define sigprocmask(How, Set, Oset) /* empty */
  62 # define sigset_t int
  63 #endif
  64
  65 #ifndef STDC_HEADERS
  66 double strtod ();
  67 #endif
  68
  69 #define UCHAR_LIM (UCHAR_MAX + 1)
  70 #define UCHAR(c) ((unsigned char) (c))
  71
  72 #ifndef DEFAULT_TMPDIR
  73 # define DEFAULT_TMPDIR "/tmp"
  74 #endif
  75
  76 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  77    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  78    that sort exit with status 1 IFF invoked with -c and the input is
  79    not properly sorted.  Any other irregular exit must exit with a
  80    status code greater than 1.  */
  81 #define SORT_FAILURE 2
  82 #define SORT_OUT_OF_ORDER 1
  83
  84 #define C_DECIMAL_POINT '.'
  85 #define NEGATION_SIGN   '-'
  86 #define NUMERIC_ZERO    '0'
  87
  88 #if HAVE_SETLOCALE
  89
  90 static char decimal_point;
  91 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
  92
  93 /* Nonzero if the corresponding locales are hard.  */
  94 static bool hard_LC_COLLATE;
  95 # if HAVE_NL_LANGINFO
  96 static bool hard_LC_TIME;
  97 # endif
  98
  99 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 100
 101 #else
 102
 103 # define decimal_point C_DECIMAL_POINT
 104 # define IS_THOUSANDS_SEP(x) 0
 105
 106 #endif
 107
 108 #define NONZERO(x) (x != 0)
 109
 110 /* The kind of blanks for '-b' to skip in various options. */
 111 enum blanktype { bl_start, bl_end, bl_both };
 112
 113 /* The character marking end of line. Default to \n. */
 114 static char eolchar = '\n';
 115
 116 /* Lines are held in core as counted strings. */
 117 struct line
 118 {
 119   char *text;                   /* Text of the line. */
 120   size_t length;                /* Length including final newline. */
 121   char *keybeg;                 /* Start of first key. */
 122   char *keylim;                 /* Limit of first key. */
 123 };
 124
 125 /* Input buffers. */
 126 struct buffer
 127 {
 128   char *buf;                    /* Dynamically allocated buffer,
 129                                    partitioned into 3 regions:
 130                                    - input data;
 131                                    - unused area;
 132                                    - an array of lines, in reverse order.  */
 133   size_t used;                  /* Number of bytes used for input data.  */
 134   size_t nlines;                /* Number of lines in the line array.  */
 135   size_t alloc;                 /* Number of bytes allocated. */
 136   size_t left;                  /* Number of bytes left from previous reads. */
 137   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 138   bool eof;                     /* An EOF has been read.  */
 139 };
 140
 141 struct keyfield
 142 {
 143   size_t sword;                 /* Zero-origin 'word' to start at. */
 144   size_t schar;                 /* Additional characters to skip. */
 145   size_t eword;                 /* Zero-origin first word after field. */
 146   size_t echar;                 /* Additional characters in field. */
 147   bool const *ignore;           /* Boolean array of characters to ignore. */
 148   char const *translate;        /* Translation applied to characters. */
 149   bool skipsblanks;             /* Skip leading blanks at start. */
 150   bool skipeblanks;             /* Skip trailing blanks at finish. */
 151   bool numeric;                 /* Flag for numeric comparison.  Handle
 152                                    strings of digits with optional decimal
 153                                    point, but no exponential notation. */
 154   bool general_numeric;         /* Flag for general, numeric comparison.
 155                                    Handle numbers in exponential notation. */
 156   bool month;                   /* Flag for comparison by month name. */
 157   bool reverse;                 /* Reverse the sense of comparison. */
 158   struct keyfield *next;        /* Next keyfield to try. */
 159 };
 160
 161 struct month
 162 {
 163   char const *name;
 164   int val;
 165 };
 166
 167 /* The name this program was run with. */
 168 char *program_name;
 169
 170 /* FIXME: None of these tables work with multibyte character sets.
 171    Also, there are many other bugs when handling multibyte characters.
 172    One way to fix this is to rewrite `sort' to use wide characters
 173    internally, but doing this with good performance is a bit
 174    tricky.  */
 175
 176 /* Table of blanks.  */
 177 static bool blanks[UCHAR_LIM];
 178
 179 /* Table of non-printing characters. */
 180 static bool nonprinting[UCHAR_LIM];
 181
 182 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 183 static bool nondictionary[UCHAR_LIM];
 184
 185 /* Translation table folding lower case to upper.  */
 186 static char fold_toupper[UCHAR_LIM];
 187
 188 #define MONTHS_PER_YEAR 12
 189
 190 /* Table mapping month names to integers.
 191    Alphabetic order allows binary search. */
 192 static struct month monthtab[] =
 193 {
 194   {"APR", 4},
 195   {"AUG", 8},
 196   {"DEC", 12},
 197   {"FEB", 2},
 198   {"JAN", 1},
 199   {"JUL", 7},
 200   {"JUN", 6},
 201   {"MAR", 3},
 202   {"MAY", 5},
 203   {"NOV", 11},
 204   {"OCT", 10},
 205   {"SEP", 9}
 206 };
 207
 208 /* During the merge phase, the number of files to merge at once. */
 209 #define NMERGE 16
 210
 211 /* Minimum size for a merge or check buffer.  */
 212 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 213
 214 /* Minimum sort size; the code might not work with smaller sizes.  */
 215 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 216
 217 /* The number of bytes needed for a merge or check buffer, which can
 218    function relatively efficiently even if it holds only one line.  If
 219    a longer line is seen, this value is increased.  */
 220 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 221
 222 /* The approximate maximum number of bytes of main memory to use, as
 223    specified by the user.  Zero if the user has not specified a size.  */
 224 static size_t sort_size;
 225
 226 /* The guessed size for non-regular files.  */
 227 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 228
 229 /* Array of directory names in which any temporary files are to be created. */
 230 static char const **temp_dirs;
 231
 232 /* Number of temporary directory names used.  */
 233 static size_t temp_dir_count;
 234
 235 /* Number of allocated slots in temp_dirs.  */
 236 static size_t temp_dir_alloc;
 237
 238 /* Flag to reverse the order of all comparisons. */
 239 static bool reverse;
 240
 241 /* Flag for stable sort.  This turns off the last ditch bytewise
 242    comparison of lines, and instead leaves lines in the same order
 243    they were read if all keys compare equal.  */
 244 static bool stable;
 245
 246 /* If TAB has this value, blanks separate fields.  */
 247 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 248
 249 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 250    separated by the empty string between a non-blank character and a blank
 251    character. */
 252 static int tab = TAB_DEFAULT;
 253
 254 /* Flag to remove consecutive duplicate lines from the output.
 255    Only the last of a sequence of equal lines will be output. */
 256 static bool unique;
 257
 258 /* Nonzero if any of the input files are the standard input. */
 259 static bool have_read_stdin;
 260
 261 /* List of key field comparisons to be tried.  */
 262 static struct keyfield *keylist;
 263
 264 static void sortlines_temp (struct line *, size_t, struct line *);
 265
 266 void
 267 usage (int status)
 268 {
 269   if (status != 0)
 270     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 271              program_name);
 272   else
 273     {
 274       printf (_("\
 275 Usage: %s [OPTION]... [FILE]...\n\
 276 "),
 277               program_name);
 278       fputs (_("\
 279 Write sorted concatenation of all FILE(s) to standard output.\n\
 280 \n\
 281 Ordering options:\n\
 282 \n\
 283 "), stdout);
 284       fputs (_("\
 285 Mandatory arguments to long options are mandatory for short options too.\n\
 286 "), stdout);
 287       fputs (_("\
 288   -b, --ignore-leading-blanks ignore leading blanks\n\
 289   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 290   -f, --ignore-case           fold lower case to upper case characters\n\
 291 "), stdout);
 292       fputs (_("\
 293   -g, --general-numeric-sort  compare according to general numerical value\n\
 294   -i, --ignore-nonprinting    consider only printable characters\n\
 295   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 296   -n, --numeric-sort          compare according to string numerical value\n\
 297   -r, --reverse               reverse the result of comparisons\n\
 298 \n\
 299 "), stdout);
 300       fputs (_("\
 301 Other options:\n\
 302 \n\
 303   -c, --check               check whether input is sorted; do not sort\n\
 304   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 305   -m, --merge               merge already sorted files; do not sort\n\
 306   -o, --output=FILE         write result to FILE instead of standard output\n\
 307   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 308   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 309 "), stdout);
 310       printf (_("\
 311   -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
 312   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s\n\
 313                               multiple options specify multiple directories\n\
 314   -u, --unique              with -c: check for strict ordering\n\
 315                               otherwise: output only the first of an equal run\n\
 316 "), DEFAULT_TMPDIR);
 317       fputs (_("\
 318   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 319 "), stdout);
 320       fputs (HELP_OPTION_DESCRIPTION, stdout);
 321       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 322       fputs (_("\
 323 \n\
 324 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 325 in the field.  OPTS is one or more single-letter ordering options, which\n\
 326 override global ordering options for that key.  If no key is given, use the\n\
 327 entire line as the key.\n\
 328 \n\
 329 SIZE may be followed by the following multiplicative suffixes:\n\
 330 "), stdout);
 331       fputs (_("\
 332 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 333 \n\
 334 With no FILE, or when FILE is -, read standard input.\n\
 335 \n\
 336 *** WARNING ***\n\
 337 The locale specified by the environment affects sort order.\n\
 338 Set LC_ALL=C to get the traditional sort order that uses\n\
 339 native byte values.\n\
 340 "), stdout );
 341       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 342     }
 343   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 344      POSIX requires that sort return 1 IFF invoked with -c and
 345      the input is not properly sorted.  */
 346   assert (status == 0 || status == SORT_FAILURE);
 347   exit (status);
 348 }
 349
 350 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
 351
 352 static struct option const long_options[] =
 353 {
 354   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 355   {"check", no_argument, NULL, 'c'},
 356   {"dictionary-order", no_argument, NULL, 'd'},
 357   {"ignore-case", no_argument, NULL, 'f'},
 358   {"general-numeric-sort", no_argument, NULL, 'g'},
 359   {"ignore-nonprinting", no_argument, NULL, 'i'},
 360   {"key", required_argument, NULL, 'k'},
 361   {"merge", no_argument, NULL, 'm'},
 362   {"month-sort", no_argument, NULL, 'M'},
 363   {"numeric-sort", no_argument, NULL, 'n'},
 364   {"output", required_argument, NULL, 'o'},
 365   {"reverse", no_argument, NULL, 'r'},
 366   {"stable", no_argument, NULL, 's'},
 367   {"buffer-size", required_argument, NULL, 'S'},
 368   {"field-separator", required_argument, NULL, 't'},
 369   {"temporary-directory", required_argument, NULL, 'T'},
 370   {"unique", no_argument, NULL, 'u'},
 371   {"zero-terminated", no_argument, NULL, 'z'},
 372   {GETOPT_HELP_OPTION_DECL},
 373   {GETOPT_VERSION_OPTION_DECL},
 374   {0, 0, 0, 0},
 375 };
 376
 377 /* The set of signals that are caught.  */
 378 static sigset_t caught_signals;
 379
 380 /* The list of temporary files. */
 381 struct tempnode
 382 {
 383   struct tempnode *volatile next;
 384   char name[1];  /* Actual size is 1 + file name length.  */
 385 };
 386 static struct tempnode *volatile temphead;
 387
 388 /* Clean up any remaining temporary files. */
 389
 390 static void
 391 cleanup (void)
 392 {
 393   struct tempnode const *node;
 394
 395   for (node = temphead; node; node = node->next)
 396     unlink (node->name);
 397 }
 398
 399 /* Report MESSAGE for FILE, then clean up and exit.  */
 400
 401 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 402 static void
 403 die (char const *message, char const *file)
 404 {
 405   error (0, errno, "%s: %s", message, file);
 406   exit (SORT_FAILURE);
 407 }
 408
 409 /* Create a new temporary file, returning its newly allocated name.
 410    Store into *PFP a stream open for writing.  */
 411
 412 static char *
 413 create_temp_file (FILE **pfp)
 414 {
 415   static char const slashbase[] = "/sortXXXXXX";
 416   static size_t temp_dir_index;
 417   sigset_t oldset;
 418   int fd;
 419   int saved_errno;
 420   char const *temp_dir = temp_dirs[temp_dir_index];
 421   size_t len = strlen (temp_dir);
 422   struct tempnode *node =
 423     xmalloc (sizeof node->next + len + sizeof slashbase);
 424   char *file = node->name;
 425
 426   memcpy (file, temp_dir, len);
 427   memcpy (file + len, slashbase, sizeof slashbase);
 428   node->next = temphead;
 429   if (++temp_dir_index == temp_dir_count)
 430     temp_dir_index = 0;
 431
 432   /* Create the temporary file in a critical section, to avoid races.  */
 433   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 434   fd = mkstemp (file);
 435   if (0 <= fd)
 436     temphead = node;
 437   saved_errno = errno;
 438   sigprocmask (SIG_SETMASK, &oldset, NULL);
 439   errno = saved_errno;
 440
 441   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 442     die (_("cannot create temporary file"), file);
 443
 444   return file;
 445 }
 446
 447 static FILE *
 448 xfopen (const char *file, const char *how)
 449 {
 450   FILE *fp;
 451
 452   if (STREQ (file, "-"))
 453     {
 454       if (*how == 'r')
 455         {
 456           have_read_stdin = true;
 457           fp = stdin;
 458         }
 459       else
 460         fp = stdout;
 461     }
 462   else
 463     {
 464       if ((fp = fopen_safer (file, how)) == NULL)
 465         die (_("open failed"), file);
 466     }
 467
 468   return fp;
 469 }
 470
 471 /* Close FP, whose name is FILE, and report any errors.  */
 472
 473 static void
 474 xfclose (FILE *fp, char const *file)
 475 {
 476   if (fp == stdin)
 477     {
 478       /* Allow reading stdin from tty more than once. */
 479       if (feof (fp))
 480         clearerr (fp);
 481     }
 482   else
 483     {
 484       if (fclose (fp) != 0)
 485         die (_("close failed"), file);
 486     }
 487 }
 488
 489 static void
 490 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 491 {
 492   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 493     die (_("write failed"), output_file);
 494 }
 495
 496 /* Append DIR to the array of temporary directory names.  */
 497 static void
 498 add_temp_dir (char const *dir)
 499 {
 500   if (temp_dir_count == temp_dir_alloc)
 501     {
 502       temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
 503       temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
 504     }
 505
 506   temp_dirs[temp_dir_count++] = dir;
 507 }
 508
 509 /* Search through the list of temporary files for NAME;
 510    remove it if it is found on the list. */
 511
 512 static void
 513 zaptemp (const char *name)
 514 {
 515   struct tempnode *volatile *pnode;
 516   struct tempnode *node;
 517
 518   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 519     if (node->name == name)
 520       {
 521         unlink (name);
 522         *pnode = node->next;
 523         free (node);
 524         break;
 525       }
 526 }
 527
 528 #if HAVE_NL_LANGINFO
 529
 530 static int
 531 struct_month_cmp (const void *m1, const void *m2)
 532 {
 533   struct month const *month1 = m1;
 534   struct month const *month2 = m2;
 535   return strcmp (month1->name, month2->name);
 536 }
 537
 538 #endif
 539
 540 /* Initialize the character class tables. */
 541
 542 static void
 543 inittables (void)
 544 {
 545   int i;
 546
 547   for (i = 0; i < UCHAR_LIM; ++i)
 548     {
 549       blanks[i] = !!ISBLANK (i);
 550       nonprinting[i] = !ISPRINT (i);
 551       nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
 552       fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
 553     }
 554
 555 #if HAVE_NL_LANGINFO
 556   /* If we're not in the "C" locale, read different names for months.  */
 557   if (hard_LC_TIME)
 558     {
 559       for (i = 0; i < MONTHS_PER_YEAR; i++)
 560         {
 561           char const *s;
 562           size_t s_len;
 563           size_t j;
 564           char *name;
 565
 566           s = (char *) nl_langinfo (ABMON_1 + i);
 567           s_len = strlen (s);
 568           monthtab[i].name = name = xmalloc (s_len + 1);
 569           monthtab[i].val = i + 1;
 570
 571           for (j = 0; j < s_len; j++)
 572             name[j] = fold_toupper[UCHAR (s[j])];
 573           name[j] = '\0';
 574         }
 575       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 576              sizeof (struct month), struct_month_cmp);
 577     }
 578 #endif
 579 }
 580
 581 /* Specify the amount of main memory to use when sorting.  */
 582 static void
 583 specify_sort_size (char const *s)
 584 {
 585   uintmax_t n;
 586   char *suffix;
 587   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
 588
 589   /* The default unit is KiB.  */
 590   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 591     {
 592       if (n <= UINTMAX_MAX / 1024)
 593         n *= 1024;
 594       else
 595         e = LONGINT_OVERFLOW;
 596     }
 597
 598   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 599   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 600     switch (suffix[0])
 601       {
 602       case 'b':
 603         e = LONGINT_OK;
 604         break;
 605
 606       case '%':
 607         {
 608           double mem = physmem_total () * n / 100;
 609
 610           /* Use "<", not "<=", to avoid problems with rounding.  */
 611           if (mem < UINTMAX_MAX)
 612             {
 613               n = mem;
 614               e = LONGINT_OK;
 615             }
 616           else
 617             e = LONGINT_OVERFLOW;
 618         }
 619         break;
 620       }
 621
 622   if (e == LONGINT_OK)
 623     {
 624       /* If multiple sort sizes are specified, take the maximum, so
 625          that option order does not matter.  */
 626       if (n < sort_size)
 627         return;
 628
 629       sort_size = n;
 630       if (sort_size == n)
 631         {
 632           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 633           return;
 634         }
 635
 636       e = LONGINT_OVERFLOW;
 637     }
 638
 639   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 640 }
 641
 642 /* Return the default sort size.  */
 643 static size_t
 644 default_sort_size (void)
 645 {
 646   /* Let MEM be available memory or 1/8 of total memory, whichever
 647      is greater.  */
 648   double avail = physmem_available ();
 649   double total = physmem_total ();
 650   double mem = MAX (avail, total / 8);
 651   struct rlimit rlimit;
 652
 653   /* Let SIZE be MEM, but no more than the maximum object size or
 654      system resource limits.  Avoid the MIN macro here, as it is not
 655      quite right when only one argument is floating point.  Don't
 656      bother to check for values like RLIM_INFINITY since in practice
 657      they are not much less than SIZE_MAX.  */
 658   size_t size = SIZE_MAX;
 659   if (mem < size)
 660     size = mem;
 661   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 662     size = rlimit.rlim_cur;
 663 #ifdef RLIMIT_AS
 664   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 665     size = rlimit.rlim_cur;
 666 #endif
 667
 668   /* Leave a large safety margin for the above limits, as failure can
 669      occur when they are exceeded.  */
 670   size /= 2;
 671
 672 #ifdef RLIMIT_RSS
 673   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 674      Exceeding RSS is not fatal, but can be quite slow.  */
 675   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 676     size = rlimit.rlim_cur / 16 * 15;
 677 #endif
 678
 679   /* Use no less than the minimum.  */
 680   return MAX (size, MIN_SORT_SIZE);
 681 }
 682
 683 /* Return the sort buffer size to use with the input files identified
 684    by FPS and FILES, which are alternate paths to the same files.
 685    NFILES gives the number of input files; NFPS may be less.  Assume
 686    that each input line requires LINE_BYTES extra bytes' worth of line
 687    information.  Do not exceed a bound on the size: if the bound is
 688    not specified by the user, use a default.  */
 689
 690 static size_t
 691 sort_buffer_size (FILE *const *fps, int nfps,
 692                   char *const *files, int nfiles,
 693                   size_t line_bytes)
 694 {
 695   /* A bound on the input size.  If zero, the bound hasn't been
 696      determined yet.  */
 697   static size_t size_bound;
 698
 699   /* In the worst case, each input byte is a newline.  */
 700   size_t worst_case_per_input_byte = line_bytes + 1;
 701
 702   /* Keep enough room for one extra input line and an extra byte.
 703      This extra room might be needed when preparing to read EOF.  */
 704   size_t size = worst_case_per_input_byte + 1;
 705
 706   int i;
 707
 708   for (i = 0; i < nfiles; i++)
 709     {
 710       struct stat st;
 711       off_t file_size;
 712       size_t worst_case;
 713
 714       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 715            : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
 716            : stat (files[i], &st))
 717           != 0)
 718         die (_("stat failed"), files[i]);
 719
 720       if (S_ISREG (st.st_mode))
 721         file_size = st.st_size;
 722       else
 723         {
 724           /* The file has unknown size.  If the user specified a sort
 725              buffer size, use that; otherwise, guess the size.  */
 726           if (sort_size)
 727             return sort_size;
 728           file_size = INPUT_FILE_SIZE_GUESS;
 729         }
 730
 731       if (! size_bound)
 732         {
 733           size_bound = sort_size;
 734           if (! size_bound)
 735             size_bound = default_sort_size ();
 736         }
 737
 738       /* Add the amount of memory needed to represent the worst case
 739          where the input consists entirely of newlines followed by a
 740          single non-newline.  Check for overflow.  */
 741       worst_case = file_size * worst_case_per_input_byte + 1;
 742       if (file_size != worst_case / worst_case_per_input_byte
 743           || size_bound - size <= worst_case)
 744         return size_bound;
 745       size += worst_case;
 746     }
 747
 748   return size;
 749 }
 750
 751 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 752    must be at least sizeof (struct line).  Allocate ALLOC bytes
 753    initially.  */
 754
 755 static void
 756 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 757 {
 758   /* Ensure that the line array is properly aligned.  If the desired
 759      size cannot be allocated, repeatedly halve it until allocation
 760      succeeds.  The smaller allocation may hurt overall performance,
 761      but that's better than failing.  */
 762   for (;;)
 763     {
 764       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 765       buf->buf = malloc (alloc);
 766       if (buf->buf)
 767         break;
 768       alloc /= 2;
 769       if (alloc <= line_bytes + 1)
 770         xalloc_die ();
 771     }
 772
 773   buf->line_bytes = line_bytes;
 774   buf->alloc = alloc;
 775   buf->used = buf->left = buf->nlines = 0;
 776   buf->eof = false;
 777 }
 778
 779 /* Return one past the limit of the line array.  */
 780
 781 static inline struct line *
 782 buffer_linelim (struct buffer const *buf)
 783 {
 784   return (struct line *) (buf->buf + buf->alloc);
 785 }
 786
 787 /* Return a pointer to the first character of the field specified
 788    by KEY in LINE. */
 789
 790 static char *
 791 begfield (const struct line *line, const struct keyfield *key)
 792 {
 793   register char *ptr = line->text, *lim = ptr + line->length - 1;
 794   register size_t sword = key->sword;
 795   register size_t schar = key->schar;
 796   register size_t remaining_bytes;
 797
 798   /* The leading field separator itself is included in a field when -t
 799      is absent.  */
 800
 801   if (tab != TAB_DEFAULT)
 802     while (ptr < lim && sword--)
 803       {
 804         while (ptr < lim && *ptr != tab)
 805           ++ptr;
 806         if (ptr < lim)
 807           ++ptr;
 808       }
 809   else
 810     while (ptr < lim && sword--)
 811       {
 812         while (ptr < lim && blanks[UCHAR (*ptr)])
 813           ++ptr;
 814         while (ptr < lim && !blanks[UCHAR (*ptr)])
 815           ++ptr;
 816       }
 817
 818   if (key->skipsblanks)
 819     while (ptr < lim && blanks[UCHAR (*ptr)])
 820       ++ptr;
 821
 822   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
 823   remaining_bytes = lim - ptr;
 824   if (schar < remaining_bytes)
 825     ptr += schar;
 826   else
 827     ptr = lim;
 828
 829   return ptr;
 830 }
 831
 832 /* Return the limit of (a pointer to the first character after) the field
 833    in LINE specified by KEY. */
 834
 835 static char *
 836 limfield (const struct line *line, const struct keyfield *key)
 837 {
 838   register char *ptr = line->text, *lim = ptr + line->length - 1;
 839   register size_t eword = key->eword, echar = key->echar;
 840   register size_t remaining_bytes;
 841
 842   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 843      whichever comes first.  If there are more than EWORD fields, leave
 844      PTR pointing at the beginning of the field having zero-based index,
 845      EWORD.  If a delimiter character was specified (via -t), then that
 846      `beginning' is the first character following the delimiting TAB.
 847      Otherwise, leave PTR pointing at the first `blank' character after
 848      the preceding field.  */
 849   if (tab != TAB_DEFAULT)
 850     while (ptr < lim && eword--)
 851       {
 852         while (ptr < lim && *ptr != tab)
 853           ++ptr;
 854         if (ptr < lim && (eword | echar))
 855           ++ptr;
 856       }
 857   else
 858     while (ptr < lim && eword--)
 859       {
 860         while (ptr < lim && blanks[UCHAR (*ptr)])
 861           ++ptr;
 862         while (ptr < lim && !blanks[UCHAR (*ptr)])
 863           ++ptr;
 864       }
 865
 866 #ifdef POSIX_UNSPECIFIED
 867   /* The following block of code makes GNU sort incompatible with
 868      standard Unix sort, so it's ifdef'd out for now.
 869      The POSIX spec isn't clear on how to interpret this.
 870      FIXME: request clarification.
 871
 872      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 873      Date: Thu, 30 May 96 12:20:41 -0400
 874      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
 875
 876      [...]I believe I've found another bug in `sort'.
 877
 878      $ cat /tmp/sort.in
 879      a b c 2 d
 880      pq rs 1 t
 881      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
 882      a b c 2 d
 883      pq rs 1 t
 884      $ /bin/sort -k1.7,1.7 </tmp/sort.in
 885      pq rs 1 t
 886      a b c 2 d
 887
 888      Unix sort produced the answer I expected: sort on the single character
 889      in column 7.  GNU sort produced different results, because it disagrees
 890      on the interpretation of the key-end spec "M.N".  Unix sort reads this
 891      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
 892      "skip M-1 fields, then either N-1 characters or the rest of the current
 893      field, whichever comes first".  This extra clause applies only to
 894      key-ends, not key-starts.
 895      */
 896
 897   /* Make LIM point to the end of (one byte past) the current field.  */
 898   if (tab != TAB_DEFAULT)
 899     {
 900       char *newlim;
 901       newlim = memchr (ptr, tab, lim - ptr);
 902       if (newlim)
 903         lim = newlim;
 904     }
 905   else
 906     {
 907       char *newlim;
 908       newlim = ptr;
 909       while (newlim < lim && blanks[UCHAR (*newlim)])
 910         ++newlim;
 911       while (newlim < lim && !blanks[UCHAR (*newlim)])
 912         ++newlim;
 913       lim = newlim;
 914     }
 915 #endif
 916
 917   /* If we're skipping leading blanks, don't start counting characters
 918      until after skipping past any leading blanks.  */
 919   if (key->skipsblanks)
 920     while (ptr < lim && blanks[UCHAR (*ptr)])
 921       ++ptr;
 922
 923   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 924   remaining_bytes = lim - ptr;
 925   if (echar < remaining_bytes)
 926     ptr += echar;
 927   else
 928     ptr = lim;
 929
 930   return ptr;
 931 }
 932
 933 /* Return the number of trailing blanks in FIELD, with LEN bytes.  */
 934
 935 static size_t
 936 trailing_blanks (char const *field, size_t len)
 937 {
 938   size_t i;
 939   for (i = len; 0 < i && blanks[UCHAR (field[i - 1])]; i--)
 940     continue;
 941   return len - i;
 942 }
 943
 944 /* Fill BUF reading from FP, moving buf->left bytes from the end
 945    of buf->buf to the beginning first.  If EOF is reached and the
 946    file wasn't terminated by a newline, supply one.  Set up BUF's line
 947    table too.  FILE is the name of the file corresponding to FP.
 948    Return true if some input was read.  */
 949
 950 static bool
 951 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
 952 {
 953   struct keyfield const *key = keylist;
 954   char eol = eolchar;
 955   size_t line_bytes = buf->line_bytes;
 956   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
 957
 958   if (buf->eof)
 959     return false;
 960
 961   if (buf->used != buf->left)
 962     {
 963       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 964       buf->used = buf->left;
 965       buf->nlines = 0;
 966     }
 967
 968   for (;;)
 969     {
 970       char *ptr = buf->buf + buf->used;
 971       struct line *linelim = buffer_linelim (buf);
 972       struct line *line = linelim - buf->nlines;
 973       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
 974       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
 975
 976       while (line_bytes + 1 < avail)
 977         {
 978           /* Read as many bytes as possible, but do not read so many
 979              bytes that there might not be enough room for the
 980              corresponding line array.  The worst case is when the
 981              rest of the input file consists entirely of newlines,
 982              except that the last byte is not a newline.  */
 983           size_t readsize = (avail - 1) / (line_bytes + 1);
 984           size_t bytes_read = fread (ptr, 1, readsize, fp);
 985           char *ptrlim = ptr + bytes_read;
 986           char *p;
 987           avail -= bytes_read;
 988
 989           if (bytes_read != readsize)
 990             {
 991               if (ferror (fp))
 992                 die (_("read failed"), file);
 993               if (feof (fp))
 994                 {
 995                   buf->eof = true;
 996                   if (buf->buf == ptrlim)
 997                     return false;
 998                   if (ptrlim[-1] != eol)
 999                     *ptrlim++ = eol;
1000                 }
1001             }
1002
1003           /* Find and record each line in the just-read input.  */
1004           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1005             {
1006               ptr = p + 1;
1007               line--;
1008               line->text = line_start;
1009               line->length = ptr - line_start;
1010               mergesize = MAX (mergesize, line->length);
1011               avail -= line_bytes;
1012
1013               if (key)
1014                 {
1015                   /* Precompute the position of the first key for
1016                      efficiency. */
1017                   line->keylim = (key->eword == SIZE_MAX
1018                                   ? p
1019                                   : limfield (line, key));
1020
1021                   if (key->sword != SIZE_MAX)
1022                     line->keybeg = begfield (line, key);
1023                   else
1024                     {
1025                       if (key->skipsblanks)
1026                         while (blanks[UCHAR (*line_start)])
1027                           line_start++;
1028                       line->keybeg = line_start;
1029                     }
1030                   if (key->skipeblanks)
1031                     {
1032                       size_t keylen = line->keylim - line->keybeg;
1033                       line->keylim -= trailing_blanks (line->keybeg, keylen);
1034                     }
1035                 }
1036
1037               line_start = ptr;
1038             }
1039
1040           ptr = ptrlim;
1041           if (buf->eof)
1042             break;
1043         }
1044
1045       buf->used = ptr - buf->buf;
1046       buf->nlines = buffer_linelim (buf) - line;
1047       if (buf->nlines != 0)
1048         {
1049           buf->left = ptr - line_start;
1050           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1051           return true;
1052         }
1053
1054       /* The current input line is too long to fit in the buffer.
1055          Double the buffer size and try again.  */
1056       if (2 * buf->alloc < buf->alloc)
1057         xalloc_die ();
1058       buf->alloc *= 2;
1059       buf->buf = xrealloc (buf->buf, buf->alloc);
1060     }
1061 }
1062
1063 /* Compare strings A and B containing decimal fractions < 1.  Each string
1064    should begin with a decimal point followed immediately by the digits
1065    of the fraction.  Strings not of this form are considered to be zero. */
1066
1067 /* The goal here, is to take two numbers a and b... compare these
1068    in parallel.  Instead of converting each, and then comparing the
1069    outcome.  Most likely stopping the comparison before the conversion
1070    is complete.  The algorithm used, in the old sort:
1071
1072    Algorithm: fraccompare
1073    Action   : compare two decimal fractions
1074    accepts  : char *a, char *b
1075    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1076    implement:
1077
1078    if *a == decimal_point AND *b == decimal_point
1079      find first character different in a and b.
1080      if both are digits, return the difference *a - *b.
1081      if *a is a digit
1082        skip past zeros
1083        if digit return 1, else 0
1084      if *b is a digit
1085        skip past zeros
1086        if digit return -1, else 0
1087    if *a is a decimal_point
1088      skip past decimal_point and zeros
1089      if digit return 1, else 0
1090    if *b is a decimal_point
1091      skip past decimal_point and zeros
1092      if digit return -1, else 0
1093    return 0 */
1094
1095 static int
1096 fraccompare (register const char *a, register const char *b)
1097 {
1098   if (*a == decimal_point && *b == decimal_point)
1099     {
1100       while (*++a == *++b)
1101         if (! ISDIGIT (*a))
1102           return 0;
1103       if (ISDIGIT (*a) && ISDIGIT (*b))
1104         return *a - *b;
1105       if (ISDIGIT (*a))
1106         goto a_trailing_nonzero;
1107       if (ISDIGIT (*b))
1108         goto b_trailing_nonzero;
1109       return 0;
1110     }
1111   else if (*a++ == decimal_point)
1112     {
1113     a_trailing_nonzero:
1114       while (*a == NUMERIC_ZERO)
1115         a++;
1116       return ISDIGIT (*a);
1117     }
1118   else if (*b++ == decimal_point)
1119     {
1120     b_trailing_nonzero:
1121       while (*b == NUMERIC_ZERO)
1122         b++;
1123       return - ISDIGIT (*b);
1124     }
1125   return 0;
1126 }
1127
1128 /* Compare strings A and B as numbers without explicitly converting them to
1129    machine numbers.  Comparatively slow for short strings, but asymptotically
1130    hideously fast. */
1131
1132 static int
1133 numcompare (register const char *a, register const char *b)
1134 {
1135   register int tmpa, tmpb, tmp;
1136   register size_t log_a, log_b;
1137
1138   tmpa = *a;
1139   tmpb = *b;
1140
1141   while (blanks[UCHAR (tmpa)])
1142     tmpa = *++a;
1143   while (blanks[UCHAR (tmpb)])
1144     tmpb = *++b;
1145
1146   if (tmpa == NEGATION_SIGN)
1147     {
1148       do
1149         tmpa = *++a;
1150       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1151       if (tmpb != NEGATION_SIGN)
1152         {
1153           if (tmpa == decimal_point)
1154             do
1155               tmpa = *++a;
1156             while (tmpa == NUMERIC_ZERO);
1157           if (ISDIGIT (tmpa))
1158             return -1;
1159           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1160             tmpb = *++b;
1161           if (tmpb == decimal_point)
1162             do
1163               tmpb = *++b;
1164             while (tmpb == NUMERIC_ZERO);
1165           if (ISDIGIT (tmpb))
1166             return -1;
1167           return 0;
1168         }
1169       do
1170         tmpb = *++b;
1171       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1172
1173       while (tmpa == tmpb && ISDIGIT (tmpa))
1174         {
1175           do
1176             tmpa = *++a;
1177           while (IS_THOUSANDS_SEP (tmpa));
1178           do
1179             tmpb = *++b;
1180           while (IS_THOUSANDS_SEP (tmpb));
1181         }
1182
1183       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1184           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1185         return -fraccompare (a, b);
1186
1187       tmp = tmpb - tmpa;
1188
1189       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1190         do
1191           tmpa = *++a;
1192         while (IS_THOUSANDS_SEP (tmpa));
1193
1194       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1195         do
1196           tmpb = *++b;
1197         while (IS_THOUSANDS_SEP (tmpb));
1198
1199       if (log_a != log_b)
1200         return log_a < log_b ? 1 : -1;
1201
1202       if (!log_a)
1203         return 0;
1204
1205       return tmp;
1206     }
1207   else if (tmpb == NEGATION_SIGN)
1208     {
1209       do
1210         tmpb = *++b;
1211       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1212       if (tmpb == decimal_point)
1213         do
1214           tmpb = *++b;
1215         while (tmpb == NUMERIC_ZERO);
1216       if (ISDIGIT (tmpb))
1217         return 1;
1218       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1219         tmpa = *++a;
1220       if (tmpa == decimal_point)
1221         do
1222           tmpa = *++a;
1223         while (tmpa == NUMERIC_ZERO);
1224       if (ISDIGIT (tmpa))
1225         return 1;
1226       return 0;
1227     }
1228   else
1229     {
1230       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1231         tmpa = *++a;
1232       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1233         tmpb = *++b;
1234
1235       while (tmpa == tmpb && ISDIGIT (tmpa))
1236         {
1237           do
1238             tmpa = *++a;
1239           while (IS_THOUSANDS_SEP (tmpa));
1240           do
1241             tmpb = *++b;
1242           while (IS_THOUSANDS_SEP (tmpb));
1243         }
1244
1245       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1246           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1247         return fraccompare (a, b);
1248
1249       tmp = tmpa - tmpb;
1250
1251       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1252         do
1253           tmpa = *++a;
1254         while (IS_THOUSANDS_SEP (tmpa));
1255
1256       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1257         do
1258           tmpb = *++b;
1259         while (IS_THOUSANDS_SEP (tmpb));
1260
1261       if (log_a != log_b)
1262         return log_a < log_b ? -1 : 1;
1263
1264       if (!log_a)
1265         return 0;
1266
1267       return tmp;
1268     }
1269 }
1270
1271 static int
1272 general_numcompare (const char *sa, const char *sb)
1273 {
1274   /* FIXME: add option to warn about failed conversions.  */
1275   /* FIXME: maybe add option to try expensive FP conversion
1276      only if A and B can't be compared more cheaply/accurately.  */
1277
1278   char *ea;
1279   char *eb;
1280   double a = strtod (sa, &ea);
1281   double b = strtod (sb, &eb);
1282
1283   /* Put conversion errors at the start of the collating sequence.  */
1284   if (sa == ea)
1285     return sb == eb ? 0 : -1;
1286   if (sb == eb)
1287     return 1;
1288
1289   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1290      conversion errors but before numbers; sort them by internal
1291      bit-pattern, for lack of a more portable alternative.  */
1292   return (a < b ? -1
1293           : a > b ? 1
1294           : a == b ? 0
1295           : b == b ? -1
1296           : a == a ? 1
1297           : memcmp ((char *) &a, (char *) &b, sizeof a));
1298 }
1299
1300 /* Return an integer in 1..12 of the month name S with length LEN.
1301    Return 0 if the name in S is not recognized.  */
1302
1303 static int
1304 getmonth (const char *s, size_t len)
1305 {
1306   char *month;
1307   register size_t i;
1308   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1309
1310   while (len > 0 && blanks[UCHAR (*s)])
1311     {
1312       ++s;
1313       --len;
1314     }
1315
1316   if (len == 0)
1317     return 0;
1318
1319   month = alloca (len + 1);
1320   for (i = 0; i < len; ++i)
1321     month[i] = fold_toupper[UCHAR (s[i])];
1322   len -= trailing_blanks (month, len);
1323   month[len] = '\0';
1324
1325   do
1326     {
1327       int ix = (lo + hi) / 2;
1328
1329       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1330         hi = ix;
1331       else
1332         lo = ix;
1333     }
1334   while (hi - lo > 1);
1335
1336   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1337             ? monthtab[lo].val : 0);
1338
1339   return result;
1340 }
1341
1342 /* Compare two lines A and B trying every key in sequence until there
1343    are no more keys or a difference is found. */
1344
1345 static int
1346 keycompare (const struct line *a, const struct line *b)
1347 {
1348   struct keyfield const *key = keylist;
1349
1350   /* For the first iteration only, the key positions have been
1351      precomputed for us. */
1352   register char *texta = a->keybeg;
1353   register char *textb = b->keybeg;
1354   register char *lima = a->keylim;
1355   register char *limb = b->keylim;
1356
1357   int diff;
1358
1359   for (;;)
1360     {
1361       register char const *translate = key->translate;
1362       register bool const *ignore = key->ignore;
1363
1364       /* Find the lengths. */
1365       size_t lena = lima <= texta ? 0 : lima - texta;
1366       size_t lenb = limb <= textb ? 0 : limb - textb;
1367
1368       if (key->skipeblanks)
1369         {
1370           lena -= trailing_blanks (texta, lena);
1371           lenb -= trailing_blanks (textb, lenb);
1372         }
1373
1374       /* Actually compare the fields. */
1375       if (key->numeric | key->general_numeric)
1376         {
1377           char savea = *lima, saveb = *limb;
1378
1379           *lima = *limb = '\0';
1380           diff = ((key->numeric ? numcompare : general_numcompare)
1381                   (texta, textb));
1382           *lima = savea, *limb = saveb;
1383         }
1384       else if (key->month)
1385         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1386       /* Sorting like this may become slow, so in a simple locale the user
1387          can select a faster sort that is similar to ascii sort  */
1388       else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1389         {
1390           if (ignore || translate)
1391             {
1392               char *copy_a = alloca (lena + 1 + lenb + 1);
1393               char *copy_b = copy_a + lena + 1;
1394               size_t new_len_a, new_len_b, i;
1395
1396               /* Ignore and/or translate chars before comparing.  */
1397               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1398                 {
1399                   if (i < lena)
1400                     {
1401                       copy_a[new_len_a] = (translate
1402                                            ? translate[UCHAR (texta[i])]
1403                                            : texta[i]);
1404                       if (!ignore || !ignore[UCHAR (texta[i])])
1405                         ++new_len_a;
1406                     }
1407                   if (i < lenb)
1408                     {
1409                       copy_b[new_len_b] = (translate
1410                                            ? translate[UCHAR (textb[i])]
1411                                            : textb [i]);
1412                       if (!ignore || !ignore[UCHAR (textb[i])])
1413                         ++new_len_b;
1414                     }
1415                 }
1416
1417               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1418             }
1419           else if (lena == 0)
1420             diff = - NONZERO (lenb);
1421           else if (lenb == 0)
1422             goto greater;
1423           else
1424             diff = xmemcoll (texta, lena, textb, lenb);
1425         }
1426       else if (ignore)
1427         {
1428 #define CMP_WITH_IGNORE(A, B)                                           \
1429   do                                                                    \
1430     {                                                                   \
1431           for (;;)                                                      \
1432             {                                                           \
1433               while (texta < lima && ignore[UCHAR (*texta)])            \
1434                 ++texta;                                                \
1435               while (textb < limb && ignore[UCHAR (*textb)])            \
1436                 ++textb;                                                \
1437               if (! (texta < lima && textb < limb))                     \
1438                 break;                                                  \
1439               diff = UCHAR (A) - UCHAR (B);                             \
1440               if (diff)                                                 \
1441                 goto not_equal;                                         \
1442               ++texta;                                                  \
1443               ++textb;                                                  \
1444             }                                                           \
1445                                                                         \
1446           diff = (texta < lima) - (textb < limb);                       \
1447     }                                                                   \
1448   while (0)
1449
1450           if (translate)
1451             CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1452                              translate[UCHAR (*textb)]);
1453           else
1454             CMP_WITH_IGNORE (*texta, *textb);
1455         }
1456       else if (lena == 0)
1457         diff = - NONZERO (lenb);
1458       else if (lenb == 0)
1459         goto greater;
1460       else
1461         {
1462           if (translate)
1463             {
1464               while (texta < lima && textb < limb)
1465                 {
1466                   diff = (UCHAR (translate[UCHAR (*texta++)])
1467                           - UCHAR (translate[UCHAR (*textb++)]));
1468                   if (diff)
1469                     goto not_equal;
1470                 }
1471             }
1472           else
1473             {
1474               diff = memcmp (texta, textb, MIN (lena, lenb));
1475               if (diff)
1476                 goto not_equal;
1477             }
1478           diff = lena < lenb ? -1 : lena != lenb;
1479         }
1480
1481       if (diff)
1482         goto not_equal;
1483
1484       key = key->next;
1485       if (! key)
1486         break;
1487
1488       /* Find the beginning and limit of the next field.  */
1489       if (key->eword != SIZE_MAX)
1490         lima = limfield (a, key), limb = limfield (b, key);
1491       else
1492         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1493
1494       if (key->sword != SIZE_MAX)
1495         texta = begfield (a, key), textb = begfield (b, key);
1496       else
1497         {
1498           texta = a->text, textb = b->text;
1499           if (key->skipsblanks)
1500             {
1501               while (texta < lima && blanks[UCHAR (*texta)])
1502                 ++texta;
1503               while (textb < limb && blanks[UCHAR (*textb)])
1504                 ++textb;
1505             }
1506         }
1507     }
1508
1509   return 0;
1510
1511  greater:
1512   diff = 1;
1513  not_equal:
1514   return key->reverse ? -diff : diff;
1515 }
1516
1517 /* Compare two lines A and B, returning negative, zero, or positive
1518    depending on whether A compares less than, equal to, or greater than B. */
1519
1520 static int
1521 compare (register const struct line *a, register const struct line *b)
1522 {
1523   int diff;
1524   size_t alen, blen;
1525
1526   /* First try to compare on the specified keys (if any).
1527      The only two cases with no key at all are unadorned sort,
1528      and unadorned sort -r. */
1529   if (keylist)
1530     {
1531       diff = keycompare (a, b);
1532       alloca (0);
1533       if (diff | unique | stable)
1534         return diff;
1535     }
1536
1537   /* If the keys all compare equal (or no keys were specified)
1538      fall through to the default comparison.  */
1539   alen = a->length - 1, blen = b->length - 1;
1540
1541   if (alen == 0)
1542     diff = - NONZERO (blen);
1543   else if (blen == 0)
1544     diff = 1;
1545   else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1546     diff = xmemcoll (a->text, alen, b->text, blen);
1547   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1548     diff = alen < blen ? -1 : alen != blen;
1549
1550   return reverse ? -diff : diff;
1551 }
1552
1553 /* Check that the lines read from FILE_NAME come in order.  Print a
1554    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1555    false if they are not in order.  Otherwise, print no diagnostic
1556    and return true.  */
1557
1558 static bool
1559 check (char const *file_name)
1560 {
1561   FILE *fp = xfopen (file_name, "r");
1562   struct buffer buf;            /* Input buffer. */
1563   struct line temp;             /* Copy of previous line. */
1564   size_t alloc = 0;
1565   uintmax_t line_number = 0;
1566   struct keyfield const *key = keylist;
1567   bool nonunique = ! unique;
1568   bool ordered = true;
1569
1570   initbuf (&buf, sizeof (struct line),
1571            MAX (merge_buffer_size, sort_size));
1572   temp.text = NULL;
1573
1574   while (fillbuf (&buf, fp, file_name))
1575     {
1576       struct line const *line = buffer_linelim (&buf);
1577       struct line const *linebase = line - buf.nlines;
1578
1579       /* Make sure the line saved from the old buffer contents is
1580          less than or equal to the first line of the new buffer. */
1581       if (alloc && nonunique <= compare (&temp, line - 1))
1582         {
1583         found_disorder:
1584           {
1585             struct line const *disorder_line = line - 1;
1586             uintmax_t disorder_line_number =
1587               buffer_linelim (&buf) - disorder_line + line_number;
1588             char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1589             fprintf (stderr, _("%s: %s:%s: disorder: "),
1590                      program_name, file_name,
1591                      umaxtostr (disorder_line_number, hr_buf));
1592             write_bytes (disorder_line->text, disorder_line->length, stderr,
1593                          _("standard error"));
1594             ordered = false;
1595             break;
1596           }
1597         }
1598
1599       /* Compare each line in the buffer with its successor.  */
1600       while (linebase < --line)
1601         if (nonunique <= compare (line, line - 1))
1602           goto found_disorder;
1603
1604       line_number += buf.nlines;
1605
1606       /* Save the last line of the buffer.  */
1607       if (alloc < line->length)
1608         {
1609           do
1610             {
1611               alloc *= 2;
1612               if (! alloc)
1613                 {
1614                   alloc = line->length;
1615                   break;
1616                 }
1617             }
1618           while (alloc < line->length);
1619
1620           temp.text = xrealloc (temp.text, alloc);
1621         }
1622       memcpy (temp.text, line->text, line->length);
1623       temp.length = line->length;
1624       if (key)
1625         {
1626           temp.keybeg = temp.text + (line->keybeg - line->text);
1627           temp.keylim = temp.text + (line->keylim - line->text);
1628         }
1629     }
1630
1631   xfclose (fp, file_name);
1632   free (buf.buf);
1633   if (temp.text)
1634     free (temp.text);
1635   return ordered;
1636 }
1637
1638 /* Merge lines from FILES onto OFP.  NFILES cannot be greater than
1639    NMERGE.  Close input and output files before returning.
1640    OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1641    output file has not been opened yet.  */
1642
1643 static void
1644 mergefps (char **files, register int nfiles,
1645           FILE *ofp, const char *output_file)
1646 {
1647   FILE *fps[NMERGE];            /* Input streams for each file.  */
1648   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1649   struct line saved;            /* Saved line storage for unique check. */
1650   struct line const *savedline = NULL;
1651                                 /* &saved if there is a saved line. */
1652   size_t savealloc = 0;         /* Size allocated for the saved line. */
1653   struct line const *cur[NMERGE]; /* Current line in each line table. */
1654   struct line const *base[NMERGE]; /* Base of each line table.  */
1655   int ord[NMERGE];              /* Table representing a permutation of fps,
1656                                    such that cur[ord[0]] is the smallest line
1657                                    and will be next output. */
1658   register int i, j, t;
1659   struct keyfield const *key = keylist;
1660   saved.text = NULL;
1661
1662   /* Read initial lines from each input file. */
1663   for (i = 0; i < nfiles; )
1664     {
1665       fps[i] = xfopen (files[i], "r");
1666       initbuf (&buffer[i], sizeof (struct line),
1667                MAX (merge_buffer_size, sort_size / nfiles));
1668       if (fillbuf (&buffer[i], fps[i], files[i]))
1669         {
1670           struct line const *linelim = buffer_linelim (&buffer[i]);
1671           cur[i] = linelim - 1;
1672           base[i] = linelim - buffer[i].nlines;
1673           i++;
1674         }
1675       else
1676         {
1677           /* fps[i] is empty; eliminate it from future consideration.  */
1678           xfclose (fps[i], files[i]);
1679           zaptemp (files[i]);
1680           free (buffer[i].buf);
1681           --nfiles;
1682           for (j = i; j < nfiles; ++j)
1683             files[j] = files[j + 1];
1684         }
1685     }
1686
1687   if (! ofp)
1688     ofp = xfopen (output_file, "w");
1689
1690   /* Set up the ord table according to comparisons among input lines.
1691      Since this only reorders two items if one is strictly greater than
1692      the other, it is stable. */
1693   for (i = 0; i < nfiles; ++i)
1694     ord[i] = i;
1695   for (i = 1; i < nfiles; ++i)
1696     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1697       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1698
1699   /* Repeatedly output the smallest line until no input remains. */
1700   while (nfiles)
1701     {
1702       struct line const *smallest = cur[ord[0]];
1703
1704       /* If uniquified output is turned on, output only the first of
1705          an identical series of lines. */
1706       if (unique)
1707         {
1708           if (savedline && compare (savedline, smallest))
1709             {
1710               savedline = 0;
1711               write_bytes (saved.text, saved.length, ofp, output_file);
1712             }
1713           if (!savedline)
1714             {
1715               savedline = &saved;
1716               if (savealloc < smallest->length)
1717                 {
1718                   do
1719                     if (! savealloc)
1720                       {
1721                         savealloc = smallest->length;
1722                         break;
1723                       }
1724                   while ((savealloc *= 2) < smallest->length);
1725
1726                   saved.text = xrealloc (saved.text, savealloc);
1727                 }
1728               saved.length = smallest->length;
1729               memcpy (saved.text, smallest->text, saved.length);
1730               if (key)
1731                 {
1732                   saved.keybeg =
1733                     saved.text + (smallest->keybeg - smallest->text);
1734                   saved.keylim =
1735                     saved.text + (smallest->keylim - smallest->text);
1736                 }
1737             }
1738         }
1739       else
1740         write_bytes (smallest->text, smallest->length, ofp, output_file);
1741
1742       /* Check if we need to read more lines into core. */
1743       if (base[ord[0]] < smallest)
1744         cur[ord[0]] = smallest - 1;
1745       else
1746         {
1747           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1748             {
1749               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1750               cur[ord[0]] = linelim - 1;
1751               base[ord[0]] = linelim - buffer[ord[0]].nlines;
1752             }
1753           else
1754             {
1755               /* We reached EOF on fps[ord[0]]. */
1756               for (i = 1; i < nfiles; ++i)
1757                 if (ord[i] > ord[0])
1758                   --ord[i];
1759               --nfiles;
1760               xfclose (fps[ord[0]], files[ord[0]]);
1761               zaptemp (files[ord[0]]);
1762               free (buffer[ord[0]].buf);
1763               for (i = ord[0]; i < nfiles; ++i)
1764                 {
1765                   fps[i] = fps[i + 1];
1766                   files[i] = files[i + 1];
1767                   buffer[i] = buffer[i + 1];
1768                   cur[i] = cur[i + 1];
1769                   base[i] = base[i + 1];
1770                 }
1771               for (i = 0; i < nfiles; ++i)
1772                 ord[i] = ord[i + 1];
1773               continue;
1774             }
1775         }
1776
1777       /* The new line just read in may be larger than other lines
1778          already in core; push it back in the queue until we encounter
1779          a line larger than it. */
1780       for (i = 1; i < nfiles; ++i)
1781         {
1782           t = compare (cur[ord[0]], cur[ord[i]]);
1783           if (!t)
1784             t = ord[0] - ord[i];
1785           if (t < 0)
1786             break;
1787         }
1788       t = ord[0];
1789       for (j = 1; j < i; ++j)
1790         ord[j - 1] = ord[j];
1791       ord[i - 1] = t;
1792     }
1793
1794   if (unique && savedline)
1795     {
1796       write_bytes (saved.text, saved.length, ofp, output_file);
1797       free (saved.text);
1798     }
1799
1800   xfclose (ofp, output_file);
1801 }
1802
1803 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1804    and HI (with NHI members).  T, LO, and HI point just past their
1805    respective arrays, and the arrays are in reverse order.  NLO and
1806    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
1807
1808 static inline void
1809 mergelines (struct line *t,
1810             struct line const *lo, size_t nlo,
1811             struct line const *hi, size_t nhi)
1812 {
1813   for (;;)
1814     if (compare (lo - 1, hi - 1) <= 0)
1815       {
1816         *--t = *--lo;
1817         if (! --nlo)
1818           {
1819             /* HI - NHI equalled T - (NLO + NHI) when this function
1820                began.  Therefore HI must equal T now, and there is no
1821                need to copy from HI to T.  */
1822             return;
1823           }
1824       }
1825     else
1826       {
1827         *--t = *--hi;
1828         if (! --nhi)
1829           {
1830             do
1831               *--t = *--lo;
1832             while (--nlo);
1833
1834             return;
1835           }
1836       }
1837 }
1838
1839 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1840    NLINES must be at least 2.
1841    The input and output arrays are in reverse order, and LINES and
1842    TEMP point just past the end of their respective arrays.
1843
1844    Use a recursive divide-and-conquer algorithm, in the style
1845    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
1846    the optimization suggested by exercise 5.2.4-10; this requires room
1847    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
1848    writes that this memory optimization was originally published by
1849    D. A. Bell, Comp J. 1 (1958), 75.  */
1850
1851 static void
1852 sortlines (struct line *lines, size_t nlines, struct line *temp)
1853 {
1854   if (nlines == 2)
1855     {
1856       if (0 < compare (&lines[-1], &lines[-2]))
1857         {
1858           struct line tmp = lines[-1];
1859           lines[-1] = lines[-2];
1860           lines[-2] = tmp;
1861         }
1862     }
1863   else
1864     {
1865       size_t nlo = nlines / 2;
1866       size_t nhi = nlines - nlo;
1867       struct line *lo = lines;
1868       struct line *hi = lines - nlo;
1869       struct line *sorted_lo = temp;
1870
1871       sortlines (hi, nhi, temp);
1872       if (1 < nlo)
1873         sortlines_temp (lo, nlo, sorted_lo);
1874       else
1875         sorted_lo[-1] = lo[-1];
1876
1877       mergelines (lines, sorted_lo, nlo, hi, nhi);
1878     }
1879 }
1880
1881 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1882    rather than sorting in place.  */
1883
1884 static void
1885 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1886 {
1887   if (nlines == 2)
1888     {
1889       bool swap = (0 < compare (&lines[-1], &lines[-2]));
1890       temp[-1] = lines[-1 - swap];
1891       temp[-2] = lines[-2 + swap];
1892     }
1893   else
1894     {
1895       size_t nlo = nlines / 2;
1896       size_t nhi = nlines - nlo;
1897       struct line *lo = lines;
1898       struct line *hi = lines - nlo;
1899       struct line *sorted_hi = temp - nlo;
1900
1901       sortlines_temp (hi, nhi, sorted_hi);
1902       if (1 < nlo)
1903         sortlines (lo, nlo, temp);
1904
1905       mergelines (temp, lo, nlo, sorted_hi, nhi);
1906     }
1907 }
1908
1909 /* Return the index of the first of NFILES FILES that is the same file
1910    as OUTFILE.  If none can be the same, return NFILES.  Consider an
1911    input pipe to be the same as OUTFILE, since the pipe might be the
1912    output of a command like "cat OUTFILE".  */
1913
1914 static int
1915 first_same_file (char * const *files, int nfiles, char const *outfile)
1916 {
1917   int i;
1918   bool got_outstat = false;
1919   struct stat instat, outstat;
1920
1921   for (i = 0; i < nfiles; i++)
1922     {
1923       bool standard_input = STREQ (files[i], "-");
1924
1925       if (STREQ (outfile, files[i]) && ! standard_input)
1926         return i;
1927
1928       if (! got_outstat)
1929         {
1930           got_outstat = true;
1931           if ((STREQ (outfile, "-")
1932                ? fstat (STDOUT_FILENO, &outstat)
1933                : stat (outfile, &outstat))
1934               != 0)
1935             return nfiles;
1936         }
1937
1938       if (((standard_input
1939             ? fstat (STDIN_FILENO, &instat)
1940             : stat (files[i], &instat))
1941            == 0)
1942           && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1943         return i;
1944     }
1945
1946   return nfiles;
1947 }
1948
1949 /* Merge NFILES FILES onto OUTPUT_FILE.  However, merge at most
1950    MAX_MERGE input files directly onto OUTPUT_FILE.  MAX_MERGE cannot
1951    exceed NMERGE.  */
1952
1953 static void
1954 merge (char **files, int nfiles, int max_merge, char const *output_file)
1955 {
1956   while (max_merge < nfiles)
1957     {
1958       FILE *tfp;
1959       int i, t = 0;
1960       char *temp;
1961       for (i = 0; i < nfiles / NMERGE; ++i)
1962         {
1963           temp = create_temp_file (&tfp);
1964           mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1965           files[t++] = temp;
1966         }
1967       temp = create_temp_file (&tfp);
1968       mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1969       files[t++] = temp;
1970       nfiles = t;
1971       if (nfiles == 1)
1972         break;
1973     }
1974
1975   mergefps (files, nfiles, NULL, output_file);
1976 }
1977
1978 /* Sort NFILES FILES onto OUTPUT_FILE. */
1979
1980 static void
1981 sort (char * const *files, int nfiles, char const *output_file)
1982 {
1983   struct buffer buf;
1984   int n_temp_files = 0;
1985   bool output_file_created = false;
1986
1987   buf.alloc = 0;
1988
1989   while (nfiles)
1990     {
1991       char const *temp_output;
1992       char const *file = *files;
1993       FILE *fp = xfopen (file, "r");
1994       FILE *tfp;
1995       size_t bytes_per_line = 2 * sizeof (struct line) - sizeof (struct line) / 2;
1996
1997       if (! buf.alloc)
1998         initbuf (&buf, bytes_per_line,
1999                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2000       buf.eof = false;
2001       files++;
2002       nfiles--;
2003
2004       while (fillbuf (&buf, fp, file))
2005         {
2006           struct line *line;
2007           struct line *linebase;
2008
2009           if (buf.eof && nfiles
2010               && (bytes_per_line + 1
2011                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2012             {
2013               /* End of file, but there is more input and buffer room.
2014                  Concatenate the next input file; this is faster in
2015                  the usual case.  */
2016               buf.left = buf.used;
2017               break;
2018             }
2019
2020           line = buffer_linelim (&buf);
2021           linebase = line - buf.nlines;
2022           if (1 < buf.nlines)
2023             sortlines (line, buf.nlines, linebase);
2024           if (buf.eof && !nfiles && !n_temp_files && !buf.left)
2025             {
2026               xfclose (fp, file);
2027               tfp = xfopen (output_file, "w");
2028               temp_output = output_file;
2029               output_file_created = true;
2030             }
2031           else
2032             {
2033               ++n_temp_files;
2034               temp_output = create_temp_file (&tfp);
2035             }
2036
2037           do
2038             {
2039               line--;
2040               write_bytes (line->text, line->length, tfp, temp_output);
2041               if (unique)
2042                 while (linebase < line && compare (line, line - 1) == 0)
2043                   line--;
2044             }
2045           while (linebase < line);
2046
2047           xfclose (tfp, temp_output);
2048
2049           if (output_file_created)
2050             goto finish;
2051         }
2052       xfclose (fp, file);
2053     }
2054
2055  finish:
2056   free (buf.buf);
2057
2058   if (! output_file_created)
2059     {
2060       int i = n_temp_files;
2061       struct tempnode *node;
2062       char **tempfiles = xmalloc (n_temp_files * sizeof (char *));
2063       for (node = temphead; i > 0; node = node->next)
2064         tempfiles[--i] = node->name;
2065       merge (tempfiles, n_temp_files, NMERGE, output_file);
2066       free (tempfiles);
2067     }
2068 }
2069
2070 /* Insert key KEY at the end of the key list.  */
2071
2072 static void
2073 insertkey (struct keyfield *key)
2074 {
2075   struct keyfield **p;
2076
2077   for (p = &keylist; *p; p = &(*p)->next)
2078     continue;
2079   *p = key;
2080   key->next = NULL;
2081 }
2082
2083 /* Report a bad field specification SPEC, with extra info MSGID.  */
2084
2085 static void badfieldspec (char const *, char const *)
2086      ATTRIBUTE_NORETURN;
2087 static void
2088 badfieldspec (char const *spec, char const *msgid)
2089 {
2090   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2091          _(msgid), spec);
2092   abort ();
2093 }
2094
2095 /* Parse the leading integer in STRING and store the resulting value
2096    (which must fit into size_t) into *VAL.  Return the address of the
2097    suffix after the integer.  If MSGID is NULL, return NULL after
2098    failure; otherwise, report MSGID and exit on failure.  */
2099
2100 static char const *
2101 parse_field_count (char const *string, size_t *val, char const *msgid)
2102 {
2103   char *suffix;
2104   uintmax_t n;
2105
2106   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2107     {
2108     case LONGINT_OK:
2109     case LONGINT_INVALID_SUFFIX_CHAR:
2110       *val = n;
2111       if (*val == n)
2112         break;
2113       /* Fall through.  */
2114     case LONGINT_OVERFLOW:
2115     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2116       if (msgid)
2117         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2118                _(msgid), (int) (suffix - string), string);
2119       return NULL;
2120
2121     case LONGINT_INVALID:
2122       if (msgid)
2123         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2124                _(msgid), string);
2125       return NULL;
2126     }
2127
2128   return suffix;
2129 }
2130
2131 /* Handle interrupts and hangups. */
2132
2133 static void
2134 sighandler (int sig)
2135 {
2136 #ifndef SA_NOCLDSTOP
2137   signal (sig, SIG_IGN);
2138 #endif
2139
2140   cleanup ();
2141
2142 #ifdef SA_NOCLDSTOP
2143   {
2144     struct sigaction sigact;
2145
2146     sigact.sa_handler = SIG_DFL;
2147     sigemptyset (&sigact.sa_mask);
2148     sigact.sa_flags = 0;
2149     sigaction (sig, &sigact, NULL);
2150   }
2151 #else
2152   signal (sig, SIG_DFL);
2153 #endif
2154
2155   raise (sig);
2156 }
2157
2158 /* Set the ordering options for KEY specified in S.
2159    Return the address of the first character in S that
2160    is not a valid ordering option.
2161    BLANKTYPE is the kind of blanks that 'b' should skip. */
2162
2163 static char *
2164 set_ordering (register const char *s, struct keyfield *key,
2165               enum blanktype blanktype)
2166 {
2167   while (*s)
2168     {
2169       switch (*s)
2170         {
2171         case 'b':
2172           if (blanktype == bl_start || blanktype == bl_both)
2173             key->skipsblanks = true;
2174           if (blanktype == bl_end || blanktype == bl_both)
2175             key->skipeblanks = true;
2176           break;
2177         case 'd':
2178           key->ignore = nondictionary;
2179           break;
2180         case 'f':
2181           key->translate = fold_toupper;
2182           break;
2183         case 'g':
2184           key->general_numeric = true;
2185           break;
2186         case 'i':
2187           /* Option order should not matter, so don't let -i override
2188              -d.  -d implies -i, but -i does not imply -d.  */
2189           if (! key->ignore)
2190             key->ignore = nonprinting;
2191           break;
2192         case 'M':
2193           key->month = true;
2194           break;
2195         case 'n':
2196           key->numeric = true;
2197           break;
2198         case 'r':
2199           key->reverse = true;
2200           break;
2201         default:
2202           return (char *) s;
2203         }
2204       ++s;
2205     }
2206   return (char *) s;
2207 }
2208
2209 static struct keyfield *
2210 new_key (void)
2211 {
2212   struct keyfield *key = xcalloc (1, sizeof *key);
2213   key->eword = SIZE_MAX;
2214   return key;
2215 }
2216
2217 int
2218 main (int argc, char **argv)
2219 {
2220   struct keyfield *key;
2221   struct keyfield gkey;
2222   char const *s;
2223   int c = 0;
2224   bool checkonly = false;
2225   bool mergeonly = false;
2226   int nfiles = 0;
2227   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2228   bool obsolete_usage = (posix2_version () < 200112);
2229   char const *short_options = (obsolete_usage
2230                                ? COMMON_SHORT_OPTIONS "y::"
2231                                : COMMON_SHORT_OPTIONS "y:");
2232   char *minus = "-", **files;
2233   char const *outfile = minus;
2234   static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2235   unsigned int nsigs = sizeof sigs / sizeof *sigs;
2236 #ifdef SA_NOCLDSTOP
2237   struct sigaction oldact, newact;
2238 #endif
2239
2240   initialize_main (&argc, &argv);
2241   program_name = argv[0];
2242   setlocale (LC_ALL, "");
2243   bindtextdomain (PACKAGE, LOCALEDIR);
2244   textdomain (PACKAGE);
2245
2246   atexit (cleanup);
2247
2248   exit_failure = SORT_FAILURE;
2249   atexit (close_stdout);
2250
2251   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2252 #if HAVE_NL_LANGINFO
2253   hard_LC_TIME = hard_locale (LC_TIME);
2254 #endif
2255
2256 #if HAVE_SETLOCALE
2257   /* Let's get locale's representation of the decimal point */
2258   {
2259     struct lconv const *lconvp = localeconv ();
2260
2261     /* If the locale doesn't define a decimal point, or if the decimal
2262        point is multibyte, use the C decimal point.  We don't support
2263        multibyte decimal points yet.  */
2264     decimal_point = *lconvp->decimal_point;
2265     if (! decimal_point || lconvp->decimal_point[1])
2266       decimal_point = C_DECIMAL_POINT;
2267
2268     /* We don't support multibyte thousands separators yet.  */
2269     th_sep = *lconvp->thousands_sep;
2270     if (! th_sep || lconvp->thousands_sep[1])
2271       th_sep = CHAR_MAX + 1;
2272   }
2273 #endif
2274
2275   have_read_stdin = false;
2276   inittables ();
2277
2278   /* Change the way library functions fail.  */
2279   exit_failure = SORT_FAILURE;
2280
2281 #ifdef SA_NOCLDSTOP
2282   {
2283     unsigned int i;
2284     sigemptyset (&caught_signals);
2285     for (i = 0; i < nsigs; i++)
2286       sigaddset (&caught_signals, sigs[i]);
2287     newact.sa_handler = sighandler;
2288     newact.sa_mask = caught_signals;
2289     newact.sa_flags = 0;
2290   }
2291 #endif
2292
2293   {
2294     unsigned int i;
2295     for (i = 0; i < nsigs; i++)
2296       {
2297         int sig = sigs[i];
2298 #ifdef SA_NOCLDSTOP
2299         sigaction (sig, NULL, &oldact);
2300         if (oldact.sa_handler != SIG_IGN)
2301           sigaction (sig, &newact, NULL);
2302 #else
2303         if (signal (sig, SIG_IGN) != SIG_IGN)
2304           signal (sig, sighandler);
2305 #endif
2306       }
2307   }
2308
2309   gkey.sword = gkey.eword = SIZE_MAX;
2310   gkey.ignore = NULL;
2311   gkey.translate = NULL;
2312   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2313   gkey.skipsblanks = gkey.skipeblanks = false;
2314
2315   files = xmalloc (sizeof (char *) * argc);
2316
2317   for (;;)
2318     {
2319       /* Parse an operand as a file after "--" was seen; or if
2320          pedantic and a file was seen, unless the POSIX version
2321          predates 1003.1-2001 and -c was not seen and the operand is
2322          "-o FILE" or "-oFILE".  */
2323
2324       if (c == -1
2325           || (posixly_correct && nfiles != 0
2326               && ! (obsolete_usage
2327                     && ! checkonly
2328                     && optind != argc
2329                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2330                     && (argv[optind][2] || optind + 1 != argc)))
2331           || ((c = getopt_long (argc, argv, short_options,
2332                                 long_options, NULL))
2333               == -1))
2334         {
2335           if (argc <= optind)
2336             break;
2337           files[nfiles++] = argv[optind++];
2338         }
2339       else switch (c)
2340         {
2341         case 1:
2342           key = NULL;
2343           if (obsolete_usage && optarg[0] == '+')
2344             {
2345               /* Treat +POS1 [-POS2] as a key if possible; but silently
2346                  treat an operand as a file if it is not a valid +POS1.  */
2347               key = new_key ();
2348               s = parse_field_count (optarg + 1, &key->sword, NULL);
2349               if (s && *s == '.')
2350                 s = parse_field_count (s + 1, &key->schar, NULL);
2351               if (! (key->sword | key->schar))
2352                 key->sword = SIZE_MAX;
2353               if (! s || *set_ordering (s, key, bl_start))
2354                 {
2355                   free (key);
2356                   key = NULL;
2357                 }
2358               else
2359                 {
2360                   if (optind != argc && argv[optind][0] == '-'
2361                       && ISDIGIT (argv[optind][1]))
2362                     {
2363                       char const *optarg1 = argv[optind++];
2364                       s = parse_field_count (optarg1 + 1, &key->eword,
2365                                              N_("invalid number after `-'"));
2366                       if (*s == '.')
2367                         s = parse_field_count (s + 1, &key->echar,
2368                                                N_("invalid number after `.'"));
2369                       if (*set_ordering (s, key, bl_end))
2370                         badfieldspec (optarg1,
2371                                       N_("stray character in field spec"));
2372                     }
2373                   insertkey (key);
2374                 }
2375             }
2376           if (! key)
2377             files[nfiles++] = optarg;
2378           break;
2379
2380         case 'b':
2381         case 'd':
2382         case 'f':
2383         case 'g':
2384         case 'i':
2385         case 'M':
2386         case 'n':
2387         case 'r':
2388           {
2389             char str[2];
2390             str[0] = c;
2391             str[1] = '\0';
2392             set_ordering (str, &gkey, bl_both);
2393           }
2394           break;
2395
2396         case 'c':
2397           checkonly = true;
2398           break;
2399
2400         case 'k':
2401           key = new_key ();
2402
2403           /* Get POS1. */
2404           s = parse_field_count (optarg, &key->sword,
2405                                  N_("invalid number at field start"));
2406           if (! key->sword--)
2407             {
2408               /* Provoke with `sort -k0' */
2409               badfieldspec (optarg, N_("field number is zero"));
2410             }
2411           if (*s == '.')
2412             {
2413               s = parse_field_count (s + 1, &key->schar,
2414                                      N_("invalid number after `.'"));
2415               if (! key->schar--)
2416                 {
2417                   /* Provoke with `sort -k1.0' */
2418                   badfieldspec (optarg, N_("character offset is zero"));
2419                 }
2420             }
2421           if (! (key->sword | key->schar))
2422             key->sword = SIZE_MAX;
2423           s = set_ordering (s, key, bl_start);
2424           if (*s != ',')
2425             {
2426               key->eword = SIZE_MAX;
2427               key->echar = 0;
2428             }
2429           else
2430             {
2431               /* Get POS2. */
2432               s = parse_field_count (s + 1, &key->eword,
2433                                      N_("invalid number after `,'"));
2434               if (! key->eword--)
2435                 {
2436                   /* Provoke with `sort -k1,0' */
2437                   badfieldspec (optarg, N_("field number is zero"));
2438                 }
2439               if (*s == '.')
2440                 s = parse_field_count (s + 1, &key->echar,
2441                                        N_("invalid number after `.'"));
2442               else
2443                 {
2444                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2445                   key->eword++;
2446                 }
2447               s = set_ordering (s, key, bl_end);
2448             }
2449           if (*s)
2450             badfieldspec (optarg, N_("stray character in field spec"));
2451           insertkey (key);
2452           break;
2453
2454         case 'm':
2455           mergeonly = true;
2456           break;
2457
2458         case 'o':
2459           if (outfile != minus && strcmp (outfile, optarg) != 0)
2460             error (SORT_FAILURE, 0, _("multiple output files specified"));
2461           outfile = optarg;
2462           break;
2463
2464         case 's':
2465           stable = true;
2466           break;
2467
2468         case 'S':
2469           specify_sort_size (optarg);
2470           break;
2471
2472         case 't':
2473           {
2474             int newtab = optarg[0];
2475             if (! newtab)
2476               error (SORT_FAILURE, 0, _("empty tab"));
2477             if (optarg[1])
2478               {
2479                 if (strcmp (optarg, "\\0") == 0)
2480                   newtab = '\0';
2481                 else
2482                   {
2483                     /* Provoke with `sort -txx'.  Complain about
2484                        "multi-character tab" instead of "multibyte tab", so
2485                        that the diagnostic's wording does not need to be
2486                        changed once multibyte characters are supported.  */
2487                     error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
2488                            optarg);
2489                   }
2490               }
2491             if (tab != TAB_DEFAULT && tab != newtab)
2492               error (SORT_FAILURE, 0, _("incompatible tabs"));
2493             tab = newtab;
2494           }
2495           break;
2496
2497         case 'T':
2498           add_temp_dir (optarg);
2499           break;
2500
2501         case 'u':
2502           unique = true;
2503           break;
2504
2505         case 'y':
2506           /* Accept and ignore e.g. -y0 for compatibility with Solaris
2507              2.x through Solaris 7.  -y is marked as obsolete starting
2508              with Solaris 8.  */
2509           break;
2510
2511         case 'z':
2512           eolchar = 0;
2513           break;
2514
2515         case_GETOPT_HELP_CHAR;
2516
2517         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2518
2519         default:
2520           usage (SORT_FAILURE);
2521         }
2522     }
2523
2524   /* Inheritance of global options to individual keys. */
2525   for (key = keylist; key; key = key->next)
2526     if (! (key->ignore || key->translate
2527            || (key->skipsblanks | key->reverse
2528                | key->skipeblanks | key->month | key->numeric
2529                | key->general_numeric)))
2530       {
2531         key->ignore = gkey.ignore;
2532         key->translate = gkey.translate;
2533         key->skipsblanks = gkey.skipsblanks;
2534         key->skipeblanks = gkey.skipeblanks;
2535         key->month = gkey.month;
2536         key->numeric = gkey.numeric;
2537         key->general_numeric = gkey.general_numeric;
2538         key->reverse = gkey.reverse;
2539       }
2540
2541   if (!keylist && (gkey.ignore || gkey.translate
2542                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2543                        | gkey.numeric | gkey.general_numeric)))
2544     insertkey (&gkey);
2545   reverse = gkey.reverse;
2546
2547   if (temp_dir_count == 0)
2548     {
2549       char const *tmp_dir = getenv ("TMPDIR");
2550       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2551     }
2552
2553   if (nfiles == 0)
2554     {
2555       nfiles = 1;
2556       files = &minus;
2557     }
2558
2559   if (checkonly)
2560     {
2561       if (nfiles > 1)
2562         error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2563                files[1]);
2564
2565       /* POSIX requires that sort return 1 IFF invoked with -c and the
2566          input is not properly sorted.  */
2567       exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2568     }
2569
2570   if (mergeonly)
2571     {
2572       int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2573       merge (files, nfiles, max_merge, outfile);
2574     }
2575   else
2576     sort (files, nfiles, outfile);
2577
2578   if (have_read_stdin && fclose (stdin) == EOF)
2579     die (_("close failed"), "-");
2580
2581   exit (EXIT_SUCCESS);
2582 }