src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-2001 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <signal.h>
  29 #include <stdio.h>
  30 #include <assert.h>
  31 #include "system.h"
  32 #include "long-options.h"
  33 #include "error.h"
  34 #include "hard-locale.h"
  35 #include "human.h"
  36 #include "memcoll.h"
  37 #include "physmem.h"
  38 #include "stdio-safer.h"
  39 #include "xalloc.h"
  40 #include "xstrtol.h"
  41
  42 #if HAVE_SYS_RESOURCE_H
  43 # include <sys/resource.h>
  44 #endif
  45 #ifndef RLIMIT_DATA
  46 struct rlimit { size_t rlim_cur; };
  47 # define getrlimit(Resource, Rlp) (-1)
  48 #endif
  49
  50 /* The official name of this program (e.g., no `g' prefix).  */
  51 #define PROGRAM_NAME "sort"
  52
  53 #define AUTHORS N_ ("Mike Haertel and Paul Eggert")
  54
  55 #if defined ENABLE_NLS && HAVE_LANGINFO_H
  56 # include <langinfo.h>
  57 #endif
  58
  59 #ifndef SA_NOCLDSTOP
  60 # define sigprocmask(How, Set, Oset) /* empty */
  61 # define sigset_t int
  62 #endif
  63
  64 #ifndef STDC_HEADERS
  65 double strtod ();
  66 #endif
  67
  68 /* Undefine, to avoid warning about redefinition on some systems.  */
  69 /* FIXME: Remove these: use MIN/MAX from sys2.h.  */
  70 #undef min
  71 #define min(a, b) ((a) < (b) ? (a) : (b))
  72 #undef max
  73 #define max(a, b) ((a) > (b) ? (a) : (b))
  74
  75 #define UCHAR_LIM (UCHAR_MAX + 1)
  76 #define UCHAR(c) ((unsigned char) (c))
  77
  78 #ifndef DEFAULT_TMPDIR
  79 # define DEFAULT_TMPDIR "/tmp"
  80 #endif
  81
  82 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  83    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  84    that sort exit with status 1 IFF invoked with -c and the input is
  85    not properly sorted.  Any other irregular exit must exit with a
  86    status code greater than 1.  */
  87 #define SORT_FAILURE 2
  88 #define SORT_OUT_OF_ORDER 1
  89
  90 #define C_DECIMAL_POINT '.'
  91 #define NEGATION_SIGN   '-'
  92 #define NUMERIC_ZERO    '0'
  93
  94 #ifdef ENABLE_NLS
  95
  96 static char decimal_point;
  97 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
  98
  99 /* Nonzero if the corresponding locales are hard.  */
 100 static int hard_LC_COLLATE;
 101 # if HAVE_NL_LANGINFO
 102 static int hard_LC_TIME;
 103 # endif
 104
 105 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 106
 107 #else
 108
 109 # define decimal_point C_DECIMAL_POINT
 110 # define IS_THOUSANDS_SEP(x) 0
 111
 112 #endif
 113
 114 #define NONZERO(x) (x != 0)
 115
 116 /* The kind of blanks for '-b' to skip in various options. */
 117 enum blanktype { bl_start, bl_end, bl_both };
 118
 119 /* The character marking end of line. Default to \n. */
 120 static int eolchar = '\n';
 121
 122 /* Lines are held in core as counted strings. */
 123 struct line
 124 {
 125   char *text;                   /* Text of the line. */
 126   size_t length;                /* Length including final newline. */
 127   char *keybeg;                 /* Start of first key. */
 128   char *keylim;                 /* Limit of first key. */
 129 };
 130
 131 /* Input buffers. */
 132 struct buffer
 133 {
 134   char *buf;                    /* Dynamically allocated buffer,
 135                                    partitioned into 3 regions:
 136                                    - input data;
 137                                    - unused area;
 138                                    - an array of lines, in reverse order.  */
 139   size_t used;                  /* Number of bytes used for input data.  */
 140   size_t nlines;                /* Number of lines in the line array.  */
 141   size_t alloc;                 /* Number of bytes allocated. */
 142   size_t left;                  /* Number of bytes left from previous reads. */
 143   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 144   int eof;                      /* An EOF has been read.  */
 145 };
 146
 147 struct keyfield
 148 {
 149   size_t sword;                 /* Zero-origin 'word' to start at. */
 150   size_t schar;                 /* Additional characters to skip. */
 151   int skipsblanks;              /* Skip leading white space at start. */
 152   size_t eword;                 /* Zero-origin first word after field. */
 153   size_t echar;                 /* Additional characters in field. */
 154   int skipeblanks;              /* Skip trailing white space at finish. */
 155   int *ignore;                  /* Boolean array of characters to ignore. */
 156   char *translate;              /* Translation applied to characters. */
 157   int numeric;                  /* Flag for numeric comparison.  Handle
 158                                    strings of digits with optional decimal
 159                                    point, but no exponential notation. */
 160   int general_numeric;          /* Flag for general, numeric comparison.
 161                                    Handle numbers in exponential notation. */
 162   int month;                    /* Flag for comparison by month name. */
 163   int reverse;                  /* Reverse the sense of comparison. */
 164   struct keyfield *next;        /* Next keyfield to try. */
 165 };
 166
 167 struct month
 168 {
 169   char *name;
 170   int val;
 171 };
 172
 173 /* The name this program was run with. */
 174 char *program_name;
 175
 176 /* FIXME: None of these tables work with multibyte character sets.
 177    Also, there are many other bugs when handling multibyte characters,
 178    or even unibyte encodings where line boundaries are not in the
 179    initial shift state.  One way to fix this is to rewrite `sort' to
 180    use wide characters internally, but doing this with good
 181    performance is a bit tricky.  */
 182
 183 /* Table of white space. */
 184 static int blanks[UCHAR_LIM];
 185
 186 /* Table of non-printing characters. */
 187 static int nonprinting[UCHAR_LIM];
 188
 189 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 190 static int nondictionary[UCHAR_LIM];
 191
 192 /* Translation table folding lower case to upper.  */
 193 static char fold_toupper[UCHAR_LIM];
 194
 195 #define MONTHS_PER_YEAR 12
 196
 197 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 198 # define MONTHTAB_CONST /* empty */
 199 #else
 200 # define MONTHTAB_CONST const
 201 #endif
 202
 203 /* Table mapping month names to integers.
 204    Alphabetic order allows binary search. */
 205 static MONTHTAB_CONST struct month monthtab[] =
 206 {
 207   {"APR", 4},
 208   {"AUG", 8},
 209   {"DEC", 12},
 210   {"FEB", 2},
 211   {"JAN", 1},
 212   {"JUL", 7},
 213   {"JUN", 6},
 214   {"MAR", 3},
 215   {"MAY", 5},
 216   {"NOV", 11},
 217   {"OCT", 10},
 218   {"SEP", 9}
 219 };
 220
 221 /* During the merge phase, the number of files to merge at once. */
 222 #define NMERGE 16
 223
 224 /* Minimum size for a merge or check buffer.  */
 225 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 226
 227 /* Minimum sort size; the code might not work with smaller sizes.  */
 228 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 229
 230 /* The number of bytes needed for a merge or check buffer, which can
 231    function relatively efficiently even if it holds only one line.  If
 232    a longer line is seen, this value is increased.  */
 233 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 234
 235 /* The approximate maximum number of bytes of main memory to use, as
 236    specified by the user.  Zero if the user has not specified a size.  */
 237 static size_t sort_size;
 238
 239 /* The guessed size for non-regular files.  */
 240 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 241
 242 /* Array of directory names in which any temporary files are to be created. */
 243 static char const **temp_dirs;
 244
 245 /* Number of temporary directory names used.  */
 246 static size_t temp_dir_count;
 247
 248 /* Number of allocated slots in temp_dirs.  */
 249 static size_t temp_dir_alloc;
 250
 251 /* Our process ID.  */
 252 static pid_t process_id;
 253
 254 /* Flag to reverse the order of all comparisons. */
 255 static int reverse;
 256
 257 /* Flag for stable sort.  This turns off the last ditch bytewise
 258    comparison of lines, and instead leaves lines in the same order
 259    they were read if all keys compare equal.  */
 260 static int stable;
 261
 262 /* Tab character separating fields.  If NUL, then fields are separated
 263    by the empty string between a non-whitespace character and a whitespace
 264    character. */
 265 static char tab;
 266
 267 /* Flag to remove consecutive duplicate lines from the output.
 268    Only the last of a sequence of equal lines will be output. */
 269 static int unique;
 270
 271 /* Nonzero if any of the input files are the standard input. */
 272 static int have_read_stdin;
 273
 274 /* List of key field comparisons to be tried.  */
 275 static struct keyfield *keylist;
 276
 277 void
 278 usage (int status)
 279 {
 280   if (status != 0)
 281     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 282              program_name);
 283   else
 284     {
 285       printf (_("\
 286 Usage: %s [OPTION]... [FILE]...\n\
 287 "),
 288               program_name);
 289       printf (_("\
 290 Write sorted concatenation of all FILE(s) to standard output.\n\
 291 \n\
 292 Ordering options:\n\
 293 \n\
 294   -b, --ignore-leading-blanks ignore leading blanks\n\
 295   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 296   -f, --ignore-case           fold lower case to upper case characters\n\
 297   -g, --general-numeric-sort  compare according to general numerical value\n\
 298   -i, --ignore-nonprinting    consider only printable characters\n\
 299   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 300   -n, --numeric-sort          compare according to string numerical value\n\
 301   -r, --reverse               reverse the result of comparisons\n\
 302 \n\
 303 ")
 304               );
 305       printf (_("\
 306 Other options:\n\
 307 \n\
 308   -c, --check               check whether input is sorted; do not sort\n\
 309   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 310   -m, --merge               merge already sorted files; do not sort\n\
 311   -o, --output=FILE         write result to FILE instead of standard output\n\
 312   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 313   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 314   -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\
 315   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s\n\
 316                               multiple options specify multiple directories\n\
 317   -u, --unique              with -c: check for strict ordering\n\
 318                               otherwise: output only the first of an equal run\n\
 319   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 320   +POS1 [-POS2]             start a key at POS1, end it before POS2 (origin 0)\n\
 321                               Warning: this option is obsolescent\n\
 322       --help       display this help and exit\n\
 323       --version    output version information and exit\n\
 324 \n\
 325 "),
 326               DEFAULT_TMPDIR);
 327       printf (_("\
 328 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 329 in the field, both counted from one with -k, from zero with the obsolescent\n\
 330 form.  OPTS is made up of one or more single-letter ordering options, which\n\
 331 override global ordering options for that key.  If no key is given, use the\n\
 332 entire line as the key.\n\
 333 \n\
 334 SIZE may be followed by the following multiplicative suffixes:\n\
 335 %% 1%% of memory, b 1, k 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 336 \n\
 337 With no FILE, or when FILE is -, read standard input.\n\
 338 \n\
 339 *** WARNING ***\n\
 340 The locale specified by the environment affects sort order.\n\
 341 Set LC_COLLATE=C to get the traditional sort order that uses\n\
 342 native byte values.\n\
 343 ")
 344               );
 345       puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
 346     }
 347   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 348      POSIX requires that sort return 1 IFF invoked with -c and
 349      the input is not properly sorted.  */
 350   assert (status == 0 || status == SORT_FAILURE);
 351   exit (status);
 352 }
 353
 354 static struct option const long_options[] =
 355 {
 356   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 357   {"check", no_argument, NULL, 'c'},
 358   {"dictionary-order", no_argument, NULL, 'd'},
 359   {"ignore-case", no_argument, NULL, 'f'},
 360   {"general-numeric-sort", no_argument, NULL, 'g'},
 361   {"ignore-nonprinting", no_argument, NULL, 'i'},
 362   {"key", required_argument, NULL, 'k'},
 363   {"merge", no_argument, NULL, 'm'},
 364   {"month-sort", no_argument, NULL, 'M'},
 365   {"numeric-sort", no_argument, NULL, 'n'},
 366   {"output", required_argument, NULL, 'o'},
 367   {"reverse", no_argument, NULL, 'r'},
 368   {"stable", no_argument, NULL, 's'},
 369   {"buffer-size", required_argument, NULL, 'S'},
 370   {"field-separator", required_argument, NULL, 't'},
 371   {"temporary-directory", required_argument, NULL, 'T'},
 372   {"unique", no_argument, NULL, 'u'},
 373   {"zero-terminated", no_argument, NULL, 'z'},
 374   {GETOPT_HELP_OPTION_DECL},
 375   {GETOPT_VERSION_OPTION_DECL},
 376   {0, 0, 0, 0},
 377 };
 378
 379 /* The set of signals that are caught.  */
 380 static sigset_t caught_signals;
 381
 382 /* The list of temporary files. */
 383 struct tempnode
 384 {
 385   struct tempnode *volatile next;
 386   char name[1];  /* Actual size is 1 + file name length.  */
 387 };
 388 static struct tempnode *volatile temphead;
 389
 390 /* Clean up any remaining temporary files. */
 391
 392 static void
 393 cleanup (void)
 394 {
 395   struct tempnode *node;
 396
 397   for (node = temphead; node; node = node->next)
 398     unlink (node->name);
 399 }
 400
 401 /* Report MESSAGE for FILE, then clean up and exit.  */
 402
 403 static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN;
 404 static void
 405 die (char const *message, char const *file)
 406 {
 407   error (0, errno, "%s: %s", message, file);
 408   cleanup ();
 409   exit (SORT_FAILURE);
 410 }
 411
 412 /* Create a new temporary file, returning its newly allocated name.
 413    Store into *PFP a stream open for writing.  */
 414
 415 static char *
 416 create_temp_file (FILE **pfp)
 417 {
 418   static char const slashbase[] = "/sortXXXXXX";
 419   static size_t temp_dir_index;
 420   sigset_t oldset;
 421   int fd;
 422   int saved_errno;
 423   char const *temp_dir = temp_dirs[temp_dir_index];
 424   size_t len = strlen (temp_dir);
 425   struct tempnode *node =
 426     (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase);
 427   char *file = node->name;
 428
 429   memcpy (file, temp_dir, len);
 430   memcpy (file + len, slashbase, sizeof slashbase);
 431   node->next = temphead;
 432   if (++temp_dir_index == temp_dir_count)
 433     temp_dir_index = 0;
 434
 435   /* Create the temporary file in a critical section, to avoid races.  */
 436   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 437   fd = mkstemp (file);
 438   if (0 <= fd)
 439     temphead = node;
 440   saved_errno = errno;
 441   sigprocmask (SIG_SETMASK, &oldset, NULL);
 442   errno = saved_errno;
 443
 444   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 445     die (_("cannot create temporary file"), file);
 446
 447   return file;
 448 }
 449
 450 static FILE *
 451 xfopen (const char *file, const char *how)
 452 {
 453   FILE *fp;
 454
 455   if (STREQ (file, "-"))
 456     {
 457       if (*how == 'r')
 458         {
 459           have_read_stdin = 1;
 460           fp = stdin;
 461         }
 462       else
 463         fp = stdout;
 464     }
 465   else
 466     {
 467       if ((fp = fopen_safer (file, how)) == NULL)
 468         die (_("open failed"), file);
 469     }
 470
 471   return fp;
 472 }
 473
 474 /* Close FP, whose name is FILE, and report any errors.  */
 475
 476 static void
 477 xfclose (FILE *fp, char const *file)
 478 {
 479   if (fp == stdin)
 480     {
 481       /* Allow reading stdin from tty more than once. */
 482       if (feof (fp))
 483         clearerr (fp);
 484     }
 485   else
 486     {
 487       if (fclose (fp) != 0)
 488         die (_("close failed"), file);
 489     }
 490 }
 491
 492 static void
 493 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 494 {
 495   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 496     die (_("write failed"), output_file);
 497 }
 498
 499 /* Append DIR to the array of temporary directory names.  */
 500 static void
 501 add_temp_dir (char const *dir)
 502 {
 503   if (temp_dir_count == temp_dir_alloc)
 504     {
 505       temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
 506       temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
 507     }
 508
 509   temp_dirs[temp_dir_count++] = dir;
 510 }
 511
 512 /* Search through the list of temporary files for NAME;
 513    remove it if it is found on the list. */
 514
 515 static void
 516 zaptemp (const char *name)
 517 {
 518   struct tempnode *volatile *pnode;
 519   struct tempnode *node;
 520
 521   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 522     if (node->name == name)
 523       {
 524         unlink (name);
 525         *pnode = node->next;
 526         free ((char *) node);
 527         break;
 528       }
 529 }
 530
 531 #ifdef ENABLE_NLS
 532
 533 static int
 534 struct_month_cmp (const void *m1, const void *m2)
 535 {
 536   return strcmp (((const struct month *) m1)->name,
 537                  ((const struct month *) m2)->name);
 538 }
 539
 540 #endif /* NLS */
 541
 542 /* Initialize the character class tables. */
 543
 544 static void
 545 inittables (void)
 546 {
 547   int i;
 548
 549   for (i = 0; i < UCHAR_LIM; ++i)
 550     {
 551       if (ISBLANK (i))
 552         blanks[i] = 1;
 553       if (!ISPRINT (i))
 554         nonprinting[i] = 1;
 555       if (!ISALNUM (i) && !ISBLANK (i))
 556         nondictionary[i] = 1;
 557       if (ISLOWER (i))
 558         fold_toupper[i] = toupper (i);
 559       else
 560         fold_toupper[i] = i;
 561     }
 562
 563 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 564   /* If we're not in the "C" locale, read different names for months.  */
 565   if (hard_LC_TIME)
 566     {
 567       for (i = 0; i < MONTHS_PER_YEAR; i++)
 568         {
 569           char *s;
 570           size_t s_len;
 571           size_t j;
 572           char *name;
 573
 574           s = (char *) nl_langinfo (ABMON_1 + i);
 575           s_len = strlen (s);
 576           monthtab[i].name = name = (char *) xmalloc (s_len + 1);
 577           monthtab[i].val = i + 1;
 578
 579           for (j = 0; j < s_len; j++)
 580             name[j] = fold_toupper[UCHAR (s[j])];
 581           name[j] = '\0';
 582         }
 583       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 584              sizeof (struct month), struct_month_cmp);
 585     }
 586 #endif /* NLS */
 587 }
 588
 589 /* Specify the amount of main memory to use when sorting.  */
 590 static void
 591 specify_sort_size (char const *s)
 592 {
 593   uintmax_t n;
 594   char *suffix;
 595   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkmMPtTYZ");
 596
 597   /* The default unit is kB.  */
 598   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 599     {
 600       if (n <= UINTMAX_MAX / 1024)
 601         n *= 1024;
 602       else
 603         e = LONGINT_OVERFLOW;
 604     }
 605
 606   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 607   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 608     switch (suffix[0])
 609       {
 610       case 'b':
 611         e = LONGINT_OK;
 612         break;
 613
 614       case '%':
 615         {
 616           double mem = physmem_total () * n / 100;
 617
 618           /* Use "<", not "<=", to avoid problems with rounding.  */
 619           if (mem < UINTMAX_MAX)
 620             {
 621               n = mem;
 622               e = LONGINT_OK;
 623             }
 624           else
 625             e = LONGINT_OVERFLOW;
 626         }
 627         break;
 628       }
 629
 630   if (e == LONGINT_OK)
 631     {
 632       sort_size = n;
 633       if (sort_size == n)
 634         {
 635           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 636           return;
 637         }
 638
 639       e = LONGINT_OVERFLOW;
 640     }
 641
 642   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 643 }
 644
 645 /* Return the default sort size.  */
 646 static size_t
 647 default_sort_size (void)
 648 {
 649   /* Let MEM be available memory or 1/8 of total memory, whichever
 650      is greater.  */
 651   double avail = physmem_available ();
 652   double total = physmem_total ();
 653   double mem = MAX (avail, total / 8);
 654   struct rlimit rlimit;
 655
 656   /* Let SIZE be MEM, but no more than the maximum object size or
 657      system resource limits.  Avoid the MIN macro here, as it is not
 658      quite right when only one argument is floating point.  Don't
 659      bother to check for values like RLIM_INFINITY since in practice
 660      they are not much less than SIZE_MAX.  */
 661   size_t size = SIZE_MAX;
 662   if (mem < size)
 663     size = mem;
 664   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 665     size = rlimit.rlim_cur;
 666 #ifdef RLIMIT_AS
 667   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 668     size = rlimit.rlim_cur;
 669 #endif
 670
 671   /* Leave a large safety margin for the above limits, as failure can
 672      occur when they are exceeded.  */
 673   size /= 2;
 674
 675 #ifdef RLIMIT_RSS
 676   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 677      Exceeding RSS is not fatal, but can be quite slow.  */
 678   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 679     size = rlimit.rlim_cur / 16 * 15;
 680 #endif
 681
 682   /* Use no less than the minimum.  */
 683   return MAX (size, MIN_SORT_SIZE);
 684 }
 685
 686 /* Return the sort buffer size to use with the input files identified
 687    by FPS and FILES, which are alternate paths to the same files.
 688    NFILES gives the number of input files; NFPS may be less.  Assume
 689    that each input line requires LINE_BYTES extra bytes' worth of line
 690    information.  Return at most SIZE_BOUND.  */
 691
 692 static size_t
 693 sort_buffer_size (FILE *const *fps, int nfps,
 694                   char *const *files, int nfiles,
 695                   size_t line_bytes, size_t size_bound)
 696 {
 697   /* In the worst case, each input byte is a newline.  */
 698   size_t worst_case_per_input_byte = line_bytes + 1;
 699
 700   /* Keep enough room for one extra input line and an extra byte.
 701      This extra room might be needed when preparing to read EOF.  */
 702   size_t size = worst_case_per_input_byte + 1;
 703
 704   int i;
 705
 706   for (i = 0; i < nfiles; i++)
 707     {
 708       struct stat st;
 709       off_t file_size;
 710       size_t worst_case;
 711
 712       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 713            : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
 714            : stat (files[i], &st))
 715           != 0)
 716         die (_("stat failed"), files[i]);
 717
 718       file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;
 719
 720       /* Add the amount of memory needed to represent the worst case
 721          where the input consists entirely of newlines followed by a
 722          single non-newline.  Check for overflow.  */
 723       worst_case = file_size * worst_case_per_input_byte + 1;
 724       if (file_size != worst_case / worst_case_per_input_byte
 725           || size_bound - size <= worst_case)
 726         return size_bound;
 727       size += worst_case;
 728     }
 729
 730   return size;
 731 }
 732
 733 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 734    must be at least sizeof (struct line).  Allocate ALLOC bytes
 735    initially.  */
 736
 737 static void
 738 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 739 {
 740   /* Ensure that the line array is properly aligned.  If the desired
 741      size cannot be allocated, repeatedly halve it until allocation
 742      succeeds.  The smaller allocation may hurt overall performance,
 743      but that's better than failing.  */
 744   for (;;)
 745     {
 746       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 747       buf->buf = malloc (alloc);
 748       if (buf->buf)
 749         break;
 750       alloc /= 2;
 751       if (alloc <= line_bytes + 1)
 752         xalloc_die ();
 753     }
 754
 755   buf->line_bytes = line_bytes;
 756   buf->alloc = alloc;
 757   buf->used = buf->left = buf->nlines = 0;
 758   buf->eof = 0;
 759 }
 760
 761 /* Return one past the limit of the line array.  */
 762
 763 static inline struct line *
 764 buffer_linelim (struct buffer const *buf)
 765 {
 766   return (struct line *) (buf->buf + buf->alloc);
 767 }
 768
 769 /* Return a pointer to the first character of the field specified
 770    by KEY in LINE. */
 771
 772 static char *
 773 begfield (const struct line *line, const struct keyfield *key)
 774 {
 775   register char *ptr = line->text, *lim = ptr + line->length - 1;
 776   register size_t sword = key->sword;
 777   register size_t schar = key->schar;
 778
 779   if (tab)
 780     while (ptr < lim && sword--)
 781       {
 782         while (ptr < lim && *ptr != tab)
 783           ++ptr;
 784         if (ptr < lim)
 785           ++ptr;
 786       }
 787   else
 788     while (ptr < lim && sword--)
 789       {
 790         while (ptr < lim && blanks[UCHAR (*ptr)])
 791           ++ptr;
 792         while (ptr < lim && !blanks[UCHAR (*ptr)])
 793           ++ptr;
 794       }
 795
 796   if (key->skipsblanks)
 797     while (ptr < lim && blanks[UCHAR (*ptr)])
 798       ++ptr;
 799
 800   if (schar < lim - ptr)
 801     ptr += schar;
 802   else
 803     ptr = lim;
 804
 805   return ptr;
 806 }
 807
 808 /* Return the limit of (a pointer to the first character after) the field
 809    in LINE specified by KEY. */
 810
 811 static char *
 812 limfield (const struct line *line, const struct keyfield *key)
 813 {
 814   register char *ptr = line->text, *lim = ptr + line->length - 1;
 815   register size_t eword = key->eword, echar = key->echar;
 816
 817   /* Note: from the POSIX spec:
 818      The leading field separator itself is included in
 819      a field when -t is not used.  FIXME: move this comment up... */
 820
 821   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 822      whichever comes first.  If there are more than EWORD fields, leave
 823      PTR pointing at the beginning of the field having zero-based index,
 824      EWORD.  If a delimiter character was specified (via -t), then that
 825      `beginning' is the first character following the delimiting TAB.
 826      Otherwise, leave PTR pointing at the first `blank' character after
 827      the preceding field.  */
 828   if (tab)
 829     while (ptr < lim && eword--)
 830       {
 831         while (ptr < lim && *ptr != tab)
 832           ++ptr;
 833         if (ptr < lim && (eword | echar))
 834           ++ptr;
 835       }
 836   else
 837     while (ptr < lim && eword--)
 838       {
 839         while (ptr < lim && blanks[UCHAR (*ptr)])
 840           ++ptr;
 841         while (ptr < lim && !blanks[UCHAR (*ptr)])
 842           ++ptr;
 843       }
 844
 845 #ifdef POSIX_UNSPECIFIED
 846   /* The following block of code makes GNU sort incompatible with
 847      standard Unix sort, so it's ifdef'd out for now.
 848      The POSIX spec isn't clear on how to interpret this.
 849      FIXME: request clarification.
 850
 851      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 852      Date: Thu, 30 May 96 12:20:41 -0400
 853
 854      [...]I believe I've found another bug in `sort'.
 855
 856      $ cat /tmp/sort.in
 857      a b c 2 d
 858      pq rs 1 t
 859      $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
 860      a b c 2 d
 861      pq rs 1 t
 862      $ /bin/sort +0.6 -0.7 </tmp/sort.in
 863      pq rs 1 t
 864      a b c 2 d
 865
 866      Unix sort produced the answer I expected: sort on the single character
 867      in column 6.  GNU sort produced different results, because it disagrees
 868      on the interpretation of the key-end spec "-M.N".  Unix sort reads this
 869      as "skip M fields, then N characters"; but GNU sort wants it to mean
 870      "skip M fields, then either N characters or the rest of the current
 871      field, whichever comes first".  This extra clause applies only to
 872      key-ends, not key-starts.
 873      */
 874
 875   /* Make LIM point to the end of (one byte past) the current field.  */
 876   if (tab)
 877     {
 878       char *newlim;
 879       newlim = memchr (ptr, tab, lim - ptr);
 880       if (newlim)
 881         lim = newlim;
 882     }
 883   else
 884     {
 885       char *newlim;
 886       newlim = ptr;
 887       while (newlim < lim && blanks[UCHAR (*newlim)])
 888         ++newlim;
 889       while (newlim < lim && !blanks[UCHAR (*newlim)])
 890         ++newlim;
 891       lim = newlim;
 892     }
 893 #endif
 894
 895   /* If we're skipping leading blanks, don't start counting characters
 896      until after skipping past any leading blanks.  */
 897   if (key->skipsblanks)
 898     while (ptr < lim && blanks[UCHAR (*ptr)])
 899       ++ptr;
 900
 901   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 902   if (echar < lim - ptr)
 903     ptr += echar;
 904   else
 905     ptr = lim;
 906
 907   return ptr;
 908 }
 909
 910 /* FIXME */
 911
 912 static void
 913 trim_trailing_blanks (const char *a_start, char **a_end)
 914 {
 915   while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
 916     --(*a_end);
 917 }
 918
 919 /* Fill BUF reading from FP, moving buf->left bytes from the end
 920    of buf->buf to the beginning first.  If EOF is reached and the
 921    file wasn't terminated by a newline, supply one.  Set up BUF's line
 922    table too.  FILE is the name of the file corresponding to FP.
 923    Return nonzero if some input was read.  */
 924
 925 static int
 926 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
 927 {
 928   struct keyfield const *key = keylist;
 929   char eol = eolchar;
 930   size_t line_bytes = buf->line_bytes;
 931   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
 932
 933   if (buf->eof)
 934     return 0;
 935
 936   if (buf->used != buf->left)
 937     {
 938       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 939       buf->used = buf->left;
 940       buf->nlines = 0;
 941     }
 942
 943   for (;;)
 944     {
 945       char *ptr = buf->buf + buf->used;
 946       struct line *linelim = buffer_linelim (buf);
 947       struct line *line = linelim - buf->nlines;
 948       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
 949       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
 950
 951       while (line_bytes + 1 < avail)
 952         {
 953           /* Read as many bytes as possible, but do not read so many
 954              bytes that there might not be enough room for the
 955              corresponding line array.  The worst case is when the
 956              rest of the input file consists entirely of newlines,
 957              except that the last byte is not a newline.  */
 958           size_t readsize = (avail - 1) / (line_bytes + 1);
 959           size_t bytes_read = fread (ptr, 1, readsize, fp);
 960           char *ptrlim = ptr + bytes_read;
 961           char *p;
 962           avail -= bytes_read;
 963
 964           if (bytes_read != readsize)
 965             {
 966               if (ferror (fp))
 967                 die (_("read failed"), file);
 968               if (feof (fp))
 969                 {
 970                   buf->eof = 1;
 971                   if (buf->buf == ptrlim)
 972                     return 0;
 973                   if (ptrlim[-1] != eol)
 974                     *ptrlim++ = eol;
 975                 }
 976             }
 977
 978           /* Find and record each line in the just-read input.  */
 979           while ((p = memchr (ptr, eol, ptrlim - ptr)))
 980             {
 981               ptr = p + 1;
 982               line--;
 983               line->text = line_start;
 984               line->length = ptr - line_start;
 985               mergesize = MAX (mergesize, line->length);
 986               avail -= line_bytes;
 987
 988               if (key)
 989                 {
 990                   /* Precompute the position of the first key for
 991                      efficiency. */
 992                   line->keylim = key->eword == -1 ? p : limfield (line, key);
 993
 994                   if (key->sword != -1)
 995                     line->keybeg = begfield (line, key);
 996                   else
 997                     {
 998                       if (key->skipsblanks)
 999                         while (blanks[UCHAR (*line_start)])
1000                           line_start++;
1001                       line->keybeg = line_start;
1002                     }
1003                   if (key->skipeblanks)
1004                     trim_trailing_blanks (line->keybeg, &line->keylim);
1005                 }
1006
1007               line_start = ptr;
1008             }
1009
1010           ptr = ptrlim;
1011           if (buf->eof)
1012             break;
1013         }
1014
1015       buf->used = ptr - buf->buf;
1016       buf->nlines = buffer_linelim (buf) - line;
1017       if (buf->nlines != 0)
1018         {
1019           buf->left = ptr - line_start;
1020           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1021           return 1;
1022         }
1023
1024       /* The current input line is too long to fit in the buffer.
1025          Double the buffer size and try again.  */
1026       if (2 * buf->alloc < buf->alloc)
1027         xalloc_die ();
1028       buf->alloc *= 2;
1029       buf->buf = xrealloc (buf->buf, buf->alloc);
1030     }
1031 }
1032
1033 /* Compare strings A and B containing decimal fractions < 1.  Each string
1034    should begin with a decimal point followed immediately by the digits
1035    of the fraction.  Strings not of this form are considered to be zero. */
1036
1037 /* The goal here, is to take two numbers a and b... compare these
1038    in parallel.  Instead of converting each, and then comparing the
1039    outcome.  Most likely stopping the comparison before the conversion
1040    is complete.  The algorithm used, in the old sort:
1041
1042    Algorithm: fraccompare
1043    Action   : compare two decimal fractions
1044    accepts  : char *a, char *b
1045    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1046    implement:
1047
1048    if *a == decimal_point AND *b == decimal_point
1049      find first character different in a and b.
1050      if both are digits, return the difference *a - *b.
1051      if *a is a digit
1052        skip past zeros
1053        if digit return 1, else 0
1054      if *b is a digit
1055        skip past zeros
1056        if digit return -1, else 0
1057    if *a is a decimal_point
1058      skip past decimal_point and zeros
1059      if digit return 1, else 0
1060    if *b is a decimal_point
1061      skip past decimal_point and zeros
1062      if digit return -1, else 0
1063    return 0 */
1064
1065 static int
1066 fraccompare (register const char *a, register const char *b)
1067 {
1068   if (*a == decimal_point && *b == decimal_point)
1069     {
1070       while (*++a == *++b)
1071         if (! ISDIGIT (*a))
1072           return 0;
1073       if (ISDIGIT (*a) && ISDIGIT (*b))
1074         return *a - *b;
1075       if (ISDIGIT (*a))
1076         goto a_trailing_nonzero;
1077       if (ISDIGIT (*b))
1078         goto b_trailing_nonzero;
1079       return 0;
1080     }
1081   else if (*a++ == decimal_point)
1082     {
1083     a_trailing_nonzero:
1084       while (*a == NUMERIC_ZERO)
1085         a++;
1086       return ISDIGIT (*a);
1087     }
1088   else if (*b++ == decimal_point)
1089     {
1090     b_trailing_nonzero:
1091       while (*b == NUMERIC_ZERO)
1092         b++;
1093       return - ISDIGIT (*b);
1094     }
1095   return 0;
1096 }
1097
1098 /* Compare strings A and B as numbers without explicitly converting them to
1099    machine numbers.  Comparatively slow for short strings, but asymptotically
1100    hideously fast. */
1101
1102 static int
1103 numcompare (register const char *a, register const char *b)
1104 {
1105   register int tmpa, tmpb, tmp;
1106   register size_t loga, logb;
1107
1108   tmpa = *a;
1109   tmpb = *b;
1110
1111   while (blanks[UCHAR (tmpa)])
1112     tmpa = *++a;
1113   while (blanks[UCHAR (tmpb)])
1114     tmpb = *++b;
1115
1116   if (tmpa == NEGATION_SIGN)
1117     {
1118       do
1119         tmpa = *++a;
1120       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1121       if (tmpb != NEGATION_SIGN)
1122         {
1123           if (tmpa == decimal_point)
1124             do
1125               tmpa = *++a;
1126             while (tmpa == NUMERIC_ZERO);
1127           if (ISDIGIT (tmpa))
1128             return -1;
1129           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1130             tmpb = *++b;
1131           if (tmpb == decimal_point)
1132             do
1133               tmpb = *++b;
1134             while (tmpb == NUMERIC_ZERO);
1135           if (ISDIGIT (tmpb))
1136             return -1;
1137           return 0;
1138         }
1139       do
1140         tmpb = *++b;
1141       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1142
1143       while (tmpa == tmpb && ISDIGIT (tmpa))
1144         {
1145           do
1146             tmpa = *++a;
1147           while (IS_THOUSANDS_SEP (tmpa));
1148           do
1149             tmpb = *++b;
1150           while (IS_THOUSANDS_SEP (tmpb));
1151         }
1152
1153       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1154           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1155         return -fraccompare (a, b);
1156
1157       tmp = tmpb - tmpa;
1158
1159       for (loga = 0; ISDIGIT (tmpa); ++loga)
1160         do
1161           tmpa = *++a;
1162         while (IS_THOUSANDS_SEP (tmpa));
1163
1164       for (logb = 0; ISDIGIT (tmpb); ++logb)
1165         do
1166           tmpb = *++b;
1167         while (IS_THOUSANDS_SEP (tmpb));
1168
1169       if (loga != logb)
1170         return loga < logb ? 1 : -1;
1171
1172       if (!loga)
1173         return 0;
1174
1175       return tmp;
1176     }
1177   else if (tmpb == NEGATION_SIGN)
1178     {
1179       do
1180         tmpb = *++b;
1181       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1182       if (tmpb == decimal_point)
1183         do
1184           tmpb = *++b;
1185         while (tmpb == NUMERIC_ZERO);
1186       if (ISDIGIT (tmpb))
1187         return 1;
1188       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1189         tmpa = *++a;
1190       if (tmpa == decimal_point)
1191         do
1192           tmpa = *++a;
1193         while (tmpa == NUMERIC_ZERO);
1194       if (ISDIGIT (tmpa))
1195         return 1;
1196       return 0;
1197     }
1198   else
1199     {
1200       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1201         tmpa = *++a;
1202       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1203         tmpb = *++b;
1204
1205       while (tmpa == tmpb && ISDIGIT (tmpa))
1206         {
1207           do
1208             tmpa = *++a;
1209           while (IS_THOUSANDS_SEP (tmpa));
1210           do
1211             tmpb = *++b;
1212           while (IS_THOUSANDS_SEP (tmpb));
1213         }
1214
1215       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1216           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1217         return fraccompare (a, b);
1218
1219       tmp = tmpa - tmpb;
1220
1221       for (loga = 0; ISDIGIT (tmpa); ++loga)
1222         do
1223           tmpa = *++a;
1224         while (IS_THOUSANDS_SEP (tmpa));
1225
1226       for (logb = 0; ISDIGIT (tmpb); ++logb)
1227         do
1228           tmpb = *++b;
1229         while (IS_THOUSANDS_SEP (tmpb));
1230
1231       if (loga != logb)
1232         return loga < logb ? -1 : 1;
1233
1234       if (!loga)
1235         return 0;
1236
1237       return tmp;
1238     }
1239 }
1240
1241 static int
1242 general_numcompare (const char *sa, const char *sb)
1243 {
1244   /* FIXME: add option to warn about failed conversions.  */
1245   /* FIXME: maybe add option to try expensive FP conversion
1246      only if A and B can't be compared more cheaply/accurately.  */
1247
1248   char *ea;
1249   char *eb;
1250   double a = strtod (sa, &ea);
1251   double b = strtod (sb, &eb);
1252
1253   /* Put conversion errors at the start of the collating sequence.  */
1254   if (sa == ea)
1255     return sb == eb ? 0 : -1;
1256   if (sb == eb)
1257     return 1;
1258
1259   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1260      conversion errors but before numbers; sort them by internal
1261      bit-pattern, for lack of a more portable alternative.  */
1262   return (a < b ? -1
1263           : a > b ? 1
1264           : a == b ? 0
1265           : b == b ? -1
1266           : a == a ? 1
1267           : memcmp ((char *) &a, (char *) &b, sizeof a));
1268 }
1269
1270 /* Return an integer in 1..12 of the month name S with length LEN.
1271    Return 0 if the name in S is not recognized.  */
1272
1273 static int
1274 getmonth (const char *s, size_t len)
1275 {
1276   char *month;
1277   register size_t i;
1278   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1279
1280   while (len > 0 && blanks[UCHAR (*s)])
1281     {
1282       ++s;
1283       --len;
1284     }
1285
1286   if (len == 0)
1287     return 0;
1288
1289   month = (char *) alloca (len + 1);
1290   for (i = 0; i < len; ++i)
1291     month[i] = fold_toupper[UCHAR (s[i])];
1292   while (blanks[UCHAR (month[i - 1])])
1293     --i;
1294   month[i] = '\0';
1295
1296   do
1297     {
1298       int ix = (lo + hi) / 2;
1299
1300       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1301         hi = ix;
1302       else
1303         lo = ix;
1304     }
1305   while (hi - lo > 1);
1306
1307   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1308             ? monthtab[lo].val : 0);
1309
1310   return result;
1311 }
1312
1313 /* Compare two lines A and B trying every key in sequence until there
1314    are no more keys or a difference is found. */
1315
1316 static int
1317 keycompare (const struct line *a, const struct line *b)
1318 {
1319   struct keyfield *key = keylist;
1320
1321   /* For the first iteration only, the key positions have been
1322      precomputed for us. */
1323   register char *texta = a->keybeg;
1324   register char *textb = b->keybeg;
1325   register char *lima = a->keylim;
1326   register char *limb = b->keylim;
1327
1328   int diff;
1329
1330   for (;;)
1331     {
1332       register unsigned char *translate = (unsigned char *) key->translate;
1333       register int *ignore = key->ignore;
1334
1335       /* Find the lengths. */
1336       size_t lena = lima <= texta ? 0 : lima - texta;
1337       size_t lenb = limb <= textb ? 0 : limb - textb;
1338
1339       if (key->skipeblanks)
1340         {
1341           char *a_end = texta + lena;
1342           char *b_end = textb + lenb;
1343           trim_trailing_blanks (texta, &a_end);
1344           trim_trailing_blanks (textb, &b_end);
1345           lena = a_end - texta;
1346           lenb = b_end - textb;
1347         }
1348
1349       /* Actually compare the fields. */
1350       if (key->numeric | key->general_numeric)
1351         {
1352           char savea = *lima, saveb = *limb;
1353
1354           *lima = *limb = '\0';
1355           diff = ((key->numeric ? numcompare : general_numcompare)
1356                   (texta, textb));
1357           *lima = savea, *limb = saveb;
1358         }
1359       else if (key->month)
1360         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1361 #ifdef ENABLE_NLS
1362       /* Sorting like this may become slow, so in a simple locale the user
1363          can select a faster sort that is similar to ascii sort  */
1364       else if (hard_LC_COLLATE)
1365         {
1366           if (ignore || translate)
1367             {
1368               char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1369               char *copy_b = copy_a + lena + 1;
1370               size_t new_len_a, new_len_b, i;
1371
1372               /* Ignore and/or translate chars before comparing.  */
1373               for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1374                 {
1375                   if (i < lena)
1376                     {
1377                       copy_a[new_len_a] = (translate
1378                                            ? translate[UCHAR (texta[i])]
1379                                            : texta[i]);
1380                       if (!ignore || !ignore[UCHAR (texta[i])])
1381                         ++new_len_a;
1382                     }
1383                   if (i < lenb)
1384                     {
1385                       copy_b[new_len_b] = (translate
1386                                            ? translate[UCHAR (textb[i])]
1387                                            : textb [i]);
1388                       if (!ignore || !ignore[UCHAR (textb[i])])
1389                         ++new_len_b;
1390                     }
1391                 }
1392
1393               diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
1394             }
1395           else if (lena == 0)
1396             diff = - NONZERO (lenb);
1397           else if (lenb == 0)
1398             goto greater;
1399           else
1400             diff = memcoll (texta, lena, textb, lenb);
1401         }
1402 #endif
1403       else if (ignore)
1404         {
1405 #define CMP_WITH_IGNORE(A, B)                                           \
1406   do                                                                    \
1407     {                                                                   \
1408           for (;;)                                                      \
1409             {                                                           \
1410               while (texta < lima && ignore[UCHAR (*texta)])            \
1411                 ++texta;                                                \
1412               while (textb < limb && ignore[UCHAR (*textb)])            \
1413                 ++textb;                                                \
1414               if (! (texta < lima && textb < limb))                     \
1415                 break;                                                  \
1416               diff = UCHAR (A) - UCHAR (B);                             \
1417               if (diff)                                                 \
1418                 goto not_equal;                                         \
1419               ++texta;                                                  \
1420               ++textb;                                                  \
1421             }                                                           \
1422                                                                         \
1423           diff = (lima - texta) - (limb - textb);                       \
1424     }                                                                   \
1425   while (0)
1426
1427           if (translate)
1428             CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1429                              translate[UCHAR (*textb)]);
1430           else
1431             CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1432         }
1433       else if (lena == 0)
1434         diff = - NONZERO (lenb);
1435       else if (lenb == 0)
1436         goto greater;
1437       else
1438         {
1439           if (translate)
1440             {
1441               while (texta < lima && textb < limb)
1442                 {
1443                   diff = (UCHAR (translate[UCHAR (*texta++)])
1444                           - UCHAR (translate[UCHAR (*textb++)]));
1445                   if (diff)
1446                     goto not_equal;
1447                 }
1448             }
1449           else
1450             {
1451               diff = memcmp (texta, textb, min (lena, lenb));
1452               if (diff)
1453                 goto not_equal;
1454             }
1455           diff = lena < lenb ? -1 : lena != lenb;
1456         }
1457
1458       if (diff)
1459         goto not_equal;
1460
1461       key = key->next;
1462       if (! key)
1463         break;
1464
1465       /* Find the beginning and limit of the next field.  */
1466       if (key->eword != -1)
1467         lima = limfield (a, key), limb = limfield (b, key);
1468       else
1469         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1470
1471       if (key->sword != -1)
1472         texta = begfield (a, key), textb = begfield (b, key);
1473       else
1474         {
1475           texta = a->text, textb = b->text;
1476           if (key->skipsblanks)
1477             {
1478               while (texta < lima && blanks[UCHAR (*texta)])
1479                 ++texta;
1480               while (textb < limb && blanks[UCHAR (*textb)])
1481                 ++textb;
1482             }
1483         }
1484     }
1485
1486   return 0;
1487
1488  greater:
1489   diff = 1;
1490  not_equal:
1491   return key->reverse ? -diff : diff;
1492 }
1493
1494 /* Compare two lines A and B, returning negative, zero, or positive
1495    depending on whether A compares less than, equal to, or greater than B. */
1496
1497 static int
1498 compare (register const struct line *a, register const struct line *b)
1499 {
1500   int diff;
1501   size_t alen, blen;
1502
1503   /* First try to compare on the specified keys (if any).
1504      The only two cases with no key at all are unadorned sort,
1505      and unadorned sort -r. */
1506   if (keylist)
1507     {
1508       diff = keycompare (a, b);
1509       alloca (0);
1510       if (diff != 0 || unique || stable)
1511         return diff;
1512     }
1513
1514   /* If the keys all compare equal (or no keys were specified)
1515      fall through to the default comparison.  */
1516   alen = a->length - 1, blen = b->length - 1;
1517
1518   if (alen == 0)
1519     diff = - NONZERO (blen);
1520   else if (blen == 0)
1521     diff = NONZERO (alen);
1522 #ifdef ENABLE_NLS
1523   else if (hard_LC_COLLATE)
1524     diff = memcoll (a->text, alen, b->text, blen);
1525 #endif
1526   else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
1527     diff = alen < blen ? -1 : alen != blen;
1528
1529   return reverse ? -diff : diff;
1530 }
1531
1532 /* Check that the lines read from the given FP come in order.  Print a
1533    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1534    one if they are not in order.  Otherwise, print no diagnostic
1535    and return zero.  */
1536
1537 static int
1538 checkfp (FILE *fp, char *file_name)
1539 {
1540   struct buffer buf;            /* Input buffer. */
1541   struct line temp;             /* Copy of previous line. */
1542   size_t alloc = 0;
1543   uintmax_t line_number = 0;
1544   struct keyfield *key = keylist;
1545   int nonunique = 1 - unique;
1546   int disordered = 0;
1547
1548   initbuf (&buf, sizeof (struct line),
1549            MAX (merge_buffer_size, sort_size));
1550   temp.text = NULL;
1551
1552   while (fillbuf (&buf, fp, file_name))
1553     {
1554       struct line const *line = buffer_linelim (&buf);
1555       struct line const *linebase = line - buf.nlines;
1556
1557       /* Make sure the line saved from the old buffer contents is
1558          less than or equal to the first line of the new buffer. */
1559       if (alloc && nonunique <= compare (&temp, line - 1))
1560         {
1561         found_disorder:
1562           {
1563             char hr_buf[LONGEST_HUMAN_READABLE + 1];
1564             struct line const *disorder_line = line - 1;
1565             uintmax_t disorder_line_number =
1566               buffer_linelim (&buf) - disorder_line + line_number;
1567             fprintf (stderr, _("%s: %s:%s: disorder: "),
1568                      program_name, file_name,
1569                      human_readable (disorder_line_number, hr_buf, 1, 1));
1570             write_bytes (disorder_line->text, disorder_line->length, stderr,
1571                          _("standard error"));
1572             disordered = 1;
1573             break;
1574           }
1575         }
1576
1577       /* Compare each line in the buffer with its successor.  */
1578       while (linebase < --line)
1579         if (nonunique <= compare (line, line - 1))
1580           goto found_disorder;
1581
1582       line_number += buf.nlines;
1583
1584       /* Save the last line of the buffer.  */
1585       if (alloc < line->length)
1586         {
1587           do
1588             {
1589               alloc *= 2;
1590               if (! alloc)
1591                 {
1592                   alloc = line->length;
1593                   break;
1594                 }
1595             }
1596           while (alloc < line->length);
1597
1598           temp.text = xrealloc (temp.text, alloc);
1599         }
1600       memcpy (temp.text, line->text, line->length);
1601       temp.length = line->length;
1602       if (key)
1603         {
1604           temp.keybeg = temp.text + (line->keybeg - line->text);
1605           temp.keylim = temp.text + (line->keylim - line->text);
1606         }
1607     }
1608
1609   xfclose (fp, file_name);
1610   free (buf.buf);
1611   if (temp.text)
1612     free (temp.text);
1613   return disordered;
1614 }
1615
1616 /* Merge lines from FILES onto OFP.  NFILES cannot be greater than
1617    NMERGE.  Close input and output files before returning.
1618    OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1619    output file has not been opened yet.  */
1620
1621 static void
1622 mergefps (char **files, register int nfiles,
1623           FILE *ofp, const char *output_file)
1624 {
1625   FILE *fps[NMERGE];            /* Input streams for each file.  */
1626   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1627   struct line saved;            /* Saved line storage for unique check. */
1628   struct line const *savedline = NULL;
1629                                 /* &saved if there is a saved line. */
1630   size_t savealloc = 0;         /* Size allocated for the saved line. */
1631   struct line const *cur[NMERGE]; /* Current line in each line table. */
1632   struct line const *base[NMERGE]; /* Base of each line table.  */
1633   int ord[NMERGE];              /* Table representing a permutation of fps,
1634                                    such that cur[ord[0]] is the smallest line
1635                                    and will be next output. */
1636   register int i, j, t;
1637   struct keyfield *key = keylist;
1638   saved.text = NULL;
1639
1640   /* Read initial lines from each input file. */
1641   for (i = 0; i < nfiles; ++i)
1642     {
1643       fps[i] = xfopen (files[i], "r");
1644       initbuf (&buffer[i], sizeof (struct line),
1645                MAX (merge_buffer_size, sort_size / nfiles));
1646       /* If a file is empty, eliminate it from future consideration. */
1647       while (i < nfiles && !fillbuf (&buffer[i], fps[i], files[i]))
1648         {
1649           xfclose (fps[i], files[i]);
1650           zaptemp (files[i]);
1651           --nfiles;
1652           for (j = i; j < nfiles; ++j)
1653             files[j] = files[j + 1];
1654         }
1655       if (i == nfiles)
1656         free (buffer[i].buf);
1657       else
1658         {
1659           struct line const *linelim = buffer_linelim (&buffer[i]);
1660           cur[i] = linelim - 1;
1661           base[i] = linelim - buffer[i].nlines;
1662         }
1663     }
1664
1665   if (! ofp)
1666     ofp = xfopen (output_file, "w");
1667
1668   /* Set up the ord table according to comparisons among input lines.
1669      Since this only reorders two items if one is strictly greater than
1670      the other, it is stable. */
1671   for (i = 0; i < nfiles; ++i)
1672     ord[i] = i;
1673   for (i = 1; i < nfiles; ++i)
1674     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1675       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1676
1677   /* Repeatedly output the smallest line until no input remains. */
1678   while (nfiles)
1679     {
1680       struct line const *smallest = cur[ord[0]];
1681
1682       /* If uniquified output is turned on, output only the first of
1683          an identical series of lines. */
1684       if (unique)
1685         {
1686           if (savedline && compare (savedline, smallest))
1687             {
1688               savedline = 0;
1689               write_bytes (saved.text, saved.length, ofp, output_file);
1690             }
1691           if (!savedline)
1692             {
1693               savedline = &saved;
1694               if (savealloc < smallest->length)
1695                 {
1696                   do
1697                     if (! savealloc)
1698                       {
1699                         savealloc = smallest->length;
1700                         break;
1701                       }
1702                   while ((savealloc *= 2) < smallest->length);
1703
1704                   saved.text = xrealloc (saved.text, savealloc);
1705                 }
1706               saved.length = smallest->length;
1707               memcpy (saved.text, smallest->text, saved.length);
1708               if (key)
1709                 {
1710                   saved.keybeg =
1711                     saved.text + (smallest->keybeg - smallest->text);
1712                   saved.keylim =
1713                     saved.text + (smallest->keylim - smallest->text);
1714                 }
1715             }
1716         }
1717       else
1718         write_bytes (smallest->text, smallest->length, ofp, output_file);
1719
1720       /* Check if we need to read more lines into core. */
1721       if (base[ord[0]] < smallest)
1722         cur[ord[0]] = smallest - 1;
1723       else
1724         {
1725           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1726             {
1727               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1728               cur[ord[0]] = linelim - 1;
1729               base[ord[0]] = linelim - buffer[ord[0]].nlines;
1730             }
1731           else
1732             {
1733               /* We reached EOF on fps[ord[0]]. */
1734               for (i = 1; i < nfiles; ++i)
1735                 if (ord[i] > ord[0])
1736                   --ord[i];
1737               --nfiles;
1738               xfclose (fps[ord[0]], files[ord[0]]);
1739               zaptemp (files[ord[0]]);
1740               free (buffer[ord[0]].buf);
1741               for (i = ord[0]; i < nfiles; ++i)
1742                 {
1743                   fps[i] = fps[i + 1];
1744                   files[i] = files[i + 1];
1745                   buffer[i] = buffer[i + 1];
1746                   cur[i] = cur[i + 1];
1747                   base[i] = base[i + 1];
1748                 }
1749               for (i = 0; i < nfiles; ++i)
1750                 ord[i] = ord[i + 1];
1751               continue;
1752             }
1753         }
1754
1755       /* The new line just read in may be larger than other lines
1756          already in core; push it back in the queue until we encounter
1757          a line larger than it. */
1758       for (i = 1; i < nfiles; ++i)
1759         {
1760           t = compare (cur[ord[0]], cur[ord[i]]);
1761           if (!t)
1762             t = ord[0] - ord[i];
1763           if (t < 0)
1764             break;
1765         }
1766       t = ord[0];
1767       for (j = 1; j < i; ++j)
1768         ord[j - 1] = ord[j];
1769       ord[i - 1] = t;
1770     }
1771
1772   if (unique && savedline)
1773     {
1774       write_bytes (saved.text, saved.length, ofp, output_file);
1775       free (saved.text);
1776     }
1777
1778   xfclose (ofp, output_file);
1779 }
1780
1781 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1782    The input and output arrays are in reverse order, and LINES and
1783    TEMP point just past the end of their respective arrays.  */
1784
1785 static void
1786 sortlines (struct line *lines, size_t nlines, struct line *temp)
1787 {
1788   register struct line *lo, *hi, *t;
1789   register size_t nlo, nhi;
1790
1791   if (nlines == 2)
1792     {
1793       if (0 < compare (&lines[-1], &lines[-2]))
1794         {
1795           struct line tmp = lines[-1];
1796           lines[-1] = lines[-2];
1797           lines[-2] = tmp;
1798         }
1799       return;
1800     }
1801
1802   nlo = nlines / 2;
1803   lo = lines;
1804   nhi = nlines - nlo;
1805   hi = lines - nlo;
1806
1807   if (nlo > 1)
1808     sortlines (lo, nlo, temp);
1809
1810   if (nhi > 1)
1811     sortlines (hi, nhi, temp);
1812
1813   t = temp;
1814
1815   while (nlo && nhi)
1816     if (compare (lo - 1, hi - 1) <= 0)
1817       *--t = *--lo, --nlo;
1818     else
1819       *--t = *--hi, --nhi;
1820   while (nlo--)
1821     *--t = *--lo;
1822
1823   for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1824     *--lo = *--t;
1825 }
1826
1827 /* Return the index of the first of NFILES FILES that is the same file
1828    as OUTFILE.  If none can be the same, return NFILES.  Consider an
1829    input pipe to be the same as OUTFILE, since the pipe might be the
1830    output of a command like "cat OUTFILE".  */
1831
1832 static int
1833 first_same_file (char **files, int nfiles, char const *outfile)
1834 {
1835   int i;
1836   int got_outstat = 0;
1837   struct stat instat, outstat;
1838
1839   for (i = 0; i < nfiles; i++)
1840     {
1841       int standard_input = STREQ (files[i], "-");
1842
1843       if (STREQ (outfile, files[i]) && ! standard_input)
1844         return i;
1845
1846       if (! got_outstat)
1847         {
1848           got_outstat = 1;
1849           if ((STREQ (outfile, "-")
1850                ? fstat (STDOUT_FILENO, &outstat)
1851                : stat (outfile, &outstat))
1852               != 0)
1853             return nfiles;
1854         }
1855
1856       if (((standard_input
1857             ? fstat (STDIN_FILENO, &instat)
1858             : stat (files[i], &instat))
1859            == 0)
1860           && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1861         return i;
1862     }
1863
1864   return nfiles;
1865 }
1866
1867 /* Check that each of the NFILES FILES is ordered.
1868    Return a count of disordered files. */
1869
1870 static int
1871 check (char **files, int nfiles)
1872 {
1873   int i, disorders = 0;
1874   FILE *fp;
1875
1876   for (i = 0; i < nfiles; ++i)
1877     {
1878       fp = xfopen (files[i], "r");
1879       disorders += checkfp (fp, files[i]);
1880     }
1881   return disorders;
1882 }
1883
1884 /* Merge NFILES FILES onto OUTPUT_FILE.  However, merge at most
1885    MAX_MERGE input files directly onto OUTPUT_FILE.  MAX_MERGE cannot
1886    exceed NMERGE.  */
1887
1888 static void
1889 merge (char **files, int nfiles, int max_merge, char const *output_file)
1890 {
1891   while (max_merge < nfiles)
1892     {
1893       FILE *tfp;
1894       int i, t = 0;
1895       char *temp;
1896       for (i = 0; i < nfiles / NMERGE; ++i)
1897         {
1898           temp = create_temp_file (&tfp);
1899           mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1900           files[t++] = temp;
1901         }
1902       temp = create_temp_file (&tfp);
1903       mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1904       files[t++] = temp;
1905       nfiles = t;
1906       if (nfiles == 1)
1907         break;
1908     }
1909
1910   mergefps (files, nfiles, NULL, output_file);
1911 }
1912
1913 /* Sort NFILES FILES onto OUTPUT_FILE. */
1914
1915 static void
1916 sort (char **files, int nfiles, char const *output_file)
1917 {
1918   struct buffer buf;
1919   int n_temp_files = 0;
1920   int output_file_created = 0;
1921
1922   static size_t size;
1923   if (! size && ! (size = sort_size))
1924     size = default_sort_size ();
1925
1926   buf.alloc = 0;
1927
1928   while (nfiles)
1929     {
1930       char const *temp_output;
1931       char const *file = *files;
1932       FILE *fp = xfopen (file, "r");
1933       FILE *tfp;
1934
1935       if (! buf.alloc)
1936         initbuf (&buf, 2 * sizeof (struct line),
1937                  sort_buffer_size (&fp, 1, files, nfiles,
1938                                    2 * sizeof (struct line), size));
1939       buf.eof = 0;
1940       files++;
1941       nfiles--;
1942
1943       while (fillbuf (&buf, fp, file))
1944         {
1945           struct line *line;
1946           struct line *linebase;
1947
1948           if (buf.eof && nfiles
1949               && (2 * sizeof (struct line) + 1
1950                   < (buf.alloc - buf.used
1951                      - 2 * sizeof (struct line) * buf.nlines)))
1952             {
1953               /* End of file, but there is more input and buffer room.
1954                  Concatenate the next input file; this is faster in
1955                  the usual case.  */
1956               buf.left = buf.used;
1957               break;
1958             }
1959
1960           line = buffer_linelim (&buf);
1961           linebase = line - buf.nlines;
1962           sortlines (line, buf.nlines, linebase);
1963           if (buf.eof && !nfiles && !n_temp_files && !buf.left)
1964             {
1965               xfclose (fp, file);
1966               tfp = xfopen (output_file, "w");
1967               temp_output = output_file;
1968               output_file_created = 1;
1969             }
1970           else
1971             {
1972               ++n_temp_files;
1973               temp_output = create_temp_file (&tfp);
1974             }
1975
1976           do
1977             {
1978               line--;
1979               write_bytes (line->text, line->length, tfp, temp_output);
1980               if (unique)
1981                 while (linebase < line && compare (line, line - 1) == 0)
1982                   line--;
1983             }
1984           while (linebase < line);
1985
1986           xfclose (tfp, temp_output);
1987
1988           if (output_file_created)
1989             goto finish;
1990         }
1991       xfclose (fp, file);
1992     }
1993
1994  finish:
1995   free (buf.buf);
1996
1997   if (! output_file_created)
1998     {
1999       int i = n_temp_files;
2000       struct tempnode *node;
2001       char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2002       for (node = temphead; i > 0; node = node->next)
2003         tempfiles[--i] = node->name;
2004       merge (tempfiles, n_temp_files, NMERGE, output_file);
2005       free ((char *) tempfiles);
2006     }
2007 }
2008
2009 /* Insert key KEY at the end of the key list.  */
2010
2011 static void
2012 insertkey (struct keyfield *key)
2013 {
2014   struct keyfield **p;
2015
2016   for (p = &keylist; *p; p = &(*p)->next)
2017     continue;
2018   *p = key;
2019   key->next = NULL;
2020 }
2021
2022 /* Report a bad field specification SPEC, with extra info MSGID.  */
2023
2024 static void badfieldspec PARAMS ((char const *, char const *))
2025      ATTRIBUTE_NORETURN;
2026 static void
2027 badfieldspec (char const *spec, char const *msgid)
2028 {
2029   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2030          _(msgid), spec);
2031   abort ();
2032 }
2033
2034 /* Parse the leading integer in STRING and store the resulting value
2035    (which must fit into size_t) into *VAL.  Return the address of the
2036    suffix after the integer.  If MSGID is NULL, return NULL after
2037    failure; otherwise, report MSGID and exit on failure.  */
2038
2039 static char const *
2040 parse_field_count (char const *string, size_t *val, char const *msgid)
2041 {
2042   char *suffix;
2043   uintmax_t n;
2044
2045   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2046     {
2047     case LONGINT_OK:
2048     case LONGINT_INVALID_SUFFIX_CHAR:
2049       *val = n;
2050       if (*val == n)
2051         break;
2052       /* Fall through.  */
2053     case LONGINT_OVERFLOW:
2054       if (msgid)
2055         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2056                _(msgid), (int) (suffix - string), string);
2057       return NULL;
2058
2059     case LONGINT_INVALID:
2060       if (msgid)
2061         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2062                _(msgid), string);
2063       return NULL;
2064     }
2065
2066   return suffix;
2067 }
2068
2069 /* Handle interrupts and hangups. */
2070
2071 static void
2072 sighandler (int sig)
2073 {
2074 #ifndef SA_NOCLDSTOP
2075   signal (sig, SIG_IGN);
2076 #endif
2077
2078   cleanup ();
2079
2080 #ifdef SA_NOCLDSTOP
2081   {
2082     struct sigaction sigact;
2083
2084     sigact.sa_handler = SIG_DFL;
2085     sigemptyset (&sigact.sa_mask);
2086     sigact.sa_flags = 0;
2087     sigaction (sig, &sigact, NULL);
2088   }
2089 #else
2090   signal (sig, SIG_DFL);
2091 #endif
2092
2093   kill (process_id, sig);
2094 }
2095
2096 /* Set the ordering options for KEY specified in S.
2097    Return the address of the first character in S that
2098    is not a valid ordering option.
2099    BLANKTYPE is the kind of blanks that 'b' should skip. */
2100
2101 static char *
2102 set_ordering (register const char *s, struct keyfield *key,
2103               enum blanktype blanktype)
2104 {
2105   while (*s)
2106     {
2107       switch (*s)
2108         {
2109         case 'b':
2110           if (blanktype == bl_start || blanktype == bl_both)
2111             key->skipsblanks = 1;
2112           if (blanktype == bl_end || blanktype == bl_both)
2113             key->skipeblanks = 1;
2114           break;
2115         case 'd':
2116           key->ignore = nondictionary;
2117           break;
2118         case 'f':
2119           key->translate = fold_toupper;
2120           break;
2121         case 'g':
2122           key->general_numeric = 1;
2123           break;
2124         case 'i':
2125           key->ignore = nonprinting;
2126           break;
2127         case 'M':
2128           key->month = 1;
2129           break;
2130         case 'n':
2131           key->numeric = 1;
2132           break;
2133         case 'r':
2134           key->reverse = 1;
2135           break;
2136         default:
2137           return (char *) s;
2138         }
2139       ++s;
2140     }
2141   return (char *) s;
2142 }
2143
2144 static struct keyfield *
2145 new_key (void)
2146 {
2147   struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key);
2148   key->eword = -1;
2149   return key;
2150 }
2151
2152 int
2153 main (int argc, char **argv)
2154 {
2155   struct keyfield *key;
2156   struct keyfield gkey;
2157   char const *s;
2158   int i;
2159   int c = 0;
2160   int checkonly = 0, mergeonly = 0, nfiles = 0;
2161   int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
2162   char *minus = "-", **files;
2163   char const *outfile = minus;
2164   static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2165   int nsigs = sizeof sigs / sizeof *sigs;
2166 #ifdef SA_NOCLDSTOP
2167   struct sigaction oldact, newact;
2168 #endif
2169
2170   program_name = argv[0];
2171   process_id = getpid ();
2172   setlocale (LC_ALL, "");
2173   bindtextdomain (PACKAGE, LOCALEDIR);
2174   textdomain (PACKAGE);
2175
2176 #ifdef ENABLE_NLS
2177
2178   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2179 # if HAVE_NL_LANGINFO
2180   hard_LC_TIME = hard_locale (LC_TIME);
2181 # endif
2182
2183   /* Let's get locale's representation of the decimal point */
2184   {
2185     struct lconv *lconvp = localeconv ();
2186
2187     /* If the locale doesn't define a decimal point, or if the decimal
2188        point is multibyte, use the C decimal point.  We don't support
2189        multibyte decimal points yet.  */
2190     decimal_point = *lconvp->decimal_point;
2191     if (! decimal_point || lconvp->decimal_point[1])
2192       decimal_point = C_DECIMAL_POINT;
2193
2194     /* We don't support multibyte thousands separators yet.  */
2195     th_sep = *lconvp->thousands_sep;
2196     if (! th_sep || lconvp->thousands_sep[1])
2197       th_sep = CHAR_MAX + 1;
2198   }
2199
2200 #endif /* NLS */
2201
2202   have_read_stdin = 0;
2203   inittables ();
2204
2205   /* Change the way xmalloc and xrealloc fail.  */
2206   xalloc_exit_failure = SORT_FAILURE;
2207   xalloc_fail_func = cleanup;
2208
2209 #ifdef SA_NOCLDSTOP
2210   sigemptyset (&caught_signals);
2211   for (i = 0; i < nsigs; i++)
2212     sigaddset (&caught_signals, sigs[i]);
2213   newact.sa_handler = sighandler;
2214   newact.sa_mask = caught_signals;
2215   newact.sa_flags = 0;
2216 #endif
2217
2218   for (i = 0; i < nsigs; i++)
2219     {
2220       int sig = sigs[i];
2221 #ifdef SA_NOCLDSTOP
2222       sigaction (sig, NULL, &oldact);
2223       if (oldact.sa_handler != SIG_IGN)
2224         sigaction (sig, &newact, NULL);
2225 #else
2226       if (signal (sig, SIG_IGN) != SIG_IGN)
2227         signal (sig, sighandler);
2228 #endif
2229     }
2230
2231   gkey.sword = gkey.eword = -1;
2232   gkey.ignore = NULL;
2233   gkey.translate = NULL;
2234   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2235   gkey.skipsblanks = gkey.skipeblanks = 0;
2236
2237   files = (char **) xmalloc (sizeof (char *) * argc);
2238
2239   for (;;)
2240     {
2241       /* Parse an operand as a file after "--" was seen; or if
2242          pedantic and a file was seen, unless -c was not seen and the
2243          operand is "-o FILE" or "-oFILE".  POSIX 1003.1-200x d5
2244          removes the exception for the -o options; when it becomes
2245          official the code will need to be changed.  */
2246
2247       if (c == -1
2248           || (posix_pedantic && nfiles != 0
2249               && ! (! checkonly
2250                     && optind != argc
2251                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2252                     && (argv[optind][2] || optind + 1 != argc)))
2253           || ((c = getopt_long (argc, argv,
2254                                 "-bcdfgik:mMno:rsS:t:T:uy::z",
2255                                 long_options, NULL))
2256               == -1))
2257         {
2258           if (optind == argc)
2259             break;
2260           files[nfiles++] = argv[optind++];
2261         }
2262       else switch (c)
2263         {
2264         case 1:
2265           /* Treat +POS1 [-POS2] as a key if possible; but silently
2266              treat an operand as a file if it is not a valid +POS1.
2267              POSIX 1003.1-200x d5 does not allow support for +POS1, so
2268              when it becomes official this code will need to be
2269              changed.  */
2270           key = NULL;
2271           if (optarg[0] == '+')
2272             {
2273               key = new_key ();
2274               s = parse_field_count (optarg + 1, &key->sword, NULL);
2275               if (s && *s == '.')
2276                 s = parse_field_count (s + 1, &key->schar, NULL);
2277               if (! (key->sword | key->schar))
2278                 key->sword = -1;
2279               if (! s || *set_ordering (s, key, bl_start))
2280                 {
2281                   free (key);
2282                   key = NULL;
2283                 }
2284               else
2285                 {
2286                   if (optind != argc && argv[optind][0] == '-'
2287                       && ISDIGIT (argv[optind][1]))
2288                     {
2289                       char const *optarg1 = argv[optind++];
2290                       s = parse_field_count (optarg1 + 1, &key->eword,
2291                                              N_("invalid number after `-'"));
2292                       if (*s == '.')
2293                         s = parse_field_count (s + 1, &key->echar,
2294                                                N_("invalid number after `.'"));
2295                       if (*set_ordering (s, key, bl_end))
2296                         badfieldspec (optarg1,
2297                                       N_("stray character in field spec"));
2298                     }
2299                   insertkey (key);
2300                 }
2301             }
2302           if (! key)
2303             files[nfiles++] = optarg;
2304           break;
2305
2306         case 'b':
2307         case 'd':
2308         case 'f':
2309         case 'g':
2310         case 'i':
2311         case 'M':
2312         case 'n':
2313         case 'r':
2314           {
2315             char str[2];
2316             str[0] = c;
2317             str[1] = '\0';
2318             set_ordering (str, &gkey, bl_both);
2319           }
2320           break;
2321
2322         case 'c':
2323           checkonly = 1;
2324           break;
2325
2326         case 'k':
2327           key = new_key ();
2328
2329           /* Get POS1. */
2330           s = parse_field_count (optarg, &key->sword,
2331                                  N_("invalid number at field start"));
2332           if (! key->sword--)
2333             {
2334               /* Provoke with `sort -k0' */
2335               badfieldspec (optarg, N_("field number is zero"));
2336             }
2337           if (*s == '.')
2338             {
2339               s = parse_field_count (s + 1, &key->schar,
2340                                      N_("invalid number after `.'"));
2341               if (! key->schar--)
2342                 {
2343                   /* Provoke with `sort -k1.0' */
2344                   badfieldspec (optarg, N_("character offset is zero"));
2345                 }
2346             }
2347           if (! (key->sword | key->schar))
2348             key->sword = -1;
2349           s = set_ordering (s, key, bl_start);
2350           if (*s != ',')
2351             {
2352               key->eword = -1;
2353               key->echar = 0;
2354             }
2355           else
2356             {
2357               /* Get POS2. */
2358               s = parse_field_count (s + 1, &key->eword,
2359                                      N_("invalid number after `,'"));
2360               if (! key->eword--)
2361                 {
2362                   /* Provoke with `sort -k1,0' */
2363                   badfieldspec (optarg, N_("field number is zero"));
2364                 }
2365               if (*s == '.')
2366                 s = parse_field_count (s + 1, &key->echar,
2367                                        N_("invalid number after `.'"));
2368               else
2369                 {
2370                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2371                   key->eword++;
2372                 }
2373               s = set_ordering (s, key, bl_end);
2374             }
2375           if (*s)
2376             badfieldspec (optarg, N_("stray character in field spec"));
2377           insertkey (key);
2378           break;
2379
2380         case 'm':
2381           mergeonly = 1;
2382           break;
2383
2384         case 'o':
2385           outfile = optarg;
2386           break;
2387
2388         case 's':
2389           stable = 1;
2390           break;
2391
2392         case 'S':
2393           specify_sort_size (optarg);
2394           break;
2395
2396         case 't':
2397           tab = optarg[0];
2398           if (tab && optarg[1])
2399             {
2400               /* Provoke with `sort -txx'.  Complain about
2401                  "multi-character tab" instead of "multibyte tab", so
2402                  that the diagnostic's wording does not need to be
2403                  changed once multibyte characters are supported.  */
2404               error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg);
2405             }
2406           break;
2407
2408         case 'T':
2409           add_temp_dir (optarg);
2410           break;
2411
2412         case 'u':
2413           unique = 1;
2414           break;
2415
2416         case 'y':
2417           /* Accept and ignore e.g. -y0 for compatibility with Solaris
2418              2.x through Solaris 7.  -y is marked as obsolete starting
2419              with Solaris 8.  */
2420           break;
2421
2422         case 'z':
2423           eolchar = 0;
2424           break;
2425
2426         case_GETOPT_HELP_CHAR;
2427
2428         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2429
2430         default:
2431           usage (SORT_FAILURE);
2432         }
2433     }
2434
2435   /* Inheritance of global options to individual keys. */
2436   for (key = keylist; key; key = key->next)
2437     if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2438         && !key->skipeblanks && !key->month && !key->numeric
2439         && !key->general_numeric)
2440       {
2441         key->ignore = gkey.ignore;
2442         key->translate = gkey.translate;
2443         key->skipsblanks = gkey.skipsblanks;
2444         key->skipeblanks = gkey.skipeblanks;
2445         key->month = gkey.month;
2446         key->numeric = gkey.numeric;
2447         key->general_numeric = gkey.general_numeric;
2448         key->reverse = gkey.reverse;
2449       }
2450
2451   if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks
2452                    || gkey.skipeblanks || gkey.month || gkey.numeric
2453                    || gkey.general_numeric))
2454     insertkey (&gkey);
2455   reverse = gkey.reverse;
2456
2457   if (temp_dir_count == 0)
2458     {
2459       char const *tmp_dir = getenv ("TMPDIR");
2460       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2461     }
2462
2463   if (nfiles == 0)
2464     {
2465       nfiles = 1;
2466       files = &minus;
2467     }
2468
2469   if (checkonly)
2470     {
2471       if (nfiles > 1)
2472         error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2473                files[1]);
2474
2475       /* POSIX requires that sort return 1 IFF invoked with -c and the
2476          input is not properly sorted.  */
2477       exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2478     }
2479
2480   if (mergeonly)
2481     {
2482       int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2483       merge (files, nfiles, max_merge, outfile);
2484     }
2485   else
2486     sort (files, nfiles, outfile);
2487
2488   if (have_read_stdin && fclose (stdin) == EOF)
2489     die (_("close failed"), "-");
2490
2491   cleanup ();
2492   exit (EXIT_SUCCESS);
2493 }