src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-2001 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <signal.h>
  29 #include <stdio.h>
  30 #include <assert.h>
  31 #include "system.h"
  32 #include "long-options.h"
  33 #include "error.h"
  34 #include "hard-locale.h"
  35 #include "human.h"
  36 #include "memcoll.h"
  37 #include "physmem.h"
  38 #include "stdio-safer.h"
  39 #include "xalloc.h"
  40 #include "xstrtol.h"
  41
  42 #if HAVE_SYS_RESOURCE_H
  43 # include <sys/resource.h>
  44 #endif
  45 #ifndef RLIMIT_DATA
  46 struct rlimit { size_t rlim_cur; };
  47 # define getrlimit(Resource, Rlp) (-1)
  48 #endif
  49
  50 /* The official name of this program (e.g., no `g' prefix).  */
  51 #define PROGRAM_NAME "sort"
  52
  53 #define AUTHORS N_ ("Mike Haertel and Paul Eggert")
  54
  55 #if defined ENABLE_NLS && HAVE_LANGINFO_H
  56 # include <langinfo.h>
  57 #endif
  58
  59 #ifndef SA_NOCLDSTOP
  60 # define sigprocmask(How, Set, Oset) /* empty */
  61 # define sigset_t int
  62 #endif
  63
  64 #ifndef STDC_HEADERS
  65 double strtod ();
  66 #endif
  67
  68 /* Undefine, to avoid warning about redefinition on some systems.  */
  69 /* FIXME: Remove these: use MIN/MAX from sys2.h.  */
  70 #undef min
  71 #define min(a, b) ((a) < (b) ? (a) : (b))
  72 #undef max
  73 #define max(a, b) ((a) > (b) ? (a) : (b))
  74
  75 #define UCHAR_LIM (UCHAR_MAX + 1)
  76 #define UCHAR(c) ((unsigned char) (c))
  77
  78 #ifndef DEFAULT_TMPDIR
  79 # define DEFAULT_TMPDIR "/tmp"
  80 #endif
  81
  82 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  83    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  84    that sort exit with status 1 IFF invoked with -c and the input is
  85    not properly sorted.  Any other irregular exit must exit with a
  86    status code greater than 1.  */
  87 #define SORT_FAILURE 2
  88 #define SORT_OUT_OF_ORDER 1
  89
  90 #define C_DECIMAL_POINT '.'
  91 #define NEGATION_SIGN   '-'
  92 #define NUMERIC_ZERO    '0'
  93
  94 #ifdef ENABLE_NLS
  95
  96 static char decimal_point;
  97 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
  98
  99 /* Nonzero if the corresponding locales are hard.  */
 100 static int hard_LC_COLLATE;
 101 # if HAVE_NL_LANGINFO
 102 static int hard_LC_TIME;
 103 # endif
 104
 105 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 106
 107 #else
 108
 109 # define decimal_point C_DECIMAL_POINT
 110 # define IS_THOUSANDS_SEP(x) 0
 111
 112 #endif
 113
 114 #define NONZERO(x) (x != 0)
 115
 116 /* The kind of blanks for '-b' to skip in various options. */
 117 enum blanktype { bl_start, bl_end, bl_both };
 118
 119 /* The character marking end of line. Default to \n. */
 120 static int eolchar = '\n';
 121
 122 /* Lines are held in core as counted strings. */
 123 struct line
 124 {
 125   char *text;                   /* Text of the line. */
 126   size_t length;                /* Length including final newline. */
 127   char *keybeg;                 /* Start of first key. */
 128   char *keylim;                 /* Limit of first key. */
 129 };
 130
 131 /* Input buffers. */
 132 struct buffer
 133 {
 134   char *buf;                    /* Dynamically allocated buffer,
 135                                    partitioned into 3 regions:
 136                                    - input data;
 137                                    - unused area;
 138                                    - an array of lines, in reverse order.  */
 139   size_t used;                  /* Number of bytes used for input data.  */
 140   size_t nlines;                /* Number of lines in the line array.  */
 141   size_t alloc;                 /* Number of bytes allocated. */
 142   size_t left;                  /* Number of bytes left from previous reads. */
 143   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 144   int eof;                      /* An EOF has been read.  */
 145 };
 146
 147 struct keyfield
 148 {
 149   size_t sword;                 /* Zero-origin 'word' to start at. */
 150   size_t schar;                 /* Additional characters to skip. */
 151   int skipsblanks;              /* Skip leading white space at start. */
 152   size_t eword;                 /* Zero-origin first word after field. */
 153   size_t echar;                 /* Additional characters in field. */
 154   int skipeblanks;              /* Skip trailing white space at finish. */
 155   int *ignore;                  /* Boolean array of characters to ignore. */
 156   char *translate;              /* Translation applied to characters. */
 157   int numeric;                  /* Flag for numeric comparison.  Handle
 158                                    strings of digits with optional decimal
 159                                    point, but no exponential notation. */
 160   int general_numeric;          /* Flag for general, numeric comparison.
 161                                    Handle numbers in exponential notation. */
 162   int month;                    /* Flag for comparison by month name. */
 163   int reverse;                  /* Reverse the sense of comparison. */
 164   struct keyfield *next;        /* Next keyfield to try. */
 165 };
 166
 167 struct month
 168 {
 169   char *name;
 170   int val;
 171 };
 172
 173 /* The name this program was run with. */
 174 char *program_name;
 175
 176 /* FIXME: None of these tables work with multibyte character sets.
 177    Also, there are many other bugs when handling multibyte characters,
 178    or even unibyte encodings where line boundaries are not in the
 179    initial shift state.  One way to fix this is to rewrite `sort' to
 180    use wide characters internally, but doing this with good
 181    performance is a bit tricky.  */
 182
 183 /* Table of white space. */
 184 static int blanks[UCHAR_LIM];
 185
 186 /* Table of non-printing characters. */
 187 static int nonprinting[UCHAR_LIM];
 188
 189 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 190 static int nondictionary[UCHAR_LIM];
 191
 192 /* Translation table folding lower case to upper.  */
 193 static char fold_toupper[UCHAR_LIM];
 194
 195 #define MONTHS_PER_YEAR 12
 196
 197 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 198 # define MONTHTAB_CONST /* empty */
 199 #else
 200 # define MONTHTAB_CONST const
 201 #endif
 202
 203 /* Table mapping month names to integers.
 204    Alphabetic order allows binary search. */
 205 static MONTHTAB_CONST struct month monthtab[] =
 206 {
 207   {"APR", 4},
 208   {"AUG", 8},
 209   {"DEC", 12},
 210   {"FEB", 2},
 211   {"JAN", 1},
 212   {"JUL", 7},
 213   {"JUN", 6},
 214   {"MAR", 3},
 215   {"MAY", 5},
 216   {"NOV", 11},
 217   {"OCT", 10},
 218   {"SEP", 9}
 219 };
 220
 221 /* During the merge phase, the number of files to merge at once. */
 222 #define NMERGE 16
 223
 224 /* Minimum size for a merge or check buffer.  */
 225 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 226
 227 /* Minimum sort size; the code might not work with smaller sizes.  */
 228 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 229
 230 /* The number of bytes needed for a merge or check buffer, which can
 231    function relatively efficiently even if it holds only one line.  If
 232    a longer line is seen, this value is increased.  */
 233 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 234
 235 /* The approximate maximum number of bytes of main memory to use, as
 236    specified by the user.  Zero if the user has not specified a size.  */
 237 static size_t sort_size;
 238
 239 /* The guessed size for non-regular files.  */
 240 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 241
 242 /* Array of directory names in which any temporary files are to be created. */
 243 static char const **temp_dirs;
 244
 245 /* Number of temporary directory names used.  */
 246 static size_t temp_dir_count;
 247
 248 /* Number of allocated slots in temp_dirs.  */
 249 static size_t temp_dir_alloc;
 250
 251 /* Our process ID.  */
 252 static pid_t process_id;
 253
 254 /* Flag to reverse the order of all comparisons. */
 255 static int reverse;
 256
 257 /* Flag for stable sort.  This turns off the last ditch bytewise
 258    comparison of lines, and instead leaves lines in the same order
 259    they were read if all keys compare equal.  */
 260 static int stable;
 261
 262 /* Tab character separating fields.  If NUL, then fields are separated
 263    by the empty string between a non-whitespace character and a whitespace
 264    character. */
 265 static char tab;
 266
 267 /* Flag to remove consecutive duplicate lines from the output.
 268    Only the last of a sequence of equal lines will be output. */
 269 static int unique;
 270
 271 /* Nonzero if any of the input files are the standard input. */
 272 static int have_read_stdin;
 273
 274 /* List of key field comparisons to be tried.  */
 275 static struct keyfield *keylist;
 276
 277 void
 278 usage (int status)
 279 {
 280   if (status != 0)
 281     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 282              program_name);
 283   else
 284     {
 285       printf (_("\
 286 Usage: %s [OPTION]... [FILE]...\n\
 287 "),
 288               program_name);
 289       printf (_("\
 290 Write sorted concatenation of all FILE(s) to standard output.\n\
 291 \n\
 292 Ordering options:\n\
 293 \n\
 294 Mandatory arguments to long options are mandatory for short options too.\n\
 295   -b, --ignore-leading-blanks ignore leading blanks\n\
 296   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 297   -f, --ignore-case           fold lower case to upper case characters\n\
 298   -g, --general-numeric-sort  compare according to general numerical value\n\
 299   -i, --ignore-nonprinting    consider only printable characters\n\
 300   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 301   -n, --numeric-sort          compare according to string numerical value\n\
 302   -r, --reverse               reverse the result of comparisons\n\
 303 \n\
 304 ")
 305               );
 306       printf (_("\
 307 Other options:\n\
 308 \n\
 309   -c, --check               check whether input is sorted; do not sort\n\
 310   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 311   -m, --merge               merge already sorted files; do not sort\n\
 312   -o, --output=FILE         write result to FILE instead of standard output\n\
 313   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 314   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 315   -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\
 316   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s\n\
 317                               multiple options specify multiple directories\n\
 318   -u, --unique              with -c: check for strict ordering\n\
 319                               otherwise: output only the first of an equal run\n\
 320   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 321   +POS1 [-POS2]             start a key at POS1, end it before POS2 (origin 0)\n\
 322                               Warning: this option is obsolescent\n\
 323       --help       display this help and exit\n\
 324       --version    output version information and exit\n\
 325 \n\
 326 "),
 327               DEFAULT_TMPDIR);
 328       printf (_("\
 329 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 330 in the field, both counted from one with -k, from zero with the obsolescent\n\
 331 form.  OPTS is made up of one or more single-letter ordering options, which\n\
 332 override global ordering options for that key.  If no key is given, use the\n\
 333 entire line as the key.\n\
 334 \n\
 335 SIZE may be followed by the following multiplicative suffixes:\n\
 336 %% 1%% of memory, b 1, k 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 337 \n\
 338 With no FILE, or when FILE is -, read standard input.\n\
 339 \n\
 340 *** WARNING ***\n\
 341 The locale specified by the environment affects sort order.\n\
 342 Set LC_COLLATE=C to get the traditional sort order that uses\n\
 343 native byte values.\n\
 344 ")
 345               );
 346       puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
 347     }
 348   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 349      POSIX requires that sort return 1 IFF invoked with -c and
 350      the input is not properly sorted.  */
 351   assert (status == 0 || status == SORT_FAILURE);
 352   exit (status);
 353 }
 354
 355 static struct option const long_options[] =
 356 {
 357   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 358   {"check", no_argument, NULL, 'c'},
 359   {"dictionary-order", no_argument, NULL, 'd'},
 360   {"ignore-case", no_argument, NULL, 'f'},
 361   {"general-numeric-sort", no_argument, NULL, 'g'},
 362   {"ignore-nonprinting", no_argument, NULL, 'i'},
 363   {"key", required_argument, NULL, 'k'},
 364   {"merge", no_argument, NULL, 'm'},
 365   {"month-sort", no_argument, NULL, 'M'},
 366   {"numeric-sort", no_argument, NULL, 'n'},
 367   {"output", required_argument, NULL, 'o'},
 368   {"reverse", no_argument, NULL, 'r'},
 369   {"stable", no_argument, NULL, 's'},
 370   {"buffer-size", required_argument, NULL, 'S'},
 371   {"field-separator", required_argument, NULL, 't'},
 372   {"temporary-directory", required_argument, NULL, 'T'},
 373   {"unique", no_argument, NULL, 'u'},
 374   {"zero-terminated", no_argument, NULL, 'z'},
 375   {GETOPT_HELP_OPTION_DECL},
 376   {GETOPT_VERSION_OPTION_DECL},
 377   {0, 0, 0, 0},
 378 };
 379
 380 /* The set of signals that are caught.  */
 381 static sigset_t caught_signals;
 382
 383 /* The list of temporary files. */
 384 struct tempnode
 385 {
 386   struct tempnode *volatile next;
 387   char name[1];  /* Actual size is 1 + file name length.  */
 388 };
 389 static struct tempnode *volatile temphead;
 390
 391 /* Clean up any remaining temporary files. */
 392
 393 static void
 394 cleanup (void)
 395 {
 396   struct tempnode *node;
 397
 398   for (node = temphead; node; node = node->next)
 399     unlink (node->name);
 400 }
 401
 402 /* Report MESSAGE for FILE, then clean up and exit.  */
 403
 404 static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN;
 405 static void
 406 die (char const *message, char const *file)
 407 {
 408   error (0, errno, "%s: %s", message, file);
 409   cleanup ();
 410   exit (SORT_FAILURE);
 411 }
 412
 413 /* Create a new temporary file, returning its newly allocated name.
 414    Store into *PFP a stream open for writing.  */
 415
 416 static char *
 417 create_temp_file (FILE **pfp)
 418 {
 419   static char const slashbase[] = "/sortXXXXXX";
 420   static size_t temp_dir_index;
 421   sigset_t oldset;
 422   int fd;
 423   int saved_errno;
 424   char const *temp_dir = temp_dirs[temp_dir_index];
 425   size_t len = strlen (temp_dir);
 426   struct tempnode *node =
 427     (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase);
 428   char *file = node->name;
 429
 430   memcpy (file, temp_dir, len);
 431   memcpy (file + len, slashbase, sizeof slashbase);
 432   node->next = temphead;
 433   if (++temp_dir_index == temp_dir_count)
 434     temp_dir_index = 0;
 435
 436   /* Create the temporary file in a critical section, to avoid races.  */
 437   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 438   fd = mkstemp (file);
 439   if (0 <= fd)
 440     temphead = node;
 441   saved_errno = errno;
 442   sigprocmask (SIG_SETMASK, &oldset, NULL);
 443   errno = saved_errno;
 444
 445   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 446     die (_("cannot create temporary file"), file);
 447
 448   return file;
 449 }
 450
 451 static FILE *
 452 xfopen (const char *file, const char *how)
 453 {
 454   FILE *fp;
 455
 456   if (STREQ (file, "-"))
 457     {
 458       if (*how == 'r')
 459         {
 460           have_read_stdin = 1;
 461           fp = stdin;
 462         }
 463       else
 464         fp = stdout;
 465     }
 466   else
 467     {
 468       if ((fp = fopen_safer (file, how)) == NULL)
 469         die (_("open failed"), file);
 470     }
 471
 472   return fp;
 473 }
 474
 475 /* Close FP, whose name is FILE, and report any errors.  */
 476
 477 static void
 478 xfclose (FILE *fp, char const *file)
 479 {
 480   if (fp == stdin)
 481     {
 482       /* Allow reading stdin from tty more than once. */
 483       if (feof (fp))
 484         clearerr (fp);
 485     }
 486   else
 487     {
 488       if (fclose (fp) != 0)
 489         die (_("close failed"), file);
 490     }
 491 }
 492
 493 static void
 494 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 495 {
 496   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 497     die (_("write failed"), output_file);
 498 }
 499
 500 /* Append DIR to the array of temporary directory names.  */
 501 static void
 502 add_temp_dir (char const *dir)
 503 {
 504   if (temp_dir_count == temp_dir_alloc)
 505     {
 506       temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
 507       temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
 508     }
 509
 510   temp_dirs[temp_dir_count++] = dir;
 511 }
 512
 513 /* Search through the list of temporary files for NAME;
 514    remove it if it is found on the list. */
 515
 516 static void
 517 zaptemp (const char *name)
 518 {
 519   struct tempnode *volatile *pnode;
 520   struct tempnode *node;
 521
 522   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 523     if (node->name == name)
 524       {
 525         unlink (name);
 526         *pnode = node->next;
 527         free ((char *) node);
 528         break;
 529       }
 530 }
 531
 532 #ifdef ENABLE_NLS
 533
 534 static int
 535 struct_month_cmp (const void *m1, const void *m2)
 536 {
 537   return strcmp (((const struct month *) m1)->name,
 538                  ((const struct month *) m2)->name);
 539 }
 540
 541 #endif /* NLS */
 542
 543 /* Initialize the character class tables. */
 544
 545 static void
 546 inittables (void)
 547 {
 548   int i;
 549
 550   for (i = 0; i < UCHAR_LIM; ++i)
 551     {
 552       if (ISBLANK (i))
 553         blanks[i] = 1;
 554       if (!ISPRINT (i))
 555         nonprinting[i] = 1;
 556       if (!ISALNUM (i) && !ISBLANK (i))
 557         nondictionary[i] = 1;
 558       if (ISLOWER (i))
 559         fold_toupper[i] = toupper (i);
 560       else
 561         fold_toupper[i] = i;
 562     }
 563
 564 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 565   /* If we're not in the "C" locale, read different names for months.  */
 566   if (hard_LC_TIME)
 567     {
 568       for (i = 0; i < MONTHS_PER_YEAR; i++)
 569         {
 570           char *s;
 571           size_t s_len;
 572           size_t j;
 573           char *name;
 574
 575           s = (char *) nl_langinfo (ABMON_1 + i);
 576           s_len = strlen (s);
 577           monthtab[i].name = name = (char *) xmalloc (s_len + 1);
 578           monthtab[i].val = i + 1;
 579
 580           for (j = 0; j < s_len; j++)
 581             name[j] = fold_toupper[UCHAR (s[j])];
 582           name[j] = '\0';
 583         }
 584       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 585              sizeof (struct month), struct_month_cmp);
 586     }
 587 #endif /* NLS */
 588 }
 589
 590 /* Specify the amount of main memory to use when sorting.  */
 591 static void
 592 specify_sort_size (char const *s)
 593 {
 594   uintmax_t n;
 595   char *suffix;
 596   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkmMPtTYZ");
 597
 598   /* The default unit is kB.  */
 599   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 600     {
 601       if (n <= UINTMAX_MAX / 1024)
 602         n *= 1024;
 603       else
 604         e = LONGINT_OVERFLOW;
 605     }
 606
 607   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 608   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 609     switch (suffix[0])
 610       {
 611       case 'b':
 612         e = LONGINT_OK;
 613         break;
 614
 615       case '%':
 616         {
 617           double mem = physmem_total () * n / 100;
 618
 619           /* Use "<", not "<=", to avoid problems with rounding.  */
 620           if (mem < UINTMAX_MAX)
 621             {
 622               n = mem;
 623               e = LONGINT_OK;
 624             }
 625           else
 626             e = LONGINT_OVERFLOW;
 627         }
 628         break;
 629       }
 630
 631   if (e == LONGINT_OK)
 632     {
 633       sort_size = n;
 634       if (sort_size == n)
 635         {
 636           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 637           return;
 638         }
 639
 640       e = LONGINT_OVERFLOW;
 641     }
 642
 643   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 644 }
 645
 646 /* Return the default sort size.  */
 647 static size_t
 648 default_sort_size (void)
 649 {
 650   /* Let MEM be available memory or 1/8 of total memory, whichever
 651      is greater.  */
 652   double avail = physmem_available ();
 653   double total = physmem_total ();
 654   double mem = MAX (avail, total / 8);
 655   struct rlimit rlimit;
 656
 657   /* Let SIZE be MEM, but no more than the maximum object size or
 658      system resource limits.  Avoid the MIN macro here, as it is not
 659      quite right when only one argument is floating point.  Don't
 660      bother to check for values like RLIM_INFINITY since in practice
 661      they are not much less than SIZE_MAX.  */
 662   size_t size = SIZE_MAX;
 663   if (mem < size)
 664     size = mem;
 665   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 666     size = rlimit.rlim_cur;
 667 #ifdef RLIMIT_AS
 668   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 669     size = rlimit.rlim_cur;
 670 #endif
 671
 672   /* Leave a large safety margin for the above limits, as failure can
 673      occur when they are exceeded.  */
 674   size /= 2;
 675
 676 #ifdef RLIMIT_RSS
 677   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 678      Exceeding RSS is not fatal, but can be quite slow.  */
 679   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 680     size = rlimit.rlim_cur / 16 * 15;
 681 #endif
 682
 683   /* Use no less than the minimum.  */
 684   return MAX (size, MIN_SORT_SIZE);
 685 }
 686
 687 /* Return the sort buffer size to use with the input files identified
 688    by FPS and FILES, which are alternate paths to the same files.
 689    NFILES gives the number of input files; NFPS may be less.  Assume
 690    that each input line requires LINE_BYTES extra bytes' worth of line
 691    information.  Return at most SIZE_BOUND.  */
 692
 693 static size_t
 694 sort_buffer_size (FILE *const *fps, int nfps,
 695                   char *const *files, int nfiles,
 696                   size_t line_bytes, size_t size_bound)
 697 {
 698   /* In the worst case, each input byte is a newline.  */
 699   size_t worst_case_per_input_byte = line_bytes + 1;
 700
 701   /* Keep enough room for one extra input line and an extra byte.
 702      This extra room might be needed when preparing to read EOF.  */
 703   size_t size = worst_case_per_input_byte + 1;
 704
 705   int i;
 706
 707   for (i = 0; i < nfiles; i++)
 708     {
 709       struct stat st;
 710       off_t file_size;
 711       size_t worst_case;
 712
 713       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 714            : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
 715            : stat (files[i], &st))
 716           != 0)
 717         die (_("stat failed"), files[i]);
 718
 719       file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;
 720
 721       /* Add the amount of memory needed to represent the worst case
 722          where the input consists entirely of newlines followed by a
 723          single non-newline.  Check for overflow.  */
 724       worst_case = file_size * worst_case_per_input_byte + 1;
 725       if (file_size != worst_case / worst_case_per_input_byte
 726           || size_bound - size <= worst_case)
 727         return size_bound;
 728       size += worst_case;
 729     }
 730
 731   return size;
 732 }
 733
 734 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 735    must be at least sizeof (struct line).  Allocate ALLOC bytes
 736    initially.  */
 737
 738 static void
 739 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 740 {
 741   /* Ensure that the line array is properly aligned.  If the desired
 742      size cannot be allocated, repeatedly halve it until allocation
 743      succeeds.  The smaller allocation may hurt overall performance,
 744      but that's better than failing.  */
 745   for (;;)
 746     {
 747       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 748       buf->buf = malloc (alloc);
 749       if (buf->buf)
 750         break;
 751       alloc /= 2;
 752       if (alloc <= line_bytes + 1)
 753         xalloc_die ();
 754     }
 755
 756   buf->line_bytes = line_bytes;
 757   buf->alloc = alloc;
 758   buf->used = buf->left = buf->nlines = 0;
 759   buf->eof = 0;
 760 }
 761
 762 /* Return one past the limit of the line array.  */
 763
 764 static inline struct line *
 765 buffer_linelim (struct buffer const *buf)
 766 {
 767   return (struct line *) (buf->buf + buf->alloc);
 768 }
 769
 770 /* Return a pointer to the first character of the field specified
 771    by KEY in LINE. */
 772
 773 static char *
 774 begfield (const struct line *line, const struct keyfield *key)
 775 {
 776   register char *ptr = line->text, *lim = ptr + line->length - 1;
 777   register size_t sword = key->sword;
 778   register size_t schar = key->schar;
 779
 780   if (tab)
 781     while (ptr < lim && sword--)
 782       {
 783         while (ptr < lim && *ptr != tab)
 784           ++ptr;
 785         if (ptr < lim)
 786           ++ptr;
 787       }
 788   else
 789     while (ptr < lim && sword--)
 790       {
 791         while (ptr < lim && blanks[UCHAR (*ptr)])
 792           ++ptr;
 793         while (ptr < lim && !blanks[UCHAR (*ptr)])
 794           ++ptr;
 795       }
 796
 797   if (key->skipsblanks)
 798     while (ptr < lim && blanks[UCHAR (*ptr)])
 799       ++ptr;
 800
 801   if (schar < lim - ptr)
 802     ptr += schar;
 803   else
 804     ptr = lim;
 805
 806   return ptr;
 807 }
 808
 809 /* Return the limit of (a pointer to the first character after) the field
 810    in LINE specified by KEY. */
 811
 812 static char *
 813 limfield (const struct line *line, const struct keyfield *key)
 814 {
 815   register char *ptr = line->text, *lim = ptr + line->length - 1;
 816   register size_t eword = key->eword, echar = key->echar;
 817
 818   /* Note: from the POSIX spec:
 819      The leading field separator itself is included in
 820      a field when -t is not used.  FIXME: move this comment up... */
 821
 822   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 823      whichever comes first.  If there are more than EWORD fields, leave
 824      PTR pointing at the beginning of the field having zero-based index,
 825      EWORD.  If a delimiter character was specified (via -t), then that
 826      `beginning' is the first character following the delimiting TAB.
 827      Otherwise, leave PTR pointing at the first `blank' character after
 828      the preceding field.  */
 829   if (tab)
 830     while (ptr < lim && eword--)
 831       {
 832         while (ptr < lim && *ptr != tab)
 833           ++ptr;
 834         if (ptr < lim && (eword | echar))
 835           ++ptr;
 836       }
 837   else
 838     while (ptr < lim && eword--)
 839       {
 840         while (ptr < lim && blanks[UCHAR (*ptr)])
 841           ++ptr;
 842         while (ptr < lim && !blanks[UCHAR (*ptr)])
 843           ++ptr;
 844       }
 845
 846 #ifdef POSIX_UNSPECIFIED
 847   /* The following block of code makes GNU sort incompatible with
 848      standard Unix sort, so it's ifdef'd out for now.
 849      The POSIX spec isn't clear on how to interpret this.
 850      FIXME: request clarification.
 851
 852      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 853      Date: Thu, 30 May 96 12:20:41 -0400
 854
 855      [...]I believe I've found another bug in `sort'.
 856
 857      $ cat /tmp/sort.in
 858      a b c 2 d
 859      pq rs 1 t
 860      $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
 861      a b c 2 d
 862      pq rs 1 t
 863      $ /bin/sort +0.6 -0.7 </tmp/sort.in
 864      pq rs 1 t
 865      a b c 2 d
 866
 867      Unix sort produced the answer I expected: sort on the single character
 868      in column 6.  GNU sort produced different results, because it disagrees
 869      on the interpretation of the key-end spec "-M.N".  Unix sort reads this
 870      as "skip M fields, then N characters"; but GNU sort wants it to mean
 871      "skip M fields, then either N characters or the rest of the current
 872      field, whichever comes first".  This extra clause applies only to
 873      key-ends, not key-starts.
 874      */
 875
 876   /* Make LIM point to the end of (one byte past) the current field.  */
 877   if (tab)
 878     {
 879       char *newlim;
 880       newlim = memchr (ptr, tab, lim - ptr);
 881       if (newlim)
 882         lim = newlim;
 883     }
 884   else
 885     {
 886       char *newlim;
 887       newlim = ptr;
 888       while (newlim < lim && blanks[UCHAR (*newlim)])
 889         ++newlim;
 890       while (newlim < lim && !blanks[UCHAR (*newlim)])
 891         ++newlim;
 892       lim = newlim;
 893     }
 894 #endif
 895
 896   /* If we're skipping leading blanks, don't start counting characters
 897      until after skipping past any leading blanks.  */
 898   if (key->skipsblanks)
 899     while (ptr < lim && blanks[UCHAR (*ptr)])
 900       ++ptr;
 901
 902   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 903   if (echar < lim - ptr)
 904     ptr += echar;
 905   else
 906     ptr = lim;
 907
 908   return ptr;
 909 }
 910
 911 /* FIXME */
 912
 913 static void
 914 trim_trailing_blanks (const char *a_start, char **a_end)
 915 {
 916   while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
 917     --(*a_end);
 918 }
 919
 920 /* Fill BUF reading from FP, moving buf->left bytes from the end
 921    of buf->buf to the beginning first.  If EOF is reached and the
 922    file wasn't terminated by a newline, supply one.  Set up BUF's line
 923    table too.  FILE is the name of the file corresponding to FP.
 924    Return nonzero if some input was read.  */
 925
 926 static int
 927 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
 928 {
 929   struct keyfield const *key = keylist;
 930   char eol = eolchar;
 931   size_t line_bytes = buf->line_bytes;
 932   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
 933
 934   if (buf->eof)
 935     return 0;
 936
 937   if (buf->used != buf->left)
 938     {
 939       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 940       buf->used = buf->left;
 941       buf->nlines = 0;
 942     }
 943
 944   for (;;)
 945     {
 946       char *ptr = buf->buf + buf->used;
 947       struct line *linelim = buffer_linelim (buf);
 948       struct line *line = linelim - buf->nlines;
 949       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
 950       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
 951
 952       while (line_bytes + 1 < avail)
 953         {
 954           /* Read as many bytes as possible, but do not read so many
 955              bytes that there might not be enough room for the
 956              corresponding line array.  The worst case is when the
 957              rest of the input file consists entirely of newlines,
 958              except that the last byte is not a newline.  */
 959           size_t readsize = (avail - 1) / (line_bytes + 1);
 960           size_t bytes_read = fread (ptr, 1, readsize, fp);
 961           char *ptrlim = ptr + bytes_read;
 962           char *p;
 963           avail -= bytes_read;
 964
 965           if (bytes_read != readsize)
 966             {
 967               if (ferror (fp))
 968                 die (_("read failed"), file);
 969               if (feof (fp))
 970                 {
 971                   buf->eof = 1;
 972                   if (buf->buf == ptrlim)
 973                     return 0;
 974                   if (ptrlim[-1] != eol)
 975                     *ptrlim++ = eol;
 976                 }
 977             }
 978
 979           /* Find and record each line in the just-read input.  */
 980           while ((p = memchr (ptr, eol, ptrlim - ptr)))
 981             {
 982               ptr = p + 1;
 983               line--;
 984               line->text = line_start;
 985               line->length = ptr - line_start;
 986               mergesize = MAX (mergesize, line->length);
 987               avail -= line_bytes;
 988
 989               if (key)
 990                 {
 991                   /* Precompute the position of the first key for
 992                      efficiency. */
 993                   line->keylim = key->eword == -1 ? p : limfield (line, key);
 994
 995                   if (key->sword != -1)
 996                     line->keybeg = begfield (line, key);
 997                   else
 998                     {
 999                       if (key->skipsblanks)
1000                         while (blanks[UCHAR (*line_start)])
1001                           line_start++;
1002                       line->keybeg = line_start;
1003                     }
1004                   if (key->skipeblanks)
1005                     trim_trailing_blanks (line->keybeg, &line->keylim);
1006                 }
1007
1008               line_start = ptr;
1009             }
1010
1011           ptr = ptrlim;
1012           if (buf->eof)
1013             break;
1014         }
1015
1016       buf->used = ptr - buf->buf;
1017       buf->nlines = buffer_linelim (buf) - line;
1018       if (buf->nlines != 0)
1019         {
1020           buf->left = ptr - line_start;
1021           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1022           return 1;
1023         }
1024
1025       /* The current input line is too long to fit in the buffer.
1026          Double the buffer size and try again.  */
1027       if (2 * buf->alloc < buf->alloc)
1028         xalloc_die ();
1029       buf->alloc *= 2;
1030       buf->buf = xrealloc (buf->buf, buf->alloc);
1031     }
1032 }
1033
1034 /* Compare strings A and B containing decimal fractions < 1.  Each string
1035    should begin with a decimal point followed immediately by the digits
1036    of the fraction.  Strings not of this form are considered to be zero. */
1037
1038 /* The goal here, is to take two numbers a and b... compare these
1039    in parallel.  Instead of converting each, and then comparing the
1040    outcome.  Most likely stopping the comparison before the conversion
1041    is complete.  The algorithm used, in the old sort:
1042
1043    Algorithm: fraccompare
1044    Action   : compare two decimal fractions
1045    accepts  : char *a, char *b
1046    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1047    implement:
1048
1049    if *a == decimal_point AND *b == decimal_point
1050      find first character different in a and b.
1051      if both are digits, return the difference *a - *b.
1052      if *a is a digit
1053        skip past zeros
1054        if digit return 1, else 0
1055      if *b is a digit
1056        skip past zeros
1057        if digit return -1, else 0
1058    if *a is a decimal_point
1059      skip past decimal_point and zeros
1060      if digit return 1, else 0
1061    if *b is a decimal_point
1062      skip past decimal_point and zeros
1063      if digit return -1, else 0
1064    return 0 */
1065
1066 static int
1067 fraccompare (register const char *a, register const char *b)
1068 {
1069   if (*a == decimal_point && *b == decimal_point)
1070     {
1071       while (*++a == *++b)
1072         if (! ISDIGIT (*a))
1073           return 0;
1074       if (ISDIGIT (*a) && ISDIGIT (*b))
1075         return *a - *b;
1076       if (ISDIGIT (*a))
1077         goto a_trailing_nonzero;
1078       if (ISDIGIT (*b))
1079         goto b_trailing_nonzero;
1080       return 0;
1081     }
1082   else if (*a++ == decimal_point)
1083     {
1084     a_trailing_nonzero:
1085       while (*a == NUMERIC_ZERO)
1086         a++;
1087       return ISDIGIT (*a);
1088     }
1089   else if (*b++ == decimal_point)
1090     {
1091     b_trailing_nonzero:
1092       while (*b == NUMERIC_ZERO)
1093         b++;
1094       return - ISDIGIT (*b);
1095     }
1096   return 0;
1097 }
1098
1099 /* Compare strings A and B as numbers without explicitly converting them to
1100    machine numbers.  Comparatively slow for short strings, but asymptotically
1101    hideously fast. */
1102
1103 static int
1104 numcompare (register const char *a, register const char *b)
1105 {
1106   register int tmpa, tmpb, tmp;
1107   register size_t loga, logb;
1108
1109   tmpa = *a;
1110   tmpb = *b;
1111
1112   while (blanks[UCHAR (tmpa)])
1113     tmpa = *++a;
1114   while (blanks[UCHAR (tmpb)])
1115     tmpb = *++b;
1116
1117   if (tmpa == NEGATION_SIGN)
1118     {
1119       do
1120         tmpa = *++a;
1121       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1122       if (tmpb != NEGATION_SIGN)
1123         {
1124           if (tmpa == decimal_point)
1125             do
1126               tmpa = *++a;
1127             while (tmpa == NUMERIC_ZERO);
1128           if (ISDIGIT (tmpa))
1129             return -1;
1130           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1131             tmpb = *++b;
1132           if (tmpb == decimal_point)
1133             do
1134               tmpb = *++b;
1135             while (tmpb == NUMERIC_ZERO);
1136           if (ISDIGIT (tmpb))
1137             return -1;
1138           return 0;
1139         }
1140       do
1141         tmpb = *++b;
1142       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1143
1144       while (tmpa == tmpb && ISDIGIT (tmpa))
1145         {
1146           do
1147             tmpa = *++a;
1148           while (IS_THOUSANDS_SEP (tmpa));
1149           do
1150             tmpb = *++b;
1151           while (IS_THOUSANDS_SEP (tmpb));
1152         }
1153
1154       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1155           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1156         return -fraccompare (a, b);
1157
1158       tmp = tmpb - tmpa;
1159
1160       for (loga = 0; ISDIGIT (tmpa); ++loga)
1161         do
1162           tmpa = *++a;
1163         while (IS_THOUSANDS_SEP (tmpa));
1164
1165       for (logb = 0; ISDIGIT (tmpb); ++logb)
1166         do
1167           tmpb = *++b;
1168         while (IS_THOUSANDS_SEP (tmpb));
1169
1170       if (loga != logb)
1171         return loga < logb ? 1 : -1;
1172
1173       if (!loga)
1174         return 0;
1175
1176       return tmp;
1177     }
1178   else if (tmpb == NEGATION_SIGN)
1179     {
1180       do
1181         tmpb = *++b;
1182       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1183       if (tmpb == decimal_point)
1184         do
1185           tmpb = *++b;
1186         while (tmpb == NUMERIC_ZERO);
1187       if (ISDIGIT (tmpb))
1188         return 1;
1189       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1190         tmpa = *++a;
1191       if (tmpa == decimal_point)
1192         do
1193           tmpa = *++a;
1194         while (tmpa == NUMERIC_ZERO);
1195       if (ISDIGIT (tmpa))
1196         return 1;
1197       return 0;
1198     }
1199   else
1200     {
1201       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1202         tmpa = *++a;
1203       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1204         tmpb = *++b;
1205
1206       while (tmpa == tmpb && ISDIGIT (tmpa))
1207         {
1208           do
1209             tmpa = *++a;
1210           while (IS_THOUSANDS_SEP (tmpa));
1211           do
1212             tmpb = *++b;
1213           while (IS_THOUSANDS_SEP (tmpb));
1214         }
1215
1216       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1217           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1218         return fraccompare (a, b);
1219
1220       tmp = tmpa - tmpb;
1221
1222       for (loga = 0; ISDIGIT (tmpa); ++loga)
1223         do
1224           tmpa = *++a;
1225         while (IS_THOUSANDS_SEP (tmpa));
1226
1227       for (logb = 0; ISDIGIT (tmpb); ++logb)
1228         do
1229           tmpb = *++b;
1230         while (IS_THOUSANDS_SEP (tmpb));
1231
1232       if (loga != logb)
1233         return loga < logb ? -1 : 1;
1234
1235       if (!loga)
1236         return 0;
1237
1238       return tmp;
1239     }
1240 }
1241
1242 static int
1243 general_numcompare (const char *sa, const char *sb)
1244 {
1245   /* FIXME: add option to warn about failed conversions.  */
1246   /* FIXME: maybe add option to try expensive FP conversion
1247      only if A and B can't be compared more cheaply/accurately.  */
1248
1249   char *ea;
1250   char *eb;
1251   double a = strtod (sa, &ea);
1252   double b = strtod (sb, &eb);
1253
1254   /* Put conversion errors at the start of the collating sequence.  */
1255   if (sa == ea)
1256     return sb == eb ? 0 : -1;
1257   if (sb == eb)
1258     return 1;
1259
1260   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1261      conversion errors but before numbers; sort them by internal
1262      bit-pattern, for lack of a more portable alternative.  */
1263   return (a < b ? -1
1264           : a > b ? 1
1265           : a == b ? 0
1266           : b == b ? -1
1267           : a == a ? 1
1268           : memcmp ((char *) &a, (char *) &b, sizeof a));
1269 }
1270
1271 /* Return an integer in 1..12 of the month name S with length LEN.
1272    Return 0 if the name in S is not recognized.  */
1273
1274 static int
1275 getmonth (const char *s, size_t len)
1276 {
1277   char *month;
1278   register size_t i;
1279   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1280
1281   while (len > 0 && blanks[UCHAR (*s)])
1282     {
1283       ++s;
1284       --len;
1285     }
1286
1287   if (len == 0)
1288     return 0;
1289
1290   month = (char *) alloca (len + 1);
1291   for (i = 0; i < len; ++i)
1292     month[i] = fold_toupper[UCHAR (s[i])];
1293   while (blanks[UCHAR (month[i - 1])])
1294     --i;
1295   month[i] = '\0';
1296
1297   do
1298     {
1299       int ix = (lo + hi) / 2;
1300
1301       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1302         hi = ix;
1303       else
1304         lo = ix;
1305     }
1306   while (hi - lo > 1);
1307
1308   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1309             ? monthtab[lo].val : 0);
1310
1311   return result;
1312 }
1313
1314 /* Compare two lines A and B trying every key in sequence until there
1315    are no more keys or a difference is found. */
1316
1317 static int
1318 keycompare (const struct line *a, const struct line *b)
1319 {
1320   struct keyfield *key = keylist;
1321
1322   /* For the first iteration only, the key positions have been
1323      precomputed for us. */
1324   register char *texta = a->keybeg;
1325   register char *textb = b->keybeg;
1326   register char *lima = a->keylim;
1327   register char *limb = b->keylim;
1328
1329   int diff;
1330
1331   for (;;)
1332     {
1333       register unsigned char *translate = (unsigned char *) key->translate;
1334       register int *ignore = key->ignore;
1335
1336       /* Find the lengths. */
1337       size_t lena = lima <= texta ? 0 : lima - texta;
1338       size_t lenb = limb <= textb ? 0 : limb - textb;
1339
1340       if (key->skipeblanks)
1341         {
1342           char *a_end = texta + lena;
1343           char *b_end = textb + lenb;
1344           trim_trailing_blanks (texta, &a_end);
1345           trim_trailing_blanks (textb, &b_end);
1346           lena = a_end - texta;
1347           lenb = b_end - textb;
1348         }
1349
1350       /* Actually compare the fields. */
1351       if (key->numeric | key->general_numeric)
1352         {
1353           char savea = *lima, saveb = *limb;
1354
1355           *lima = *limb = '\0';
1356           diff = ((key->numeric ? numcompare : general_numcompare)
1357                   (texta, textb));
1358           *lima = savea, *limb = saveb;
1359         }
1360       else if (key->month)
1361         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1362 #ifdef ENABLE_NLS
1363       /* Sorting like this may become slow, so in a simple locale the user
1364          can select a faster sort that is similar to ascii sort  */
1365       else if (hard_LC_COLLATE)
1366         {
1367           if (ignore || translate)
1368             {
1369               char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1370               char *copy_b = copy_a + lena + 1;
1371               size_t new_len_a, new_len_b, i;
1372
1373               /* Ignore and/or translate chars before comparing.  */
1374               for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1375                 {
1376                   if (i < lena)
1377                     {
1378                       copy_a[new_len_a] = (translate
1379                                            ? translate[UCHAR (texta[i])]
1380                                            : texta[i]);
1381                       if (!ignore || !ignore[UCHAR (texta[i])])
1382                         ++new_len_a;
1383                     }
1384                   if (i < lenb)
1385                     {
1386                       copy_b[new_len_b] = (translate
1387                                            ? translate[UCHAR (textb[i])]
1388                                            : textb [i]);
1389                       if (!ignore || !ignore[UCHAR (textb[i])])
1390                         ++new_len_b;
1391                     }
1392                 }
1393
1394               diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
1395             }
1396           else if (lena == 0)
1397             diff = - NONZERO (lenb);
1398           else if (lenb == 0)
1399             goto greater;
1400           else
1401             diff = memcoll (texta, lena, textb, lenb);
1402         }
1403 #endif
1404       else if (ignore)
1405         {
1406 #define CMP_WITH_IGNORE(A, B)                                           \
1407   do                                                                    \
1408     {                                                                   \
1409           for (;;)                                                      \
1410             {                                                           \
1411               while (texta < lima && ignore[UCHAR (*texta)])            \
1412                 ++texta;                                                \
1413               while (textb < limb && ignore[UCHAR (*textb)])            \
1414                 ++textb;                                                \
1415               if (! (texta < lima && textb < limb))                     \
1416                 break;                                                  \
1417               diff = UCHAR (A) - UCHAR (B);                             \
1418               if (diff)                                                 \
1419                 goto not_equal;                                         \
1420               ++texta;                                                  \
1421               ++textb;                                                  \
1422             }                                                           \
1423                                                                         \
1424           diff = (lima - texta) - (limb - textb);                       \
1425     }                                                                   \
1426   while (0)
1427
1428           if (translate)
1429             CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1430                              translate[UCHAR (*textb)]);
1431           else
1432             CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1433         }
1434       else if (lena == 0)
1435         diff = - NONZERO (lenb);
1436       else if (lenb == 0)
1437         goto greater;
1438       else
1439         {
1440           if (translate)
1441             {
1442               while (texta < lima && textb < limb)
1443                 {
1444                   diff = (UCHAR (translate[UCHAR (*texta++)])
1445                           - UCHAR (translate[UCHAR (*textb++)]));
1446                   if (diff)
1447                     goto not_equal;
1448                 }
1449             }
1450           else
1451             {
1452               diff = memcmp (texta, textb, min (lena, lenb));
1453               if (diff)
1454                 goto not_equal;
1455             }
1456           diff = lena < lenb ? -1 : lena != lenb;
1457         }
1458
1459       if (diff)
1460         goto not_equal;
1461
1462       key = key->next;
1463       if (! key)
1464         break;
1465
1466       /* Find the beginning and limit of the next field.  */
1467       if (key->eword != -1)
1468         lima = limfield (a, key), limb = limfield (b, key);
1469       else
1470         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1471
1472       if (key->sword != -1)
1473         texta = begfield (a, key), textb = begfield (b, key);
1474       else
1475         {
1476           texta = a->text, textb = b->text;
1477           if (key->skipsblanks)
1478             {
1479               while (texta < lima && blanks[UCHAR (*texta)])
1480                 ++texta;
1481               while (textb < limb && blanks[UCHAR (*textb)])
1482                 ++textb;
1483             }
1484         }
1485     }
1486
1487   return 0;
1488
1489  greater:
1490   diff = 1;
1491  not_equal:
1492   return key->reverse ? -diff : diff;
1493 }
1494
1495 /* Compare two lines A and B, returning negative, zero, or positive
1496    depending on whether A compares less than, equal to, or greater than B. */
1497
1498 static int
1499 compare (register const struct line *a, register const struct line *b)
1500 {
1501   int diff;
1502   size_t alen, blen;
1503
1504   /* First try to compare on the specified keys (if any).
1505      The only two cases with no key at all are unadorned sort,
1506      and unadorned sort -r. */
1507   if (keylist)
1508     {
1509       diff = keycompare (a, b);
1510       alloca (0);
1511       if (diff != 0 || unique || stable)
1512         return diff;
1513     }
1514
1515   /* If the keys all compare equal (or no keys were specified)
1516      fall through to the default comparison.  */
1517   alen = a->length - 1, blen = b->length - 1;
1518
1519   if (alen == 0)
1520     diff = - NONZERO (blen);
1521   else if (blen == 0)
1522     diff = NONZERO (alen);
1523 #ifdef ENABLE_NLS
1524   else if (hard_LC_COLLATE)
1525     diff = memcoll (a->text, alen, b->text, blen);
1526 #endif
1527   else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
1528     diff = alen < blen ? -1 : alen != blen;
1529
1530   return reverse ? -diff : diff;
1531 }
1532
1533 /* Check that the lines read from the given FP come in order.  Print a
1534    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1535    one if they are not in order.  Otherwise, print no diagnostic
1536    and return zero.  */
1537
1538 static int
1539 checkfp (FILE *fp, char *file_name)
1540 {
1541   struct buffer buf;            /* Input buffer. */
1542   struct line temp;             /* Copy of previous line. */
1543   size_t alloc = 0;
1544   uintmax_t line_number = 0;
1545   struct keyfield *key = keylist;
1546   int nonunique = 1 - unique;
1547   int disordered = 0;
1548
1549   initbuf (&buf, sizeof (struct line),
1550            MAX (merge_buffer_size, sort_size));
1551   temp.text = NULL;
1552
1553   while (fillbuf (&buf, fp, file_name))
1554     {
1555       struct line const *line = buffer_linelim (&buf);
1556       struct line const *linebase = line - buf.nlines;
1557
1558       /* Make sure the line saved from the old buffer contents is
1559          less than or equal to the first line of the new buffer. */
1560       if (alloc && nonunique <= compare (&temp, line - 1))
1561         {
1562         found_disorder:
1563           {
1564             char hr_buf[LONGEST_HUMAN_READABLE + 1];
1565             struct line const *disorder_line = line - 1;
1566             uintmax_t disorder_line_number =
1567               buffer_linelim (&buf) - disorder_line + line_number;
1568             fprintf (stderr, _("%s: %s:%s: disorder: "),
1569                      program_name, file_name,
1570                      human_readable (disorder_line_number, hr_buf, 1, 1));
1571             write_bytes (disorder_line->text, disorder_line->length, stderr,
1572                          _("standard error"));
1573             disordered = 1;
1574             break;
1575           }
1576         }
1577
1578       /* Compare each line in the buffer with its successor.  */
1579       while (linebase < --line)
1580         if (nonunique <= compare (line, line - 1))
1581           goto found_disorder;
1582
1583       line_number += buf.nlines;
1584
1585       /* Save the last line of the buffer.  */
1586       if (alloc < line->length)
1587         {
1588           do
1589             {
1590               alloc *= 2;
1591               if (! alloc)
1592                 {
1593                   alloc = line->length;
1594                   break;
1595                 }
1596             }
1597           while (alloc < line->length);
1598
1599           temp.text = xrealloc (temp.text, alloc);
1600         }
1601       memcpy (temp.text, line->text, line->length);
1602       temp.length = line->length;
1603       if (key)
1604         {
1605           temp.keybeg = temp.text + (line->keybeg - line->text);
1606           temp.keylim = temp.text + (line->keylim - line->text);
1607         }
1608     }
1609
1610   xfclose (fp, file_name);
1611   free (buf.buf);
1612   if (temp.text)
1613     free (temp.text);
1614   return disordered;
1615 }
1616
1617 /* Merge lines from FILES onto OFP.  NFILES cannot be greater than
1618    NMERGE.  Close input and output files before returning.
1619    OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1620    output file has not been opened yet.  */
1621
1622 static void
1623 mergefps (char **files, register int nfiles,
1624           FILE *ofp, const char *output_file)
1625 {
1626   FILE *fps[NMERGE];            /* Input streams for each file.  */
1627   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1628   struct line saved;            /* Saved line storage for unique check. */
1629   struct line const *savedline = NULL;
1630                                 /* &saved if there is a saved line. */
1631   size_t savealloc = 0;         /* Size allocated for the saved line. */
1632   struct line const *cur[NMERGE]; /* Current line in each line table. */
1633   struct line const *base[NMERGE]; /* Base of each line table.  */
1634   int ord[NMERGE];              /* Table representing a permutation of fps,
1635                                    such that cur[ord[0]] is the smallest line
1636                                    and will be next output. */
1637   register int i, j, t;
1638   struct keyfield *key = keylist;
1639   saved.text = NULL;
1640
1641   /* Read initial lines from each input file. */
1642   for (i = 0; i < nfiles; ++i)
1643     {
1644       fps[i] = xfopen (files[i], "r");
1645       initbuf (&buffer[i], sizeof (struct line),
1646                MAX (merge_buffer_size, sort_size / nfiles));
1647       /* If a file is empty, eliminate it from future consideration. */
1648       while (i < nfiles && !fillbuf (&buffer[i], fps[i], files[i]))
1649         {
1650           xfclose (fps[i], files[i]);
1651           zaptemp (files[i]);
1652           --nfiles;
1653           for (j = i; j < nfiles; ++j)
1654             files[j] = files[j + 1];
1655         }
1656       if (i == nfiles)
1657         free (buffer[i].buf);
1658       else
1659         {
1660           struct line const *linelim = buffer_linelim (&buffer[i]);
1661           cur[i] = linelim - 1;
1662           base[i] = linelim - buffer[i].nlines;
1663         }
1664     }
1665
1666   if (! ofp)
1667     ofp = xfopen (output_file, "w");
1668
1669   /* Set up the ord table according to comparisons among input lines.
1670      Since this only reorders two items if one is strictly greater than
1671      the other, it is stable. */
1672   for (i = 0; i < nfiles; ++i)
1673     ord[i] = i;
1674   for (i = 1; i < nfiles; ++i)
1675     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1676       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1677
1678   /* Repeatedly output the smallest line until no input remains. */
1679   while (nfiles)
1680     {
1681       struct line const *smallest = cur[ord[0]];
1682
1683       /* If uniquified output is turned on, output only the first of
1684          an identical series of lines. */
1685       if (unique)
1686         {
1687           if (savedline && compare (savedline, smallest))
1688             {
1689               savedline = 0;
1690               write_bytes (saved.text, saved.length, ofp, output_file);
1691             }
1692           if (!savedline)
1693             {
1694               savedline = &saved;
1695               if (savealloc < smallest->length)
1696                 {
1697                   do
1698                     if (! savealloc)
1699                       {
1700                         savealloc = smallest->length;
1701                         break;
1702                       }
1703                   while ((savealloc *= 2) < smallest->length);
1704
1705                   saved.text = xrealloc (saved.text, savealloc);
1706                 }
1707               saved.length = smallest->length;
1708               memcpy (saved.text, smallest->text, saved.length);
1709               if (key)
1710                 {
1711                   saved.keybeg =
1712                     saved.text + (smallest->keybeg - smallest->text);
1713                   saved.keylim =
1714                     saved.text + (smallest->keylim - smallest->text);
1715                 }
1716             }
1717         }
1718       else
1719         write_bytes (smallest->text, smallest->length, ofp, output_file);
1720
1721       /* Check if we need to read more lines into core. */
1722       if (base[ord[0]] < smallest)
1723         cur[ord[0]] = smallest - 1;
1724       else
1725         {
1726           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1727             {
1728               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1729               cur[ord[0]] = linelim - 1;
1730               base[ord[0]] = linelim - buffer[ord[0]].nlines;
1731             }
1732           else
1733             {
1734               /* We reached EOF on fps[ord[0]]. */
1735               for (i = 1; i < nfiles; ++i)
1736                 if (ord[i] > ord[0])
1737                   --ord[i];
1738               --nfiles;
1739               xfclose (fps[ord[0]], files[ord[0]]);
1740               zaptemp (files[ord[0]]);
1741               free (buffer[ord[0]].buf);
1742               for (i = ord[0]; i < nfiles; ++i)
1743                 {
1744                   fps[i] = fps[i + 1];
1745                   files[i] = files[i + 1];
1746                   buffer[i] = buffer[i + 1];
1747                   cur[i] = cur[i + 1];
1748                   base[i] = base[i + 1];
1749                 }
1750               for (i = 0; i < nfiles; ++i)
1751                 ord[i] = ord[i + 1];
1752               continue;
1753             }
1754         }
1755
1756       /* The new line just read in may be larger than other lines
1757          already in core; push it back in the queue until we encounter
1758          a line larger than it. */
1759       for (i = 1; i < nfiles; ++i)
1760         {
1761           t = compare (cur[ord[0]], cur[ord[i]]);
1762           if (!t)
1763             t = ord[0] - ord[i];
1764           if (t < 0)
1765             break;
1766         }
1767       t = ord[0];
1768       for (j = 1; j < i; ++j)
1769         ord[j - 1] = ord[j];
1770       ord[i - 1] = t;
1771     }
1772
1773   if (unique && savedline)
1774     {
1775       write_bytes (saved.text, saved.length, ofp, output_file);
1776       free (saved.text);
1777     }
1778
1779   xfclose (ofp, output_file);
1780 }
1781
1782 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1783    The input and output arrays are in reverse order, and LINES and
1784    TEMP point just past the end of their respective arrays.  */
1785
1786 static void
1787 sortlines (struct line *lines, size_t nlines, struct line *temp)
1788 {
1789   register struct line *lo, *hi, *t;
1790   register size_t nlo, nhi;
1791
1792   if (nlines == 2)
1793     {
1794       if (0 < compare (&lines[-1], &lines[-2]))
1795         {
1796           struct line tmp = lines[-1];
1797           lines[-1] = lines[-2];
1798           lines[-2] = tmp;
1799         }
1800       return;
1801     }
1802
1803   nlo = nlines / 2;
1804   lo = lines;
1805   nhi = nlines - nlo;
1806   hi = lines - nlo;
1807
1808   if (nlo > 1)
1809     sortlines (lo, nlo, temp);
1810
1811   if (nhi > 1)
1812     sortlines (hi, nhi, temp);
1813
1814   t = temp;
1815
1816   while (nlo && nhi)
1817     if (compare (lo - 1, hi - 1) <= 0)
1818       *--t = *--lo, --nlo;
1819     else
1820       *--t = *--hi, --nhi;
1821   while (nlo--)
1822     *--t = *--lo;
1823
1824   for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1825     *--lo = *--t;
1826 }
1827
1828 /* Return the index of the first of NFILES FILES that is the same file
1829    as OUTFILE.  If none can be the same, return NFILES.  Consider an
1830    input pipe to be the same as OUTFILE, since the pipe might be the
1831    output of a command like "cat OUTFILE".  */
1832
1833 static int
1834 first_same_file (char **files, int nfiles, char const *outfile)
1835 {
1836   int i;
1837   int got_outstat = 0;
1838   struct stat instat, outstat;
1839
1840   for (i = 0; i < nfiles; i++)
1841     {
1842       int standard_input = STREQ (files[i], "-");
1843
1844       if (STREQ (outfile, files[i]) && ! standard_input)
1845         return i;
1846
1847       if (! got_outstat)
1848         {
1849           got_outstat = 1;
1850           if ((STREQ (outfile, "-")
1851                ? fstat (STDOUT_FILENO, &outstat)
1852                : stat (outfile, &outstat))
1853               != 0)
1854             return nfiles;
1855         }
1856
1857       if (((standard_input
1858             ? fstat (STDIN_FILENO, &instat)
1859             : stat (files[i], &instat))
1860            == 0)
1861           && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1862         return i;
1863     }
1864
1865   return nfiles;
1866 }
1867
1868 /* Check that each of the NFILES FILES is ordered.
1869    Return a count of disordered files. */
1870
1871 static int
1872 check (char **files, int nfiles)
1873 {
1874   int i, disorders = 0;
1875   FILE *fp;
1876
1877   for (i = 0; i < nfiles; ++i)
1878     {
1879       fp = xfopen (files[i], "r");
1880       disorders += checkfp (fp, files[i]);
1881     }
1882   return disorders;
1883 }
1884
1885 /* Merge NFILES FILES onto OUTPUT_FILE.  However, merge at most
1886    MAX_MERGE input files directly onto OUTPUT_FILE.  MAX_MERGE cannot
1887    exceed NMERGE.  */
1888
1889 static void
1890 merge (char **files, int nfiles, int max_merge, char const *output_file)
1891 {
1892   while (max_merge < nfiles)
1893     {
1894       FILE *tfp;
1895       int i, t = 0;
1896       char *temp;
1897       for (i = 0; i < nfiles / NMERGE; ++i)
1898         {
1899           temp = create_temp_file (&tfp);
1900           mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1901           files[t++] = temp;
1902         }
1903       temp = create_temp_file (&tfp);
1904       mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1905       files[t++] = temp;
1906       nfiles = t;
1907       if (nfiles == 1)
1908         break;
1909     }
1910
1911   mergefps (files, nfiles, NULL, output_file);
1912 }
1913
1914 /* Sort NFILES FILES onto OUTPUT_FILE. */
1915
1916 static void
1917 sort (char **files, int nfiles, char const *output_file)
1918 {
1919   struct buffer buf;
1920   int n_temp_files = 0;
1921   int output_file_created = 0;
1922
1923   static size_t size;
1924   if (! size && ! (size = sort_size))
1925     size = default_sort_size ();
1926
1927   buf.alloc = 0;
1928
1929   while (nfiles)
1930     {
1931       char const *temp_output;
1932       char const *file = *files;
1933       FILE *fp = xfopen (file, "r");
1934       FILE *tfp;
1935
1936       if (! buf.alloc)
1937         initbuf (&buf, 2 * sizeof (struct line),
1938                  sort_buffer_size (&fp, 1, files, nfiles,
1939                                    2 * sizeof (struct line), size));
1940       buf.eof = 0;
1941       files++;
1942       nfiles--;
1943
1944       while (fillbuf (&buf, fp, file))
1945         {
1946           struct line *line;
1947           struct line *linebase;
1948
1949           if (buf.eof && nfiles
1950               && (2 * sizeof (struct line) + 1
1951                   < (buf.alloc - buf.used
1952                      - 2 * sizeof (struct line) * buf.nlines)))
1953             {
1954               /* End of file, but there is more input and buffer room.
1955                  Concatenate the next input file; this is faster in
1956                  the usual case.  */
1957               buf.left = buf.used;
1958               break;
1959             }
1960
1961           line = buffer_linelim (&buf);
1962           linebase = line - buf.nlines;
1963           sortlines (line, buf.nlines, linebase);
1964           if (buf.eof && !nfiles && !n_temp_files && !buf.left)
1965             {
1966               xfclose (fp, file);
1967               tfp = xfopen (output_file, "w");
1968               temp_output = output_file;
1969               output_file_created = 1;
1970             }
1971           else
1972             {
1973               ++n_temp_files;
1974               temp_output = create_temp_file (&tfp);
1975             }
1976
1977           do
1978             {
1979               line--;
1980               write_bytes (line->text, line->length, tfp, temp_output);
1981               if (unique)
1982                 while (linebase < line && compare (line, line - 1) == 0)
1983                   line--;
1984             }
1985           while (linebase < line);
1986
1987           xfclose (tfp, temp_output);
1988
1989           if (output_file_created)
1990             goto finish;
1991         }
1992       xfclose (fp, file);
1993     }
1994
1995  finish:
1996   free (buf.buf);
1997
1998   if (! output_file_created)
1999     {
2000       int i = n_temp_files;
2001       struct tempnode *node;
2002       char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2003       for (node = temphead; i > 0; node = node->next)
2004         tempfiles[--i] = node->name;
2005       merge (tempfiles, n_temp_files, NMERGE, output_file);
2006       free ((char *) tempfiles);
2007     }
2008 }
2009
2010 /* Insert key KEY at the end of the key list.  */
2011
2012 static void
2013 insertkey (struct keyfield *key)
2014 {
2015   struct keyfield **p;
2016
2017   for (p = &keylist; *p; p = &(*p)->next)
2018     continue;
2019   *p = key;
2020   key->next = NULL;
2021 }
2022
2023 /* Report a bad field specification SPEC, with extra info MSGID.  */
2024
2025 static void badfieldspec PARAMS ((char const *, char const *))
2026      ATTRIBUTE_NORETURN;
2027 static void
2028 badfieldspec (char const *spec, char const *msgid)
2029 {
2030   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2031          _(msgid), spec);
2032   abort ();
2033 }
2034
2035 /* Parse the leading integer in STRING and store the resulting value
2036    (which must fit into size_t) into *VAL.  Return the address of the
2037    suffix after the integer.  If MSGID is NULL, return NULL after
2038    failure; otherwise, report MSGID and exit on failure.  */
2039
2040 static char const *
2041 parse_field_count (char const *string, size_t *val, char const *msgid)
2042 {
2043   char *suffix;
2044   uintmax_t n;
2045
2046   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2047     {
2048     case LONGINT_OK:
2049     case LONGINT_INVALID_SUFFIX_CHAR:
2050       *val = n;
2051       if (*val == n)
2052         break;
2053       /* Fall through.  */
2054     case LONGINT_OVERFLOW:
2055       if (msgid)
2056         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2057                _(msgid), (int) (suffix - string), string);
2058       return NULL;
2059
2060     case LONGINT_INVALID:
2061       if (msgid)
2062         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2063                _(msgid), string);
2064       return NULL;
2065     }
2066
2067   return suffix;
2068 }
2069
2070 /* Handle interrupts and hangups. */
2071
2072 static void
2073 sighandler (int sig)
2074 {
2075 #ifndef SA_NOCLDSTOP
2076   signal (sig, SIG_IGN);
2077 #endif
2078
2079   cleanup ();
2080
2081 #ifdef SA_NOCLDSTOP
2082   {
2083     struct sigaction sigact;
2084
2085     sigact.sa_handler = SIG_DFL;
2086     sigemptyset (&sigact.sa_mask);
2087     sigact.sa_flags = 0;
2088     sigaction (sig, &sigact, NULL);
2089   }
2090 #else
2091   signal (sig, SIG_DFL);
2092 #endif
2093
2094   kill (process_id, sig);
2095 }
2096
2097 /* Set the ordering options for KEY specified in S.
2098    Return the address of the first character in S that
2099    is not a valid ordering option.
2100    BLANKTYPE is the kind of blanks that 'b' should skip. */
2101
2102 static char *
2103 set_ordering (register const char *s, struct keyfield *key,
2104               enum blanktype blanktype)
2105 {
2106   while (*s)
2107     {
2108       switch (*s)
2109         {
2110         case 'b':
2111           if (blanktype == bl_start || blanktype == bl_both)
2112             key->skipsblanks = 1;
2113           if (blanktype == bl_end || blanktype == bl_both)
2114             key->skipeblanks = 1;
2115           break;
2116         case 'd':
2117           key->ignore = nondictionary;
2118           break;
2119         case 'f':
2120           key->translate = fold_toupper;
2121           break;
2122         case 'g':
2123           key->general_numeric = 1;
2124           break;
2125         case 'i':
2126           key->ignore = nonprinting;
2127           break;
2128         case 'M':
2129           key->month = 1;
2130           break;
2131         case 'n':
2132           key->numeric = 1;
2133           break;
2134         case 'r':
2135           key->reverse = 1;
2136           break;
2137         default:
2138           return (char *) s;
2139         }
2140       ++s;
2141     }
2142   return (char *) s;
2143 }
2144
2145 static struct keyfield *
2146 new_key (void)
2147 {
2148   struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key);
2149   key->eword = -1;
2150   return key;
2151 }
2152
2153 int
2154 main (int argc, char **argv)
2155 {
2156   struct keyfield *key;
2157   struct keyfield gkey;
2158   char const *s;
2159   int i;
2160   int c = 0;
2161   int checkonly = 0, mergeonly = 0, nfiles = 0;
2162   int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
2163   char *minus = "-", **files;
2164   char const *outfile = minus;
2165   static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2166   int nsigs = sizeof sigs / sizeof *sigs;
2167 #ifdef SA_NOCLDSTOP
2168   struct sigaction oldact, newact;
2169 #endif
2170
2171   program_name = argv[0];
2172   process_id = getpid ();
2173   setlocale (LC_ALL, "");
2174   bindtextdomain (PACKAGE, LOCALEDIR);
2175   textdomain (PACKAGE);
2176
2177 #ifdef ENABLE_NLS
2178
2179   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2180 # if HAVE_NL_LANGINFO
2181   hard_LC_TIME = hard_locale (LC_TIME);
2182 # endif
2183
2184   /* Let's get locale's representation of the decimal point */
2185   {
2186     struct lconv *lconvp = localeconv ();
2187
2188     /* If the locale doesn't define a decimal point, or if the decimal
2189        point is multibyte, use the C decimal point.  We don't support
2190        multibyte decimal points yet.  */
2191     decimal_point = *lconvp->decimal_point;
2192     if (! decimal_point || lconvp->decimal_point[1])
2193       decimal_point = C_DECIMAL_POINT;
2194
2195     /* We don't support multibyte thousands separators yet.  */
2196     th_sep = *lconvp->thousands_sep;
2197     if (! th_sep || lconvp->thousands_sep[1])
2198       th_sep = CHAR_MAX + 1;
2199   }
2200
2201 #endif /* NLS */
2202
2203   have_read_stdin = 0;
2204   inittables ();
2205
2206   /* Change the way xmalloc and xrealloc fail.  */
2207   xalloc_exit_failure = SORT_FAILURE;
2208   xalloc_fail_func = cleanup;
2209
2210 #ifdef SA_NOCLDSTOP
2211   sigemptyset (&caught_signals);
2212   for (i = 0; i < nsigs; i++)
2213     sigaddset (&caught_signals, sigs[i]);
2214   newact.sa_handler = sighandler;
2215   newact.sa_mask = caught_signals;
2216   newact.sa_flags = 0;
2217 #endif
2218
2219   for (i = 0; i < nsigs; i++)
2220     {
2221       int sig = sigs[i];
2222 #ifdef SA_NOCLDSTOP
2223       sigaction (sig, NULL, &oldact);
2224       if (oldact.sa_handler != SIG_IGN)
2225         sigaction (sig, &newact, NULL);
2226 #else
2227       if (signal (sig, SIG_IGN) != SIG_IGN)
2228         signal (sig, sighandler);
2229 #endif
2230     }
2231
2232   gkey.sword = gkey.eword = -1;
2233   gkey.ignore = NULL;
2234   gkey.translate = NULL;
2235   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2236   gkey.skipsblanks = gkey.skipeblanks = 0;
2237
2238   files = (char **) xmalloc (sizeof (char *) * argc);
2239
2240   for (;;)
2241     {
2242       /* Parse an operand as a file after "--" was seen; or if
2243          pedantic and a file was seen, unless -c was not seen and the
2244          operand is "-o FILE" or "-oFILE".  POSIX 1003.1-200x d5
2245          removes the exception for the -o options; when it becomes
2246          official the code will need to be changed.  */
2247
2248       if (c == -1
2249           || (posix_pedantic && nfiles != 0
2250               && ! (! checkonly
2251                     && optind != argc
2252                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2253                     && (argv[optind][2] || optind + 1 != argc)))
2254           || ((c = getopt_long (argc, argv,
2255                                 "-bcdfgik:mMno:rsS:t:T:uy::z",
2256                                 long_options, NULL))
2257               == -1))
2258         {
2259           if (optind == argc)
2260             break;
2261           files[nfiles++] = argv[optind++];
2262         }
2263       else switch (c)
2264         {
2265         case 1:
2266           /* Treat +POS1 [-POS2] as a key if possible; but silently
2267              treat an operand as a file if it is not a valid +POS1.
2268              POSIX 1003.1-200x d5 does not allow support for +POS1, so
2269              when it becomes official this code will need to be
2270              changed.  */
2271           key = NULL;
2272           if (optarg[0] == '+')
2273             {
2274               key = new_key ();
2275               s = parse_field_count (optarg + 1, &key->sword, NULL);
2276               if (s && *s == '.')
2277                 s = parse_field_count (s + 1, &key->schar, NULL);
2278               if (! (key->sword | key->schar))
2279                 key->sword = -1;
2280               if (! s || *set_ordering (s, key, bl_start))
2281                 {
2282                   free (key);
2283                   key = NULL;
2284                 }
2285               else
2286                 {
2287                   if (optind != argc && argv[optind][0] == '-'
2288                       && ISDIGIT (argv[optind][1]))
2289                     {
2290                       char const *optarg1 = argv[optind++];
2291                       s = parse_field_count (optarg1 + 1, &key->eword,
2292                                              N_("invalid number after `-'"));
2293                       if (*s == '.')
2294                         s = parse_field_count (s + 1, &key->echar,
2295                                                N_("invalid number after `.'"));
2296                       if (*set_ordering (s, key, bl_end))
2297                         badfieldspec (optarg1,
2298                                       N_("stray character in field spec"));
2299                     }
2300                   insertkey (key);
2301                 }
2302             }
2303           if (! key)
2304             files[nfiles++] = optarg;
2305           break;
2306
2307         case 'b':
2308         case 'd':
2309         case 'f':
2310         case 'g':
2311         case 'i':
2312         case 'M':
2313         case 'n':
2314         case 'r':
2315           {
2316             char str[2];
2317             str[0] = c;
2318             str[1] = '\0';
2319             set_ordering (str, &gkey, bl_both);
2320           }
2321           break;
2322
2323         case 'c':
2324           checkonly = 1;
2325           break;
2326
2327         case 'k':
2328           key = new_key ();
2329
2330           /* Get POS1. */
2331           s = parse_field_count (optarg, &key->sword,
2332                                  N_("invalid number at field start"));
2333           if (! key->sword--)
2334             {
2335               /* Provoke with `sort -k0' */
2336               badfieldspec (optarg, N_("field number is zero"));
2337             }
2338           if (*s == '.')
2339             {
2340               s = parse_field_count (s + 1, &key->schar,
2341                                      N_("invalid number after `.'"));
2342               if (! key->schar--)
2343                 {
2344                   /* Provoke with `sort -k1.0' */
2345                   badfieldspec (optarg, N_("character offset is zero"));
2346                 }
2347             }
2348           if (! (key->sword | key->schar))
2349             key->sword = -1;
2350           s = set_ordering (s, key, bl_start);
2351           if (*s != ',')
2352             {
2353               key->eword = -1;
2354               key->echar = 0;
2355             }
2356           else
2357             {
2358               /* Get POS2. */
2359               s = parse_field_count (s + 1, &key->eword,
2360                                      N_("invalid number after `,'"));
2361               if (! key->eword--)
2362                 {
2363                   /* Provoke with `sort -k1,0' */
2364                   badfieldspec (optarg, N_("field number is zero"));
2365                 }
2366               if (*s == '.')
2367                 s = parse_field_count (s + 1, &key->echar,
2368                                        N_("invalid number after `.'"));
2369               else
2370                 {
2371                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2372                   key->eword++;
2373                 }
2374               s = set_ordering (s, key, bl_end);
2375             }
2376           if (*s)
2377             badfieldspec (optarg, N_("stray character in field spec"));
2378           insertkey (key);
2379           break;
2380
2381         case 'm':
2382           mergeonly = 1;
2383           break;
2384
2385         case 'o':
2386           outfile = optarg;
2387           break;
2388
2389         case 's':
2390           stable = 1;
2391           break;
2392
2393         case 'S':
2394           specify_sort_size (optarg);
2395           break;
2396
2397         case 't':
2398           tab = optarg[0];
2399           if (tab && optarg[1])
2400             {
2401               /* Provoke with `sort -txx'.  Complain about
2402                  "multi-character tab" instead of "multibyte tab", so
2403                  that the diagnostic's wording does not need to be
2404                  changed once multibyte characters are supported.  */
2405               error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg);
2406             }
2407           break;
2408
2409         case 'T':
2410           add_temp_dir (optarg);
2411           break;
2412
2413         case 'u':
2414           unique = 1;
2415           break;
2416
2417         case 'y':
2418           /* Accept and ignore e.g. -y0 for compatibility with Solaris
2419              2.x through Solaris 7.  -y is marked as obsolete starting
2420              with Solaris 8.  */
2421           break;
2422
2423         case 'z':
2424           eolchar = 0;
2425           break;
2426
2427         case_GETOPT_HELP_CHAR;
2428
2429         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2430
2431         default:
2432           usage (SORT_FAILURE);
2433         }
2434     }
2435
2436   /* Inheritance of global options to individual keys. */
2437   for (key = keylist; key; key = key->next)
2438     if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2439         && !key->skipeblanks && !key->month && !key->numeric
2440         && !key->general_numeric)
2441       {
2442         key->ignore = gkey.ignore;
2443         key->translate = gkey.translate;
2444         key->skipsblanks = gkey.skipsblanks;
2445         key->skipeblanks = gkey.skipeblanks;
2446         key->month = gkey.month;
2447         key->numeric = gkey.numeric;
2448         key->general_numeric = gkey.general_numeric;
2449         key->reverse = gkey.reverse;
2450       }
2451
2452   if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks
2453                    || gkey.skipeblanks || gkey.month || gkey.numeric
2454                    || gkey.general_numeric))
2455     insertkey (&gkey);
2456   reverse = gkey.reverse;
2457
2458   if (temp_dir_count == 0)
2459     {
2460       char const *tmp_dir = getenv ("TMPDIR");
2461       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2462     }
2463
2464   if (nfiles == 0)
2465     {
2466       nfiles = 1;
2467       files = &minus;
2468     }
2469
2470   if (checkonly)
2471     {
2472       if (nfiles > 1)
2473         error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2474                files[1]);
2475
2476       /* POSIX requires that sort return 1 IFF invoked with -c and the
2477          input is not properly sorted.  */
2478       exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2479     }
2480
2481   if (mergeonly)
2482     {
2483       int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2484       merge (files, nfiles, max_merge, outfile);
2485     }
2486   else
2487     sort (files, nfiles, outfile);
2488
2489   if (have_read_stdin && fclose (stdin) == EOF)
2490     die (_("close failed"), "-");
2491
2492   cleanup ();
2493   exit (EXIT_SUCCESS);
2494 }