1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
34 #include "hard-locale.h"
44 #include "strnumcmp.h"
49 #if HAVE_SYS_RESOURCE_H
50 # include <sys/resource.h>
53 struct rlimit { size_t rlim_cur; };
54 # define getrlimit(Resource, Rlp) (-1)
57 /* The official name of this program (e.g., no `g' prefix). */
58 #define PROGRAM_NAME "sort"
60 #define AUTHORS "Mike Haertel", "Paul Eggert"
62 #if HAVE_LANGINFO_CODESET
63 # include <langinfo.h>
66 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
69 # define SA_NOCLDSTOP 0
70 /* No sigprocmask. Always 'return' zero. */
71 # define sigprocmask(How, Set, Oset) (0)
73 # if ! HAVE_SIGINTERRUPT
74 # define siginterrupt(sig, flag) /* empty */
82 #define UCHAR_LIM (UCHAR_MAX + 1)
84 #ifndef DEFAULT_TMPDIR
85 # define DEFAULT_TMPDIR "/tmp"
91 /* POSIX says to exit with status 1 if invoked with -c and the
92 input is not properly sorted. */
93 SORT_OUT_OF_ORDER = 1,
95 /* POSIX says any other irregular exit must exit with a status
96 code greater than 1. */
102 /* The number of times we should try to fork a compression process
103 (we retry if the fork call fails). We don't _need_ to compress
104 temp files, this is just to reduce disk access, so this number
106 MAX_FORK_TRIES_COMPRESS = 2,
108 /* The number of times we should try to fork a decompression process.
109 If we can't fork a decompression process, we can't sort, so this
110 number should be big. */
111 MAX_FORK_TRIES_DECOMPRESS = 8
114 /* The representation of the decimal point in the current locale. */
115 static int decimal_point;
117 /* Thousands separator; if -1, then there isn't one. */
118 static int thousands_sep;
120 /* Nonzero if the corresponding locales are hard. */
121 static bool hard_LC_COLLATE;
123 static bool hard_LC_TIME;
126 #define NONZERO(x) ((x) != 0)
128 /* The kind of blanks for '-b' to skip in various options. */
129 enum blanktype { bl_start, bl_end, bl_both };
131 /* The character marking end of line. Default to \n. */
132 static char eolchar = '\n';
134 /* Lines are held in core as counted strings. */
137 char *text; /* Text of the line. */
138 size_t length; /* Length including final newline. */
139 char *keybeg; /* Start of first key. */
140 char *keylim; /* Limit of first key. */
146 char *buf; /* Dynamically allocated buffer,
147 partitioned into 3 regions:
150 - an array of lines, in reverse order. */
151 size_t used; /* Number of bytes used for input data. */
152 size_t nlines; /* Number of lines in the line array. */
153 size_t alloc; /* Number of bytes allocated. */
154 size_t left; /* Number of bytes left from previous reads. */
155 size_t line_bytes; /* Number of bytes to reserve for each line. */
156 bool eof; /* An EOF has been read. */
161 size_t sword; /* Zero-origin 'word' to start at. */
162 size_t schar; /* Additional characters to skip. */
163 size_t eword; /* Zero-origin first word after field. */
164 size_t echar; /* Additional characters in field. */
165 bool const *ignore; /* Boolean array of characters to ignore. */
166 char const *translate; /* Translation applied to characters. */
167 bool skipsblanks; /* Skip leading blanks when finding start. */
168 bool skipeblanks; /* Skip leading blanks when finding end. */
169 bool numeric; /* Flag for numeric comparison. Handle
170 strings of digits with optional decimal
171 point, but no exponential notation. */
172 bool random; /* Sort by random hash of key. */
173 bool general_numeric; /* Flag for general, numeric comparison.
174 Handle numbers in exponential notation. */
175 bool month; /* Flag for comparison by month name. */
176 bool reverse; /* Reverse the sense of comparison. */
177 struct keyfield *next; /* Next keyfield to try. */
186 /* The name this program was run with. */
189 /* FIXME: None of these tables work with multibyte character sets.
190 Also, there are many other bugs when handling multibyte characters.
191 One way to fix this is to rewrite `sort' to use wide characters
192 internally, but doing this with good performance is a bit
195 /* Table of blanks. */
196 static bool blanks[UCHAR_LIM];
198 /* Table of non-printing characters. */
199 static bool nonprinting[UCHAR_LIM];
201 /* Table of non-dictionary characters (not letters, digits, or blanks). */
202 static bool nondictionary[UCHAR_LIM];
204 /* Translation table folding lower case to upper. */
205 static char fold_toupper[UCHAR_LIM];
207 #define MONTHS_PER_YEAR 12
209 /* Table mapping month names to integers.
210 Alphabetic order allows binary search. */
211 static struct month monthtab[] =
227 /* During the merge phase, the number of files to merge at once. */
230 /* Minimum size for a merge or check buffer. */
231 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
233 /* Minimum sort size; the code might not work with smaller sizes. */
234 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
236 /* The number of bytes needed for a merge or check buffer, which can
237 function relatively efficiently even if it holds only one line. If
238 a longer line is seen, this value is increased. */
239 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
241 /* The approximate maximum number of bytes of main memory to use, as
242 specified by the user. Zero if the user has not specified a size. */
243 static size_t sort_size;
245 /* The guessed size for non-regular files. */
246 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
248 /* Array of directory names in which any temporary files are to be created. */
249 static char const **temp_dirs;
251 /* Number of temporary directory names used. */
252 static size_t temp_dir_count;
254 /* Number of allocated slots in temp_dirs. */
255 static size_t temp_dir_alloc;
257 /* Flag to reverse the order of all comparisons. */
260 /* Flag for stable sort. This turns off the last ditch bytewise
261 comparison of lines, and instead leaves lines in the same order
262 they were read if all keys compare equal. */
265 /* If TAB has this value, blanks separate fields. */
266 enum { TAB_DEFAULT = CHAR_MAX + 1 };
268 /* Tab character separating fields. If TAB_DEFAULT, then fields are
269 separated by the empty string between a non-blank character and a blank
271 static int tab = TAB_DEFAULT;
273 /* Flag to remove consecutive duplicate lines from the output.
274 Only the last of a sequence of equal lines will be output. */
277 /* Nonzero if any of the input files are the standard input. */
278 static bool have_read_stdin;
280 /* List of key field comparisons to be tried. */
281 static struct keyfield *keylist;
283 /* Program used to (de)compress temp files. Must accept -d. */
284 static char const *compress_program;
286 static void sortlines_temp (struct line *, size_t, struct line *);
288 /* Report MESSAGE for FILE, then clean up and exit.
289 If FILE is null, it represents standard output. */
291 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
293 die (char const *message, char const *file)
295 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
302 if (status != EXIT_SUCCESS)
303 fprintf (stderr, _("Try `%s --help' for more information.\n"),
308 Usage: %s [OPTION]... [FILE]...\n\
312 Write sorted concatenation of all FILE(s) to standard output.\n\
316 Mandatory arguments to long options are mandatory for short options too.\n\
323 -b, --ignore-leading-blanks ignore leading blanks\n\
324 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
325 -f, --ignore-case fold lower case to upper case characters\n\
328 -g, --general-numeric-sort compare according to general numerical value\n\
329 -i, --ignore-nonprinting consider only printable characters\n\
330 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
331 -n, --numeric-sort compare according to string numerical value\n\
332 -R, --random-sort sort by random hash of keys\n\
333 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
334 -r, --reverse reverse the result of comparisons\n\
340 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
341 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
342 --compress-program=PROG compress temporaries with PROG;\n\
343 decompress them with PROG -d\n\
344 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
345 -m, --merge merge already sorted files; do not sort\n\
348 -o, --output=FILE write result to FILE instead of standard output\n\
349 -s, --stable stabilize sort by disabling last-resort comparison\n\
350 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
353 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
354 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
355 multiple options specify multiple directories\n\
356 -u, --unique with -c, check for strict ordering;\n\
357 without -c, output only the first of an equal run\n\
360 -z, --zero-terminated end lines with 0 byte, not newline\n\
362 fputs (HELP_OPTION_DESCRIPTION, stdout);
363 fputs (VERSION_OPTION_DESCRIPTION, stdout);
366 POS is F[.C][OPTS], where F is the field number and C the character position\n\
367 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
368 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
369 one or more single-letter ordering options, which override global ordering\n\
370 options for that key. If no key is given, use the entire line as the key.\n\
372 SIZE may be followed by the following multiplicative suffixes:\n\
375 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
377 With no FILE, or when FILE is -, read standard input.\n\
380 The locale specified by the environment affects sort order.\n\
381 Set LC_ALL=C to get the traditional sort order that uses\n\
382 native byte values.\n\
384 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
390 /* For long options that have no equivalent short option, use a
391 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
394 CHECK_OPTION = CHAR_MAX + 1,
395 COMPRESS_PROGRAM_OPTION,
399 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
401 static struct option const long_options[] =
403 {"ignore-leading-blanks", no_argument, NULL, 'b'},
404 {"check", optional_argument, NULL, CHECK_OPTION},
405 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
406 {"dictionary-order", no_argument, NULL, 'd'},
407 {"ignore-case", no_argument, NULL, 'f'},
408 {"general-numeric-sort", no_argument, NULL, 'g'},
409 {"ignore-nonprinting", no_argument, NULL, 'i'},
410 {"key", required_argument, NULL, 'k'},
411 {"merge", no_argument, NULL, 'm'},
412 {"month-sort", no_argument, NULL, 'M'},
413 {"numeric-sort", no_argument, NULL, 'n'},
414 {"random-sort", no_argument, NULL, 'R'},
415 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
416 {"output", required_argument, NULL, 'o'},
417 {"reverse", no_argument, NULL, 'r'},
418 {"stable", no_argument, NULL, 's'},
419 {"buffer-size", required_argument, NULL, 'S'},
420 {"field-separator", required_argument, NULL, 't'},
421 {"temporary-directory", required_argument, NULL, 'T'},
422 {"unique", no_argument, NULL, 'u'},
423 {"zero-terminated", no_argument, NULL, 'z'},
424 {GETOPT_HELP_OPTION_DECL},
425 {GETOPT_VERSION_OPTION_DECL},
429 static char const *const check_args[] =
431 "quiet", "silent", "diagnose-first", NULL
433 static char const check_types[] =
437 ARGMATCH_VERIFY (check_args, check_types);
439 /* The set of signals that are caught. */
440 static sigset_t caught_signals;
442 /* Critical section status. */
449 /* Enter a critical section. */
450 static struct cs_status
453 struct cs_status status;
454 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
458 /* Leave a critical section. */
460 cs_leave (struct cs_status status)
464 /* Ignore failure when restoring the signal mask. */
465 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
469 /* The list of temporary files. */
472 struct tempnode *volatile next;
473 pid_t pid; /* If compressed, the pid of compressor, else zero */
474 char name[1]; /* Actual size is 1 + file name length. */
476 static struct tempnode *volatile temphead;
477 static struct tempnode *volatile *temptail = &temphead;
482 pid_t pid; /* If compressed, the pid of compressor, else zero */
485 /* A table where we store compression process states. We clean up all
486 processes in a timely manner so as not to exhaust system resources,
487 so we store the info on whether the process is still running, or has
489 static Hash_table *proctab;
491 enum { INIT_PROCTAB_SIZE = 47 };
493 enum procstate { ALIVE, ZOMBIE };
495 /* A proctab entry. The COUNT field is there in case we fork a new
496 compression process that has the same PID as an old zombie process
497 that is still in the table (because the process to decompress the
498 temp file it was associated with hasn't started yet). */
502 enum procstate state;
507 proctab_hasher (const void *entry, size_t tabsize)
509 const struct procnode *node = entry;
510 return node->pid % tabsize;
514 proctab_comparator (const void *e1, const void *e2)
516 const struct procnode *n1 = e1, *n2 = e2;
517 return n1->pid == n2->pid;
520 /* The total number of forked processes (compressors and decompressors)
521 that have not been reaped yet. */
522 static size_t nprocs;
524 /* The number of child processes we'll allow before we try to reap some. */
525 enum { MAX_PROCS_BEFORE_REAP = 2 };
527 /* If 0 < PID, wait for the child process with that PID to exit.
528 If PID is -1, clean up a random child process which has finished and
529 return the process ID of that child. If PID is -1 and no processes
530 have quit yet, return 0 without waiting. */
536 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
539 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
543 if (! WIFEXITED (status) || WEXITSTATUS (status))
544 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
552 /* Add the PID of a running compression process to proctab, or update
553 the entry COUNT and STATE fields if it's already there. This also
554 creates the table for us the first time it's called. */
557 register_proc (pid_t pid)
559 struct procnode test, *node;
563 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
572 node = hash_lookup (proctab, &test);
580 node = xmalloc (sizeof *node);
584 hash_insert (proctab, node);
588 /* This is called when we reap a random process. We don't know
589 whether we have reaped a compression process or a decompression
590 process until we look in the table. If there's an ALIVE entry for
591 it, then we have reaped a compression process, so change the state
592 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
595 update_proc (pid_t pid)
597 struct procnode test, *node;
600 node = hash_lookup (proctab, &test);
602 node->state = ZOMBIE;
605 /* This is for when we need to wait for a compression process to exit.
606 If it has a ZOMBIE entry in the table then it's already dead and has
607 been reaped. Note that if there's an ALIVE entry for it, it still may
608 already have died and been reaped if a second process was created with
609 the same PID. This is probably exceedingly rare, but to be on the safe
610 side we will have to wait for any compression process with this PID. */
613 wait_proc (pid_t pid)
615 struct procnode test, *node;
618 node = hash_lookup (proctab, &test);
619 if (node->state == ALIVE)
622 node->state = ZOMBIE;
625 hash_delete (proctab, node);
630 /* Keep reaping finished children as long as there are more to reap.
631 This doesn't block waiting for any of them, it only reaps those
632 that are already dead. */
639 while (0 < nprocs && (pid = reap (-1)))
643 /* Clean up any remaining temporary files. */
648 struct tempnode const *node;
650 for (node = temphead; node; node = node->next)
655 /* Cleanup actions to take when exiting. */
662 /* Clean up any remaining temporary files in a critical section so
663 that a signal handler does not try to clean them too. */
664 struct cs_status cs = cs_enter ();
672 /* Create a new temporary file, returning its newly allocated tempnode.
673 Store into *PFD the file descriptor open for writing. */
675 static struct tempnode *
676 create_temp_file (int *pfd)
678 static char const slashbase[] = "/sortXXXXXX";
679 static size_t temp_dir_index;
682 char const *temp_dir = temp_dirs[temp_dir_index];
683 size_t len = strlen (temp_dir);
684 struct tempnode *node =
685 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
686 char *file = node->name;
689 memcpy (file, temp_dir, len);
690 memcpy (file + len, slashbase, sizeof slashbase);
693 if (++temp_dir_index == temp_dir_count)
696 /* Create the temporary file in a critical section, to avoid races. */
702 temptail = &node->next;
709 die (_("cannot create temporary file"), file);
715 /* Return a stream for FILE, opened with mode HOW. A null FILE means
716 standard output; HOW should be "w". When opening for input, "-"
717 means standard input. To avoid confusion, do not return file
718 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
719 opening an ordinary FILE. */
722 xfopen (const char *file, const char *how)
728 else if (STREQ (file, "-") && *how == 'r')
730 have_read_stdin = true;
735 fp = fopen (file, how);
737 die (_("open failed"), file);
743 /* Close FP, whose name is FILE, and report any errors. */
746 xfclose (FILE *fp, char const *file)
751 /* Allow reading stdin from tty more than once. */
757 /* Don't close stdout just yet. close_stdout does that. */
758 if (fflush (fp) != 0)
759 die (_("fflush failed"), file);
763 if (fclose (fp) != 0)
764 die (_("close failed"), file);
770 dup2_or_die (int oldfd, int newfd)
772 if (dup2 (oldfd, newfd) < 0)
773 error (SORT_FAILURE, errno, _("dup2 failed"));
776 /* Fork a child process for piping to and do common cleanup. The
777 TRIES parameter tells us how many times to try to fork before
778 giving up. Return the PID of the child or -1 if fork failed. */
781 pipe_fork (int pipefds[2], size_t tries)
783 #if HAVE_WORKING_FORK
784 struct tempnode *saved_temphead;
786 unsigned int wait_retry = 1;
787 pid_t pid IF_LINT (= -1);
790 if (pipe (pipefds) < 0)
795 /* This is so the child process won't delete our temp files
796 if it receives a signal before exec-ing. */
798 saved_temphead = temphead;
804 temphead = saved_temphead;
809 if (0 <= pid || errno != EAGAIN)
826 close (STDIN_FILENO);
827 close (STDOUT_FILENO);
834 #else /* ! HAVE_WORKING_FORK */
839 /* Create a temporary file and start a compression program to filter output
840 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
841 set it to the PID of the newly-created process. */
844 create_temp (FILE **pfp, pid_t *ppid)
847 struct tempnode *node = create_temp_file (&tempfd);
848 char *name = node->name;
850 if (! compress_program)
852 static char const default_compress_program[] = "gzip";
853 char const *prog = find_in_path (default_compress_program);
854 compress_program = (prog == default_compress_program ? "" : prog);
857 if (*compress_program)
861 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
868 register_proc (node->pid);
870 else if (node->pid == 0)
873 dup2_or_die (tempfd, STDOUT_FILENO);
875 dup2_or_die (pipefds[0], STDIN_FILENO);
878 if (execlp (compress_program, compress_program,
880 error (SORT_FAILURE, errno, _("couldn't execute %s"),
887 *pfp = fdopen (tempfd, "w");
889 die (_("couldn't create temporary file"), name);
897 /* Open a compressed temp file and start a decompression process through
898 which to filter the input. PID must be the valid processes ID of the
899 process used to compress the file. */
902 open_temp (const char *name, pid_t pid)
904 int tempfd, pipefds[2];
910 tempfd = open (name, O_RDONLY);
912 die (_("couldn't open temporary file"), name);
914 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
920 else if (child_pid == 0)
923 dup2_or_die (tempfd, STDIN_FILENO);
925 dup2_or_die (pipefds[1], STDOUT_FILENO);
928 if (execlp (compress_program, compress_program,
929 "-d", (char *) NULL) < 0)
930 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
934 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
937 fp = fdopen (pipefds[0], "r");
939 die (_("couldn't create temporary file"), name);
945 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
947 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
948 die (_("write failed"), output_file);
951 /* Append DIR to the array of temporary directory names. */
953 add_temp_dir (char const *dir)
955 if (temp_dir_count == temp_dir_alloc)
956 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
958 temp_dirs[temp_dir_count++] = dir;
961 /* Remove NAME from the list of temporary files. */
964 zaptemp (const char *name)
966 struct tempnode *volatile *pnode;
967 struct tempnode *node;
968 struct tempnode *next;
970 int unlink_errno = 0;
973 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
976 /* Unlink the temporary file in a critical section to avoid races. */
979 unlink_status = unlink (name);
980 unlink_errno = errno;
984 if (unlink_status != 0)
985 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
994 struct_month_cmp (const void *m1, const void *m2)
996 struct month const *month1 = m1;
997 struct month const *month2 = m2;
998 return strcmp (month1->name, month2->name);
1003 /* Initialize the character class tables. */
1010 for (i = 0; i < UCHAR_LIM; ++i)
1012 blanks[i] = !! isblank (i);
1013 nonprinting[i] = ! isprint (i);
1014 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1015 fold_toupper[i] = toupper (i);
1018 #if HAVE_NL_LANGINFO
1019 /* If we're not in the "C" locale, read different names for months. */
1022 for (i = 0; i < MONTHS_PER_YEAR; i++)
1029 s = (char *) nl_langinfo (ABMON_1 + i);
1031 monthtab[i].name = name = xmalloc (s_len + 1);
1032 monthtab[i].val = i + 1;
1034 for (j = 0; j < s_len; j++)
1035 name[j] = fold_toupper[to_uchar (s[j])];
1038 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1039 sizeof *monthtab, struct_month_cmp);
1044 /* Specify the amount of main memory to use when sorting. */
1046 specify_sort_size (char const *s)
1050 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1052 /* The default unit is KiB. */
1053 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1055 if (n <= UINTMAX_MAX / 1024)
1058 e = LONGINT_OVERFLOW;
1061 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1062 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1071 double mem = physmem_total () * n / 100;
1073 /* Use "<", not "<=", to avoid problems with rounding. */
1074 if (mem < UINTMAX_MAX)
1080 e = LONGINT_OVERFLOW;
1085 if (e == LONGINT_OK)
1087 /* If multiple sort sizes are specified, take the maximum, so
1088 that option order does not matter. */
1095 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1099 e = LONGINT_OVERFLOW;
1102 STRTOL_FATAL_ERROR (s, _("sort size"), e);
1105 /* Return the default sort size. */
1107 default_sort_size (void)
1109 /* Let MEM be available memory or 1/8 of total memory, whichever
1111 double avail = physmem_available ();
1112 double total = physmem_total ();
1113 double mem = MAX (avail, total / 8);
1114 struct rlimit rlimit;
1116 /* Let SIZE be MEM, but no more than the maximum object size or
1117 system resource limits. Avoid the MIN macro here, as it is not
1118 quite right when only one argument is floating point. Don't
1119 bother to check for values like RLIM_INFINITY since in practice
1120 they are not much less than SIZE_MAX. */
1121 size_t size = SIZE_MAX;
1124 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1125 size = rlimit.rlim_cur;
1127 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1128 size = rlimit.rlim_cur;
1131 /* Leave a large safety margin for the above limits, as failure can
1132 occur when they are exceeded. */
1136 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1137 Exceeding RSS is not fatal, but can be quite slow. */
1138 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1139 size = rlimit.rlim_cur / 16 * 15;
1142 /* Use no less than the minimum. */
1143 return MAX (size, MIN_SORT_SIZE);
1146 /* Return the sort buffer size to use with the input files identified
1147 by FPS and FILES, which are alternate names of the same files.
1148 NFILES gives the number of input files; NFPS may be less. Assume
1149 that each input line requires LINE_BYTES extra bytes' worth of line
1150 information. Do not exceed the size bound specified by the user
1151 (or a default size bound, if the user does not specify one). */
1154 sort_buffer_size (FILE *const *fps, size_t nfps,
1155 char *const *files, size_t nfiles,
1158 /* A bound on the input size. If zero, the bound hasn't been
1160 static size_t size_bound;
1162 /* In the worst case, each input byte is a newline. */
1163 size_t worst_case_per_input_byte = line_bytes + 1;
1165 /* Keep enough room for one extra input line and an extra byte.
1166 This extra room might be needed when preparing to read EOF. */
1167 size_t size = worst_case_per_input_byte + 1;
1171 for (i = 0; i < nfiles; i++)
1177 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1178 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1179 : stat (files[i], &st))
1181 die (_("stat failed"), files[i]);
1183 if (S_ISREG (st.st_mode))
1184 file_size = st.st_size;
1187 /* The file has unknown size. If the user specified a sort
1188 buffer size, use that; otherwise, guess the size. */
1191 file_size = INPUT_FILE_SIZE_GUESS;
1196 size_bound = sort_size;
1198 size_bound = default_sort_size ();
1201 /* Add the amount of memory needed to represent the worst case
1202 where the input consists entirely of newlines followed by a
1203 single non-newline. Check for overflow. */
1204 worst_case = file_size * worst_case_per_input_byte + 1;
1205 if (file_size != worst_case / worst_case_per_input_byte
1206 || size_bound - size <= worst_case)
1214 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1215 must be at least sizeof (struct line). Allocate ALLOC bytes
1219 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1221 /* Ensure that the line array is properly aligned. If the desired
1222 size cannot be allocated, repeatedly halve it until allocation
1223 succeeds. The smaller allocation may hurt overall performance,
1224 but that's better than failing. */
1227 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1228 buf->buf = malloc (alloc);
1232 if (alloc <= line_bytes + 1)
1236 buf->line_bytes = line_bytes;
1238 buf->used = buf->left = buf->nlines = 0;
1242 /* Return one past the limit of the line array. */
1244 static inline struct line *
1245 buffer_linelim (struct buffer const *buf)
1247 return (struct line *) (buf->buf + buf->alloc);
1250 /* Return a pointer to the first character of the field specified
1254 begfield (const struct line *line, const struct keyfield *key)
1256 char *ptr = line->text, *lim = ptr + line->length - 1;
1257 size_t sword = key->sword;
1258 size_t schar = key->schar;
1259 size_t remaining_bytes;
1261 /* The leading field separator itself is included in a field when -t
1264 if (tab != TAB_DEFAULT)
1265 while (ptr < lim && sword--)
1267 while (ptr < lim && *ptr != tab)
1273 while (ptr < lim && sword--)
1275 while (ptr < lim && blanks[to_uchar (*ptr)])
1277 while (ptr < lim && !blanks[to_uchar (*ptr)])
1281 if (key->skipsblanks)
1282 while (ptr < lim && blanks[to_uchar (*ptr)])
1285 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1286 remaining_bytes = lim - ptr;
1287 if (schar < remaining_bytes)
1295 /* Return the limit of (a pointer to the first character after) the field
1296 in LINE specified by KEY. */
1299 limfield (const struct line *line, const struct keyfield *key)
1301 char *ptr = line->text, *lim = ptr + line->length - 1;
1302 size_t eword = key->eword, echar = key->echar;
1303 size_t remaining_bytes;
1305 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1306 whichever comes first. If there are more than EWORD fields, leave
1307 PTR pointing at the beginning of the field having zero-based index,
1308 EWORD. If a delimiter character was specified (via -t), then that
1309 `beginning' is the first character following the delimiting TAB.
1310 Otherwise, leave PTR pointing at the first `blank' character after
1311 the preceding field. */
1312 if (tab != TAB_DEFAULT)
1313 while (ptr < lim && eword--)
1315 while (ptr < lim && *ptr != tab)
1317 if (ptr < lim && (eword | echar))
1321 while (ptr < lim && eword--)
1323 while (ptr < lim && blanks[to_uchar (*ptr)])
1325 while (ptr < lim && !blanks[to_uchar (*ptr)])
1329 #ifdef POSIX_UNSPECIFIED
1330 /* The following block of code makes GNU sort incompatible with
1331 standard Unix sort, so it's ifdef'd out for now.
1332 The POSIX spec isn't clear on how to interpret this.
1333 FIXME: request clarification.
1335 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1336 Date: Thu, 30 May 96 12:20:41 -0400
1337 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1339 [...]I believe I've found another bug in `sort'.
1344 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1347 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1351 Unix sort produced the answer I expected: sort on the single character
1352 in column 7. GNU sort produced different results, because it disagrees
1353 on the interpretation of the key-end spec "M.N". Unix sort reads this
1354 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1355 "skip M-1 fields, then either N-1 characters or the rest of the current
1356 field, whichever comes first". This extra clause applies only to
1357 key-ends, not key-starts.
1360 /* Make LIM point to the end of (one byte past) the current field. */
1361 if (tab != TAB_DEFAULT)
1364 newlim = memchr (ptr, tab, lim - ptr);
1372 while (newlim < lim && blanks[to_uchar (*newlim)])
1374 while (newlim < lim && !blanks[to_uchar (*newlim)])
1380 /* If we're ignoring leading blanks when computing the End
1381 of the field, don't start counting bytes until after skipping
1382 past any leading blanks. */
1383 if (key->skipeblanks)
1384 while (ptr < lim && blanks[to_uchar (*ptr)])
1387 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1388 remaining_bytes = lim - ptr;
1389 if (echar < remaining_bytes)
1397 /* Fill BUF reading from FP, moving buf->left bytes from the end
1398 of buf->buf to the beginning first. If EOF is reached and the
1399 file wasn't terminated by a newline, supply one. Set up BUF's line
1400 table too. FILE is the name of the file corresponding to FP.
1401 Return true if some input was read. */
1404 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1406 struct keyfield const *key = keylist;
1408 size_t line_bytes = buf->line_bytes;
1409 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1414 if (buf->used != buf->left)
1416 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1417 buf->used = buf->left;
1423 char *ptr = buf->buf + buf->used;
1424 struct line *linelim = buffer_linelim (buf);
1425 struct line *line = linelim - buf->nlines;
1426 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1427 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1429 while (line_bytes + 1 < avail)
1431 /* Read as many bytes as possible, but do not read so many
1432 bytes that there might not be enough room for the
1433 corresponding line array. The worst case is when the
1434 rest of the input file consists entirely of newlines,
1435 except that the last byte is not a newline. */
1436 size_t readsize = (avail - 1) / (line_bytes + 1);
1437 size_t bytes_read = fread (ptr, 1, readsize, fp);
1438 char *ptrlim = ptr + bytes_read;
1440 avail -= bytes_read;
1442 if (bytes_read != readsize)
1445 die (_("read failed"), file);
1449 if (buf->buf == ptrlim)
1451 if (ptrlim[-1] != eol)
1456 /* Find and record each line in the just-read input. */
1457 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1461 line->text = line_start;
1462 line->length = ptr - line_start;
1463 mergesize = MAX (mergesize, line->length);
1464 avail -= line_bytes;
1468 /* Precompute the position of the first key for
1470 line->keylim = (key->eword == SIZE_MAX
1472 : limfield (line, key));
1474 if (key->sword != SIZE_MAX)
1475 line->keybeg = begfield (line, key);
1478 if (key->skipsblanks)
1479 while (blanks[to_uchar (*line_start)])
1481 line->keybeg = line_start;
1493 buf->used = ptr - buf->buf;
1494 buf->nlines = buffer_linelim (buf) - line;
1495 if (buf->nlines != 0)
1497 buf->left = ptr - line_start;
1498 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1502 /* The current input line is too long to fit in the buffer.
1503 Double the buffer size and try again. */
1504 buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1508 /* Compare strings A and B as numbers without explicitly converting them to
1509 machine numbers. Comparatively slow for short strings, but asymptotically
1513 numcompare (const char *a, const char *b)
1515 while (blanks[to_uchar (*a)])
1517 while (blanks[to_uchar (*b)])
1520 return strnumcmp (a, b, decimal_point, thousands_sep);
1524 general_numcompare (const char *sa, const char *sb)
1526 /* FIXME: add option to warn about failed conversions. */
1527 /* FIXME: maybe add option to try expensive FP conversion
1528 only if A and B can't be compared more cheaply/accurately. */
1532 double a = strtod (sa, &ea);
1533 double b = strtod (sb, &eb);
1535 /* Put conversion errors at the start of the collating sequence. */
1537 return sb == eb ? 0 : -1;
1541 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1542 conversion errors but before numbers; sort them by internal
1543 bit-pattern, for lack of a more portable alternative. */
1549 : memcmp ((char *) &a, (char *) &b, sizeof a));
1552 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1553 Return 0 if the name in S is not recognized. */
1556 getmonth (char const *month, size_t len)
1559 size_t hi = MONTHS_PER_YEAR;
1560 char const *monthlim = month + len;
1564 if (month == monthlim)
1566 if (!blanks[to_uchar (*month)])
1573 size_t ix = (lo + hi) / 2;
1574 char const *m = month;
1575 char const *n = monthtab[ix].name;
1580 return monthtab[ix].val;
1581 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1586 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1598 /* A source of random data. */
1599 static struct randread_source *randread_source;
1601 /* Return the Ith randomly-generated state. The caller must invoke
1602 random_state (H) for all H less than I before invoking random_state
1605 static struct md5_ctx
1606 random_state (size_t i)
1608 /* An array of states resulting from the random data, and counts of
1609 its used and allocated members. */
1610 static struct md5_ctx *state;
1612 static size_t allocated;
1614 struct md5_ctx *s = &state[i];
1618 unsigned char buf[MD5_DIGEST_SIZE];
1624 state = X2NREALLOC (state, &allocated);
1628 randread (randread_source, buf, sizeof buf);
1630 md5_process_bytes (buf, sizeof buf, s);
1636 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1637 with length LENGTHB. Return negative if less, zero if equal,
1638 positive if greater. */
1641 cmp_hashes (char const *texta, size_t lena,
1642 char const *textb, size_t lenb)
1644 /* Try random hashes until a pair of hashes disagree. But if the
1645 first pair of random hashes agree, check whether the keys are
1646 identical and if so report no difference. */
1651 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1652 struct md5_ctx s[2];
1653 s[0] = s[1] = random_state (i);
1654 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1655 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1656 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1659 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1666 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1667 using one or more random hash functions. */
1670 compare_random (char *restrict texta, size_t lena,
1671 char *restrict textb, size_t lenb)
1675 if (! hard_LC_COLLATE)
1676 diff = cmp_hashes (texta, lena, textb, lenb);
1679 /* Transform the text into the basis of comparison, so that byte
1680 strings that would otherwise considered to be equal are
1681 considered equal here even if their bytes differ. */
1684 char stackbuf[4000];
1685 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1686 bool a_fits = tlena <= sizeof stackbuf;
1687 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1688 (a_fits ? sizeof stackbuf - tlena : 0),
1691 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1695 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1696 faster by avoiding the need for an extra buffer copy. */
1697 buf = xmalloc (tlena + tlenb + 1);
1698 xmemxfrm (buf, tlena + 1, texta, lena);
1699 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1702 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1704 if (buf != stackbuf)
1711 /* Compare two lines A and B trying every key in sequence until there
1712 are no more keys or a difference is found. */
1715 keycompare (const struct line *a, const struct line *b)
1717 struct keyfield const *key = keylist;
1719 /* For the first iteration only, the key positions have been
1720 precomputed for us. */
1721 char *texta = a->keybeg;
1722 char *textb = b->keybeg;
1723 char *lima = a->keylim;
1724 char *limb = b->keylim;
1730 char const *translate = key->translate;
1731 bool const *ignore = key->ignore;
1733 /* Find the lengths. */
1734 size_t lena = lima <= texta ? 0 : lima - texta;
1735 size_t lenb = limb <= textb ? 0 : limb - textb;
1737 /* Actually compare the fields. */
1740 diff = compare_random (texta, lena, textb, lenb);
1741 else if (key->numeric | key->general_numeric)
1743 char savea = *lima, saveb = *limb;
1745 *lima = *limb = '\0';
1746 diff = ((key->numeric ? numcompare : general_numcompare)
1748 *lima = savea, *limb = saveb;
1750 else if (key->month)
1751 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1752 /* Sorting like this may become slow, so in a simple locale the user
1753 can select a faster sort that is similar to ascii sort. */
1754 else if (hard_LC_COLLATE)
1756 if (ignore || translate)
1759 size_t size = lena + 1 + lenb + 1;
1760 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1761 char *copy_b = copy_a + lena + 1;
1762 size_t new_len_a, new_len_b, i;
1764 /* Ignore and/or translate chars before comparing. */
1765 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1769 copy_a[new_len_a] = (translate
1770 ? translate[to_uchar (texta[i])]
1772 if (!ignore || !ignore[to_uchar (texta[i])])
1777 copy_b[new_len_b] = (translate
1778 ? translate[to_uchar (textb[i])]
1780 if (!ignore || !ignore[to_uchar (textb[i])])
1785 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1787 if (sizeof buf < size)
1791 diff = - NONZERO (lenb);
1795 diff = xmemcoll (texta, lena, textb, lenb);
1799 #define CMP_WITH_IGNORE(A, B) \
1804 while (texta < lima && ignore[to_uchar (*texta)]) \
1806 while (textb < limb && ignore[to_uchar (*textb)]) \
1808 if (! (texta < lima && textb < limb)) \
1810 diff = to_uchar (A) - to_uchar (B); \
1817 diff = (texta < lima) - (textb < limb); \
1822 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1823 translate[to_uchar (*textb)]);
1825 CMP_WITH_IGNORE (*texta, *textb);
1828 diff = - NONZERO (lenb);
1835 while (texta < lima && textb < limb)
1837 diff = (to_uchar (translate[to_uchar (*texta++)])
1838 - to_uchar (translate[to_uchar (*textb++)]));
1845 diff = memcmp (texta, textb, MIN (lena, lenb));
1849 diff = lena < lenb ? -1 : lena != lenb;
1859 /* Find the beginning and limit of the next field. */
1860 if (key->eword != SIZE_MAX)
1861 lima = limfield (a, key), limb = limfield (b, key);
1863 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1865 if (key->sword != SIZE_MAX)
1866 texta = begfield (a, key), textb = begfield (b, key);
1869 texta = a->text, textb = b->text;
1870 if (key->skipsblanks)
1872 while (texta < lima && blanks[to_uchar (*texta)])
1874 while (textb < limb && blanks[to_uchar (*textb)])
1885 return key->reverse ? -diff : diff;
1888 /* Compare two lines A and B, returning negative, zero, or positive
1889 depending on whether A compares less than, equal to, or greater than B. */
1892 compare (const struct line *a, const struct line *b)
1897 /* First try to compare on the specified keys (if any).
1898 The only two cases with no key at all are unadorned sort,
1899 and unadorned sort -r. */
1902 diff = keycompare (a, b);
1903 if (diff | unique | stable)
1907 /* If the keys all compare equal (or no keys were specified)
1908 fall through to the default comparison. */
1909 alen = a->length - 1, blen = b->length - 1;
1912 diff = - NONZERO (blen);
1915 else if (hard_LC_COLLATE)
1916 diff = xmemcoll (a->text, alen, b->text, blen);
1917 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1918 diff = alen < blen ? -1 : alen != blen;
1920 return reverse ? -diff : diff;
1923 /* Check that the lines read from FILE_NAME come in order. Return
1924 true if they are in order. If CHECKONLY == 'c', also print a
1925 diagnostic (FILE_NAME, line number, contents of line) to stderr if
1926 they are not in order. */
1929 check (char const *file_name, char checkonly)
1931 FILE *fp = xfopen (file_name, "r");
1932 struct buffer buf; /* Input buffer. */
1933 struct line temp; /* Copy of previous line. */
1935 uintmax_t line_number = 0;
1936 struct keyfield const *key = keylist;
1937 bool nonunique = ! unique;
1938 bool ordered = true;
1940 initbuf (&buf, sizeof (struct line),
1941 MAX (merge_buffer_size, sort_size));
1944 while (fillbuf (&buf, fp, file_name))
1946 struct line const *line = buffer_linelim (&buf);
1947 struct line const *linebase = line - buf.nlines;
1949 /* Make sure the line saved from the old buffer contents is
1950 less than or equal to the first line of the new buffer. */
1951 if (alloc && nonunique <= compare (&temp, line - 1))
1955 if (checkonly == 'c')
1957 struct line const *disorder_line = line - 1;
1958 uintmax_t disorder_line_number =
1959 buffer_linelim (&buf) - disorder_line + line_number;
1960 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1961 fprintf (stderr, _("%s: %s:%s: disorder: "),
1962 program_name, file_name,
1963 umaxtostr (disorder_line_number, hr_buf));
1964 write_bytes (disorder_line->text, disorder_line->length,
1965 stderr, _("standard error"));
1973 /* Compare each line in the buffer with its successor. */
1974 while (linebase < --line)
1975 if (nonunique <= compare (line, line - 1))
1976 goto found_disorder;
1978 line_number += buf.nlines;
1980 /* Save the last line of the buffer. */
1981 if (alloc < line->length)
1988 alloc = line->length;
1992 while (alloc < line->length);
1994 temp.text = xrealloc (temp.text, alloc);
1996 memcpy (temp.text, line->text, line->length);
1997 temp.length = line->length;
2000 temp.keybeg = temp.text + (line->keybeg - line->text);
2001 temp.keylim = temp.text + (line->keylim - line->text);
2005 xfclose (fp, file_name);
2011 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2012 files (all of which are at the start of the FILES array), and
2013 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2014 Close input and output files before returning.
2015 OUTPUT_FILE gives the name of the output file. If it is NULL,
2016 the output file is standard output. If OFP is NULL, the output
2017 file has not been opened yet (or written to, if standard output). */
2020 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2021 FILE *ofp, char const *output_file)
2023 FILE *fps[NMERGE]; /* Input streams for each file. */
2024 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2025 struct line saved; /* Saved line storage for unique check. */
2026 struct line const *savedline = NULL;
2027 /* &saved if there is a saved line. */
2028 size_t savealloc = 0; /* Size allocated for the saved line. */
2029 struct line const *cur[NMERGE]; /* Current line in each line table. */
2030 struct line const *base[NMERGE]; /* Base of each line table. */
2031 size_t ord[NMERGE]; /* Table representing a permutation of fps,
2032 such that cur[ord[0]] is the smallest line
2033 and will be next output. */
2037 struct keyfield const *key = keylist;
2040 /* Read initial lines from each input file. */
2041 for (i = 0; i < nfiles; )
2043 fps[i] = (files[i].pid
2044 ? open_temp (files[i].name, files[i].pid)
2045 : xfopen (files[i].name, "r"));
2046 initbuf (&buffer[i], sizeof (struct line),
2047 MAX (merge_buffer_size, sort_size / nfiles));
2048 if (fillbuf (&buffer[i], fps[i], files[i].name))
2050 struct line const *linelim = buffer_linelim (&buffer[i]);
2051 cur[i] = linelim - 1;
2052 base[i] = linelim - buffer[i].nlines;
2057 /* fps[i] is empty; eliminate it from future consideration. */
2058 xfclose (fps[i], files[i].name);
2062 zaptemp (files[i].name);
2064 free (buffer[i].buf);
2066 for (j = i; j < nfiles; ++j)
2067 files[j] = files[j + 1];
2072 ofp = xfopen (output_file, "w");
2074 /* Set up the ord table according to comparisons among input lines.
2075 Since this only reorders two items if one is strictly greater than
2076 the other, it is stable. */
2077 for (i = 0; i < nfiles; ++i)
2079 for (i = 1; i < nfiles; ++i)
2080 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2081 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2083 /* Repeatedly output the smallest line until no input remains. */
2086 struct line const *smallest = cur[ord[0]];
2088 /* If uniquified output is turned on, output only the first of
2089 an identical series of lines. */
2092 if (savedline && compare (savedline, smallest))
2095 write_bytes (saved.text, saved.length, ofp, output_file);
2100 if (savealloc < smallest->length)
2105 savealloc = smallest->length;
2108 while ((savealloc *= 2) < smallest->length);
2110 saved.text = xrealloc (saved.text, savealloc);
2112 saved.length = smallest->length;
2113 memcpy (saved.text, smallest->text, saved.length);
2117 saved.text + (smallest->keybeg - smallest->text);
2119 saved.text + (smallest->keylim - smallest->text);
2124 write_bytes (smallest->text, smallest->length, ofp, output_file);
2126 /* Check if we need to read more lines into core. */
2127 if (base[ord[0]] < smallest)
2128 cur[ord[0]] = smallest - 1;
2131 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2133 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2134 cur[ord[0]] = linelim - 1;
2135 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2139 /* We reached EOF on fps[ord[0]]. */
2140 for (i = 1; i < nfiles; ++i)
2141 if (ord[i] > ord[0])
2144 xfclose (fps[ord[0]], files[ord[0]].name);
2145 if (ord[0] < ntemps)
2148 zaptemp (files[ord[0]].name);
2150 free (buffer[ord[0]].buf);
2151 for (i = ord[0]; i < nfiles; ++i)
2153 fps[i] = fps[i + 1];
2154 files[i] = files[i + 1];
2155 buffer[i] = buffer[i + 1];
2156 cur[i] = cur[i + 1];
2157 base[i] = base[i + 1];
2159 for (i = 0; i < nfiles; ++i)
2160 ord[i] = ord[i + 1];
2165 /* The new line just read in may be larger than other lines
2166 already in main memory; push it back in the queue until we
2167 encounter a line larger than it. Optimize for the common
2168 case where the new line is smallest. */
2173 size_t ord0 = ord[0];
2174 size_t count_of_smaller_lines;
2178 int cmp = compare (cur[ord0], cur[ord[probe]]);
2179 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2183 probe = (lo + hi) / 2;
2186 count_of_smaller_lines = lo - 1;
2187 for (j = 0; j < count_of_smaller_lines; j++)
2188 ord[j] = ord[j + 1];
2189 ord[count_of_smaller_lines] = ord0;
2192 /* Free up some resources every once in a while. */
2193 if (MAX_PROCS_BEFORE_REAP < nprocs)
2197 if (unique && savedline)
2199 write_bytes (saved.text, saved.length, ofp, output_file);
2203 xfclose (ofp, output_file);
2206 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2207 and HI (with NHI members). T, LO, and HI point just past their
2208 respective arrays, and the arrays are in reverse order. NLO and
2209 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2212 mergelines (struct line *t,
2213 struct line const *lo, size_t nlo,
2214 struct line const *hi, size_t nhi)
2217 if (compare (lo - 1, hi - 1) <= 0)
2222 /* HI - NHI equalled T - (NLO + NHI) when this function
2223 began. Therefore HI must equal T now, and there is no
2224 need to copy from HI to T. */
2242 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2243 NLINES must be at least 2.
2244 The input and output arrays are in reverse order, and LINES and
2245 TEMP point just past the end of their respective arrays.
2247 Use a recursive divide-and-conquer algorithm, in the style
2248 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2249 the optimization suggested by exercise 5.2.4-10; this requires room
2250 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2251 writes that this memory optimization was originally published by
2252 D. A. Bell, Comp J. 1 (1958), 75. */
2255 sortlines (struct line *lines, size_t nlines, struct line *temp)
2259 if (0 < compare (&lines[-1], &lines[-2]))
2261 struct line tmp = lines[-1];
2262 lines[-1] = lines[-2];
2268 size_t nlo = nlines / 2;
2269 size_t nhi = nlines - nlo;
2270 struct line *lo = lines;
2271 struct line *hi = lines - nlo;
2272 struct line *sorted_lo = temp;
2274 sortlines (hi, nhi, temp);
2276 sortlines_temp (lo, nlo, sorted_lo);
2278 sorted_lo[-1] = lo[-1];
2280 mergelines (lines, sorted_lo, nlo, hi, nhi);
2284 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2285 rather than sorting in place. */
2288 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2292 /* Declare `swap' as int, not bool, to work around a bug
2293 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2294 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2295 int swap = (0 < compare (&lines[-1], &lines[-2]));
2296 temp[-1] = lines[-1 - swap];
2297 temp[-2] = lines[-2 + swap];
2301 size_t nlo = nlines / 2;
2302 size_t nhi = nlines - nlo;
2303 struct line *lo = lines;
2304 struct line *hi = lines - nlo;
2305 struct line *sorted_hi = temp - nlo;
2307 sortlines_temp (hi, nhi, sorted_hi);
2309 sortlines (lo, nlo, temp);
2311 mergelines (temp, lo, nlo, sorted_hi, nhi);
2315 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2316 the same as OUTFILE. If found, merge the found instances (and perhaps
2317 some other files) into a temporary file so that it can in turn be
2318 merged into OUTFILE without destroying OUTFILE before it is completely
2319 read. Return the new value of NFILES, which differs from the old if
2320 some merging occurred.
2322 This test ensures that an otherwise-erroneous use like
2323 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2324 It's not clear that POSIX requires this nicety.
2325 Detect common error cases, but don't try to catch obscure cases like
2326 "cat ... FILE ... | sort -m -o FILE"
2327 where traditional "sort" doesn't copy the input and where
2328 people should know that they're getting into trouble anyway.
2329 Catching these obscure cases would slow down performance in
2333 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2334 size_t nfiles, char const *outfile)
2337 bool got_outstat = false;
2338 struct stat outstat;
2340 for (i = ntemps; i < nfiles; i++)
2342 bool is_stdin = STREQ (files[i].name, "-");
2346 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2353 ? stat (outfile, &outstat)
2354 : fstat (STDOUT_FILENO, &outstat))
2361 ? fstat (STDIN_FILENO, &instat)
2362 : stat (files[i].name, &instat))
2364 && SAME_INODE (instat, outstat));
2371 char *temp = create_temp (&tftp, &pid);
2372 mergefps (&files[i],0, nfiles - i, tftp, temp);
2373 files[i].name = temp;
2382 /* Merge the input FILES. NTEMPS is the number of files at the
2383 start of FILES that are temporary; it is zero at the top level.
2384 NFILES is the total number of files. Put the output in
2385 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2388 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2389 char const *output_file)
2391 while (NMERGE < nfiles)
2393 /* Number of input files processed so far. */
2396 /* Number of output files generated so far. */
2399 /* nfiles % NMERGE; this counts input files that are left over
2400 after all full-sized merges have been done. */
2403 /* Number of easily-available slots at the next loop iteration. */
2406 /* Do as many NMERGE-size merges as possible. */
2407 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2411 char *temp = create_temp (&tfp, &pid);
2412 size_t nt = MIN (ntemps, NMERGE);
2414 mergefps (&files[in], nt, NMERGE, tfp, temp);
2415 files[out].name = temp;
2416 files[out].pid = pid;
2419 remainder = nfiles - in;
2420 cheap_slots = NMERGE - out % NMERGE;
2422 if (cheap_slots < remainder)
2424 /* So many files remain that they can't all be put into the last
2425 NMERGE-sized output window. Do one more merge. Merge as few
2426 files as possible, to avoid needless I/O. */
2427 size_t nshortmerge = remainder - cheap_slots + 1;
2430 char *temp = create_temp (&tfp, &pid);
2431 size_t nt = MIN (ntemps, nshortmerge);
2433 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2434 files[out].name = temp;
2435 files[out++].pid = pid;
2439 /* Put the remaining input files into the last NMERGE-sized output
2440 window, so they will be merged in the next pass. */
2441 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2446 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2447 mergefps (files, ntemps, nfiles, NULL, output_file);
2450 /* Sort NFILES FILES onto OUTPUT_FILE. */
2453 sort (char * const *files, size_t nfiles, char const *output_file)
2457 bool output_file_created = false;
2463 char const *temp_output;
2464 char const *file = *files;
2465 FILE *fp = xfopen (file, "r");
2467 size_t bytes_per_line = (2 * sizeof (struct line)
2468 - sizeof (struct line) / 2);
2471 initbuf (&buf, bytes_per_line,
2472 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2477 while (fillbuf (&buf, fp, file))
2480 struct line *linebase;
2482 if (buf.eof && nfiles
2483 && (bytes_per_line + 1
2484 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2486 /* End of file, but there is more input and buffer room.
2487 Concatenate the next input file; this is faster in
2489 buf.left = buf.used;
2493 line = buffer_linelim (&buf);
2494 linebase = line - buf.nlines;
2496 sortlines (line, buf.nlines, linebase);
2497 if (buf.eof && !nfiles && !ntemps && !buf.left)
2500 tfp = xfopen (output_file, "w");
2501 temp_output = output_file;
2502 output_file_created = true;
2507 temp_output = create_temp (&tfp, NULL);
2513 write_bytes (line->text, line->length, tfp, temp_output);
2515 while (linebase < line && compare (line, line - 1) == 0)
2518 while (linebase < line);
2520 xfclose (tfp, temp_output);
2522 /* Free up some resources every once in a while. */
2523 if (MAX_PROCS_BEFORE_REAP < nprocs)
2526 if (output_file_created)
2535 if (! output_file_created)
2538 struct tempnode *node = temphead;
2539 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2540 for (i = 0; node; i++)
2542 tempfiles[i].name = node->name;
2543 tempfiles[i].pid = node->pid;
2546 merge (tempfiles, ntemps, ntemps, output_file);
2551 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2554 insertkey (struct keyfield *key_arg)
2556 struct keyfield **p;
2557 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2559 for (p = &keylist; *p; p = &(*p)->next)
2565 /* Report a bad field specification SPEC, with extra info MSGID. */
2567 static void badfieldspec (char const *, char const *)
2570 badfieldspec (char const *spec, char const *msgid)
2572 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2573 _(msgid), quote (spec));
2577 /* Report incompatible options. */
2579 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2581 incompatible_options (char const *opts)
2583 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2587 /* Check compatibility of ordering options. */
2590 check_ordering_compatibility (void)
2592 struct keyfield const *key;
2594 for (key = keylist; key; key = key->next)
2595 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2597 || (key->random && key->translate))
2601 if (key->ignore == nondictionary)
2605 if (key->general_numeric)
2607 if (key->ignore == nonprinting)
2616 incompatible_options (opts);
2620 /* Parse the leading integer in STRING and store the resulting value
2621 (which must fit into size_t) into *VAL. Return the address of the
2622 suffix after the integer. If the value is too large, silently
2623 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2624 failure; otherwise, report MSGID and exit on failure. */
2627 parse_field_count (char const *string, size_t *val, char const *msgid)
2632 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2635 case LONGINT_INVALID_SUFFIX_CHAR:
2640 case LONGINT_OVERFLOW:
2641 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2645 case LONGINT_INVALID:
2647 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2648 _(msgid), quote (string));
2655 /* Handle interrupts and hangups. */
2658 sighandler (int sig)
2661 signal (sig, SIG_IGN);
2665 signal (sig, SIG_DFL);
2669 /* Set the ordering options for KEY specified in S.
2670 Return the address of the first character in S that
2671 is not a valid ordering option.
2672 BLANKTYPE is the kind of blanks that 'b' should skip. */
2675 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2682 if (blanktype == bl_start || blanktype == bl_both)
2683 key->skipsblanks = true;
2684 if (blanktype == bl_end || blanktype == bl_both)
2685 key->skipeblanks = true;
2688 key->ignore = nondictionary;
2691 key->translate = fold_toupper;
2694 key->general_numeric = true;
2697 /* Option order should not matter, so don't let -i override
2698 -d. -d implies -i, but -i does not imply -d. */
2700 key->ignore = nonprinting;
2706 key->numeric = true;
2712 key->reverse = true;
2722 static struct keyfield *
2723 key_init (struct keyfield *key)
2725 memset (key, 0, sizeof *key);
2726 key->eword = SIZE_MAX;
2731 main (int argc, char **argv)
2733 struct keyfield *key;
2734 struct keyfield key_buf;
2735 struct keyfield gkey;
2739 bool mergeonly = false;
2740 char *random_source = NULL;
2741 bool need_random = false;
2743 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2744 bool obsolete_usage = (posix2_version () < 200112);
2746 char const *outfile = NULL;
2748 initialize_main (&argc, &argv);
2749 program_name = argv[0];
2750 setlocale (LC_ALL, "");
2751 bindtextdomain (PACKAGE, LOCALEDIR);
2752 textdomain (PACKAGE);
2754 initialize_exit_failure (SORT_FAILURE);
2756 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2757 #if HAVE_NL_LANGINFO
2758 hard_LC_TIME = hard_locale (LC_TIME);
2761 /* Get locale's representation of the decimal point. */
2763 struct lconv const *locale = localeconv ();
2765 /* If the locale doesn't define a decimal point, or if the decimal
2766 point is multibyte, use the C locale's decimal point. FIXME:
2767 add support for multibyte decimal points. */
2768 decimal_point = to_uchar (locale->decimal_point[0]);
2769 if (! decimal_point || locale->decimal_point[1])
2770 decimal_point = '.';
2772 /* FIXME: add support for multibyte thousands separators. */
2773 thousands_sep = to_uchar (*locale->thousands_sep);
2774 if (! thousands_sep || locale->thousands_sep[1])
2778 have_read_stdin = false;
2783 static int const sig[] =
2785 /* The usual suspects. */
2786 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2803 enum { nsigs = sizeof sig / sizeof sig[0] };
2806 struct sigaction act;
2808 sigemptyset (&caught_signals);
2809 for (i = 0; i < nsigs; i++)
2811 sigaction (sig[i], NULL, &act);
2812 if (act.sa_handler != SIG_IGN)
2813 sigaddset (&caught_signals, sig[i]);
2816 act.sa_handler = sighandler;
2817 act.sa_mask = caught_signals;
2820 for (i = 0; i < nsigs; i++)
2821 if (sigismember (&caught_signals, sig[i]))
2822 sigaction (sig[i], &act, NULL);
2824 for (i = 0; i < nsigs; i++)
2825 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2827 signal (sig[i], sighandler);
2828 siginterrupt (sig[i], 1);
2833 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2834 atexit (exit_cleanup);
2836 gkey.sword = gkey.eword = SIZE_MAX;
2838 gkey.translate = NULL;
2839 gkey.numeric = gkey.general_numeric = gkey.random = false;
2840 gkey.month = gkey.reverse = false;
2841 gkey.skipsblanks = gkey.skipeblanks = false;
2843 files = xnmalloc (argc, sizeof *files);
2847 /* Parse an operand as a file after "--" was seen; or if
2848 pedantic and a file was seen, unless the POSIX version
2849 predates 1003.1-2001 and -c was not seen and the operand is
2850 "-o FILE" or "-oFILE". */
2853 || (posixly_correct && nfiles != 0
2854 && ! (obsolete_usage
2857 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2858 && (argv[optind][2] || optind + 1 != argc)))
2859 || ((c = getopt_long (argc, argv, short_options,
2860 long_options, NULL))
2865 files[nfiles++] = argv[optind++];
2871 if (optarg[0] == '+')
2873 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2874 && ISDIGIT (argv[optind][1]));
2875 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2878 /* Treat +POS1 [-POS2] as a key if possible; but silently
2879 treat an operand as a file if it is not a valid +POS1. */
2880 key = key_init (&key_buf);
2881 s = parse_field_count (optarg + 1, &key->sword, NULL);
2883 s = parse_field_count (s + 1, &key->schar, NULL);
2884 if (! (key->sword | key->schar))
2885 key->sword = SIZE_MAX;
2886 if (! s || *set_ordering (s, key, bl_start))
2893 if (minus_pos_usage)
2895 char const *optarg1 = argv[optind++];
2896 s = parse_field_count (optarg1 + 1, &key->eword,
2897 N_("invalid number after `-'"));
2899 s = parse_field_count (s + 1, &key->echar,
2900 N_("invalid number after `.'"));
2901 if (*set_ordering (s, key, bl_end))
2902 badfieldspec (optarg1,
2903 N_("stray character in field spec"));
2910 files[nfiles++] = optarg;
2926 set_ordering (str, &gkey, bl_both);
2932 ? XARGMATCH ("--check", optarg, check_args, check_types)
2937 if (checkonly && checkonly != c)
2938 incompatible_options ("cC");
2942 case COMPRESS_PROGRAM_OPTION:
2943 if (compress_program && strcmp (compress_program, optarg) != 0)
2944 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2945 compress_program = optarg;
2949 key = key_init (&key_buf);
2952 s = parse_field_count (optarg, &key->sword,
2953 N_("invalid number at field start"));
2956 /* Provoke with `sort -k0' */
2957 badfieldspec (optarg, N_("field number is zero"));
2961 s = parse_field_count (s + 1, &key->schar,
2962 N_("invalid number after `.'"));
2965 /* Provoke with `sort -k1.0' */
2966 badfieldspec (optarg, N_("character offset is zero"));
2969 if (! (key->sword | key->schar))
2970 key->sword = SIZE_MAX;
2971 s = set_ordering (s, key, bl_start);
2974 key->eword = SIZE_MAX;
2980 s = parse_field_count (s + 1, &key->eword,
2981 N_("invalid number after `,'"));
2984 /* Provoke with `sort -k1,0' */
2985 badfieldspec (optarg, N_("field number is zero"));
2988 s = parse_field_count (s + 1, &key->echar,
2989 N_("invalid number after `.'"));
2992 /* `-k 2,3' is equivalent to `+1 -3'. */
2995 s = set_ordering (s, key, bl_end);
2998 badfieldspec (optarg, N_("stray character in field spec"));
3007 if (outfile && !STREQ (outfile, optarg))
3008 error (SORT_FAILURE, 0, _("multiple output files specified"));
3012 case RANDOM_SOURCE_OPTION:
3013 if (random_source && !STREQ (random_source, optarg))
3014 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3015 random_source = optarg;
3023 specify_sort_size (optarg);
3028 char newtab = optarg[0];
3030 error (SORT_FAILURE, 0, _("empty tab"));
3033 if (STREQ (optarg, "\\0"))
3037 /* Provoke with `sort -txx'. Complain about
3038 "multi-character tab" instead of "multibyte tab", so
3039 that the diagnostic's wording does not need to be
3040 changed once multibyte characters are supported. */
3041 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3045 if (tab != TAB_DEFAULT && tab != newtab)
3046 error (SORT_FAILURE, 0, _("incompatible tabs"));
3052 add_temp_dir (optarg);
3060 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3061 through Solaris 7. It is also accepted by many non-Solaris
3062 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3063 -y is marked as obsolete starting with Solaris 8 (1999), but is
3064 still accepted as of Solaris 10 prerelease (2004).
3066 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3067 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3068 and which in general ignores the argument after "-y" if it
3069 consists entirely of digits (it can even be empty). */
3070 if (optarg == argv[optind - 1])
3073 for (p = optarg; ISDIGIT (*p); p++)
3075 optind -= (*p != '\0');
3083 case_GETOPT_HELP_CHAR;
3085 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3088 usage (SORT_FAILURE);
3092 /* Inheritance of global options to individual keys. */
3093 for (key = keylist; key; key = key->next)
3095 if (! (key->ignore || key->translate
3096 || (key->skipsblanks | key->reverse
3097 | key->skipeblanks | key->month | key->numeric
3098 | key->general_numeric
3101 key->ignore = gkey.ignore;
3102 key->translate = gkey.translate;
3103 key->skipsblanks = gkey.skipsblanks;
3104 key->skipeblanks = gkey.skipeblanks;
3105 key->month = gkey.month;
3106 key->numeric = gkey.numeric;
3107 key->general_numeric = gkey.general_numeric;
3108 key->random = gkey.random;
3109 key->reverse = gkey.reverse;
3112 need_random |= key->random;
3115 if (!keylist && (gkey.ignore || gkey.translate
3116 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3117 | gkey.numeric | gkey.general_numeric
3121 need_random |= gkey.random;
3124 check_ordering_compatibility ();
3126 reverse = gkey.reverse;
3130 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3131 if (! randread_source)
3132 die (_("open failed"), random_source);
3135 if (temp_dir_count == 0)
3137 char const *tmp_dir = getenv ("TMPDIR");
3138 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3143 static char *minus = "-";
3152 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3153 quote (files[1]), checkonly);
3157 static char opts[] = {0, 'o', 0};
3158 opts[0] = checkonly;
3159 incompatible_options (opts);
3162 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3163 input is not properly sorted. */
3164 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3169 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3172 for (i = 0; i < nfiles; ++i)
3173 sortfiles[i].name = files[i];
3175 merge (sortfiles, 0, nfiles, outfile);
3178 sort (files, nfiles, outfile);
3180 if (have_read_stdin && fclose (stdin) == EOF)
3181 die (_("close failed"), "-");
3183 exit (EXIT_SUCCESS);