1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
42 #include "strnumcmp.h"
47 #if HAVE_SYS_RESOURCE_H
48 # include <sys/resource.h>
51 struct rlimit { size_t rlim_cur; };
52 # define getrlimit(Resource, Rlp) (-1)
55 /* The official name of this program (e.g., no `g' prefix). */
56 #define PROGRAM_NAME "sort"
59 proper_name ("Mike Haertel"), \
60 proper_name ("Paul Eggert")
62 #if HAVE_LANGINFO_CODESET
63 # include <langinfo.h>
66 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
69 # define SA_NOCLDSTOP 0
70 /* No sigprocmask. Always 'return' zero. */
71 # define sigprocmask(How, Set, Oset) (0)
73 # if ! HAVE_SIGINTERRUPT
74 # define siginterrupt(sig, flag) /* empty */
82 #define UCHAR_LIM (UCHAR_MAX + 1)
84 #ifndef DEFAULT_TMPDIR
85 # define DEFAULT_TMPDIR "/tmp"
91 /* POSIX says to exit with status 1 if invoked with -c and the
92 input is not properly sorted. */
93 SORT_OUT_OF_ORDER = 1,
95 /* POSIX says any other irregular exit must exit with a status
96 code greater than 1. */
102 /* The number of times we should try to fork a compression process
103 (we retry if the fork call fails). We don't _need_ to compress
104 temp files, this is just to reduce disk access, so this number
106 MAX_FORK_TRIES_COMPRESS = 2,
108 /* The number of times we should try to fork a decompression process.
109 If we can't fork a decompression process, we can't sort, so this
110 number should be big. */
111 MAX_FORK_TRIES_DECOMPRESS = 8
114 /* The representation of the decimal point in the current locale. */
115 static int decimal_point;
117 /* Thousands separator; if -1, then there isn't one. */
118 static int thousands_sep;
120 /* Nonzero if the corresponding locales are hard. */
121 static bool hard_LC_COLLATE;
123 static bool hard_LC_TIME;
126 #define NONZERO(x) ((x) != 0)
128 /* The kind of blanks for '-b' to skip in various options. */
129 enum blanktype { bl_start, bl_end, bl_both };
131 /* The character marking end of line. Default to \n. */
132 static char eolchar = '\n';
134 /* Lines are held in core as counted strings. */
137 char *text; /* Text of the line. */
138 size_t length; /* Length including final newline. */
139 char *keybeg; /* Start of first key. */
140 char *keylim; /* Limit of first key. */
146 char *buf; /* Dynamically allocated buffer,
147 partitioned into 3 regions:
150 - an array of lines, in reverse order. */
151 size_t used; /* Number of bytes used for input data. */
152 size_t nlines; /* Number of lines in the line array. */
153 size_t alloc; /* Number of bytes allocated. */
154 size_t left; /* Number of bytes left from previous reads. */
155 size_t line_bytes; /* Number of bytes to reserve for each line. */
156 bool eof; /* An EOF has been read. */
161 size_t sword; /* Zero-origin 'word' to start at. */
162 size_t schar; /* Additional characters to skip. */
163 size_t eword; /* Zero-origin first word after field. */
164 size_t echar; /* Additional characters in field. */
165 bool const *ignore; /* Boolean array of characters to ignore. */
166 char const *translate; /* Translation applied to characters. */
167 bool skipsblanks; /* Skip leading blanks when finding start. */
168 bool skipeblanks; /* Skip leading blanks when finding end. */
169 bool numeric; /* Flag for numeric comparison. Handle
170 strings of digits with optional decimal
171 point, but no exponential notation. */
172 bool random; /* Sort by random hash of key. */
173 bool general_numeric; /* Flag for general, numeric comparison.
174 Handle numbers in exponential notation. */
175 bool month; /* Flag for comparison by month name. */
176 bool reverse; /* Reverse the sense of comparison. */
177 struct keyfield *next; /* Next keyfield to try. */
186 /* FIXME: None of these tables work with multibyte character sets.
187 Also, there are many other bugs when handling multibyte characters.
188 One way to fix this is to rewrite `sort' to use wide characters
189 internally, but doing this with good performance is a bit
192 /* Table of blanks. */
193 static bool blanks[UCHAR_LIM];
195 /* Table of non-printing characters. */
196 static bool nonprinting[UCHAR_LIM];
198 /* Table of non-dictionary characters (not letters, digits, or blanks). */
199 static bool nondictionary[UCHAR_LIM];
201 /* Translation table folding lower case to upper. */
202 static char fold_toupper[UCHAR_LIM];
204 #define MONTHS_PER_YEAR 12
206 /* Table mapping month names to integers.
207 Alphabetic order allows binary search. */
208 static struct month monthtab[] =
224 /* During the merge phase, the number of files to merge at once. */
227 /* Minimum size for a merge or check buffer. */
228 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
230 /* Minimum sort size; the code might not work with smaller sizes. */
231 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
233 /* The number of bytes needed for a merge or check buffer, which can
234 function relatively efficiently even if it holds only one line. If
235 a longer line is seen, this value is increased. */
236 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
238 /* The approximate maximum number of bytes of main memory to use, as
239 specified by the user. Zero if the user has not specified a size. */
240 static size_t sort_size;
242 /* The guessed size for non-regular files. */
243 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
245 /* Array of directory names in which any temporary files are to be created. */
246 static char const **temp_dirs;
248 /* Number of temporary directory names used. */
249 static size_t temp_dir_count;
251 /* Number of allocated slots in temp_dirs. */
252 static size_t temp_dir_alloc;
254 /* Flag to reverse the order of all comparisons. */
257 /* Flag for stable sort. This turns off the last ditch bytewise
258 comparison of lines, and instead leaves lines in the same order
259 they were read if all keys compare equal. */
262 /* If TAB has this value, blanks separate fields. */
263 enum { TAB_DEFAULT = CHAR_MAX + 1 };
265 /* Tab character separating fields. If TAB_DEFAULT, then fields are
266 separated by the empty string between a non-blank character and a blank
268 static int tab = TAB_DEFAULT;
270 /* Flag to remove consecutive duplicate lines from the output.
271 Only the last of a sequence of equal lines will be output. */
274 /* Nonzero if any of the input files are the standard input. */
275 static bool have_read_stdin;
277 /* List of key field comparisons to be tried. */
278 static struct keyfield *keylist;
280 /* Program used to (de)compress temp files. Must accept -d. */
281 static char const *compress_program;
283 static void sortlines_temp (struct line *, size_t, struct line *);
285 /* Report MESSAGE for FILE, then clean up and exit.
286 If FILE is null, it represents standard output. */
288 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
290 die (char const *message, char const *file)
292 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
299 if (status != EXIT_SUCCESS)
300 fprintf (stderr, _("Try `%s --help' for more information.\n"),
305 Usage: %s [OPTION]... [FILE]...\n\
309 Write sorted concatenation of all FILE(s) to standard output.\n\
313 Mandatory arguments to long options are mandatory for short options too.\n\
320 -b, --ignore-leading-blanks ignore leading blanks\n\
321 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
322 -f, --ignore-case fold lower case to upper case characters\n\
325 -g, --general-numeric-sort compare according to general numerical value\n\
326 -i, --ignore-nonprinting consider only printable characters\n\
327 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
328 -n, --numeric-sort compare according to string numerical value\n\
329 -R, --random-sort sort by random hash of keys\n\
330 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
331 --sort=WORD sort according to WORD:\n\
332 general-numeric -g, month -M, numeric -n,\n\
334 -r, --reverse reverse the result of comparisons\n\
340 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
341 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
342 --compress-program=PROG compress temporaries with PROG;\n\
343 decompress them with PROG -d\n\
344 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
345 -m, --merge merge already sorted files; do not sort\n\
348 -o, --output=FILE write result to FILE instead of standard output\n\
349 -s, --stable stabilize sort by disabling last-resort comparison\n\
350 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
353 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
354 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
355 multiple options specify multiple directories\n\
356 -u, --unique with -c, check for strict ordering;\n\
357 without -c, output only the first of an equal run\n\
360 -z, --zero-terminated end lines with 0 byte, not newline\n\
362 fputs (HELP_OPTION_DESCRIPTION, stdout);
363 fputs (VERSION_OPTION_DESCRIPTION, stdout);
366 POS is F[.C][OPTS], where F is the field number and C the character position\n\
367 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
368 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
369 one or more single-letter ordering options, which override global ordering\n\
370 options for that key. If no key is given, use the entire line as the key.\n\
372 SIZE may be followed by the following multiplicative suffixes:\n\
375 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
377 With no FILE, or when FILE is -, read standard input.\n\
380 The locale specified by the environment affects sort order.\n\
381 Set LC_ALL=C to get the traditional sort order that uses\n\
382 native byte values.\n\
384 emit_bug_reporting_address ();
390 /* For long options that have no equivalent short option, use a
391 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
394 CHECK_OPTION = CHAR_MAX + 1,
395 COMPRESS_PROGRAM_OPTION,
396 RANDOM_SOURCE_OPTION,
400 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
402 static struct option const long_options[] =
404 {"ignore-leading-blanks", no_argument, NULL, 'b'},
405 {"check", optional_argument, NULL, CHECK_OPTION},
406 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
407 {"dictionary-order", no_argument, NULL, 'd'},
408 {"ignore-case", no_argument, NULL, 'f'},
409 {"general-numeric-sort", no_argument, NULL, 'g'},
410 {"ignore-nonprinting", no_argument, NULL, 'i'},
411 {"key", required_argument, NULL, 'k'},
412 {"merge", no_argument, NULL, 'm'},
413 {"month-sort", no_argument, NULL, 'M'},
414 {"numeric-sort", no_argument, NULL, 'n'},
415 {"random-sort", no_argument, NULL, 'R'},
416 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
417 {"sort", required_argument, NULL, SORT_OPTION},
418 {"output", required_argument, NULL, 'o'},
419 {"reverse", no_argument, NULL, 'r'},
420 {"stable", no_argument, NULL, 's'},
421 {"buffer-size", required_argument, NULL, 'S'},
422 {"field-separator", required_argument, NULL, 't'},
423 {"temporary-directory", required_argument, NULL, 'T'},
424 {"unique", no_argument, NULL, 'u'},
425 {"zero-terminated", no_argument, NULL, 'z'},
426 {GETOPT_HELP_OPTION_DECL},
427 {GETOPT_VERSION_OPTION_DECL},
431 static char const *const check_args[] =
433 "quiet", "silent", "diagnose-first", NULL
435 static char const check_types[] =
439 ARGMATCH_VERIFY (check_args, check_types);
441 static char const *const sort_args[] =
443 "general-numeric", "month", "numeric", "random", NULL
445 static char const sort_types[] =
449 ARGMATCH_VERIFY (sort_args, sort_types);
451 /* The set of signals that are caught. */
452 static sigset_t caught_signals;
454 /* Critical section status. */
461 /* Enter a critical section. */
462 static struct cs_status
465 struct cs_status status;
466 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
470 /* Leave a critical section. */
472 cs_leave (struct cs_status status)
476 /* Ignore failure when restoring the signal mask. */
477 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
481 /* The list of temporary files. */
484 struct tempnode *volatile next;
485 pid_t pid; /* If compressed, the pid of compressor, else zero */
486 char name[1]; /* Actual size is 1 + file name length. */
488 static struct tempnode *volatile temphead;
489 static struct tempnode *volatile *temptail = &temphead;
494 pid_t pid; /* If compressed, the pid of compressor, else zero */
497 /* A table where we store compression process states. We clean up all
498 processes in a timely manner so as not to exhaust system resources,
499 so we store the info on whether the process is still running, or has
501 static Hash_table *proctab;
503 enum { INIT_PROCTAB_SIZE = 47 };
505 enum procstate { ALIVE, ZOMBIE };
507 /* A proctab entry. The COUNT field is there in case we fork a new
508 compression process that has the same PID as an old zombie process
509 that is still in the table (because the process to decompress the
510 temp file it was associated with hasn't started yet). */
514 enum procstate state;
519 proctab_hasher (const void *entry, size_t tabsize)
521 const struct procnode *node = entry;
522 return node->pid % tabsize;
526 proctab_comparator (const void *e1, const void *e2)
528 const struct procnode *n1 = e1, *n2 = e2;
529 return n1->pid == n2->pid;
532 /* The total number of forked processes (compressors and decompressors)
533 that have not been reaped yet. */
534 static size_t nprocs;
536 /* The number of child processes we'll allow before we try to reap some. */
537 enum { MAX_PROCS_BEFORE_REAP = 2 };
539 /* If 0 < PID, wait for the child process with that PID to exit.
540 If PID is -1, clean up a random child process which has finished and
541 return the process ID of that child. If PID is -1 and no processes
542 have quit yet, return 0 without waiting. */
548 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
551 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
555 if (! WIFEXITED (status) || WEXITSTATUS (status))
556 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
564 /* Add the PID of a running compression process to proctab, or update
565 the entry COUNT and STATE fields if it's already there. This also
566 creates the table for us the first time it's called. */
569 register_proc (pid_t pid)
571 struct procnode test, *node;
575 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
584 node = hash_lookup (proctab, &test);
592 node = xmalloc (sizeof *node);
596 hash_insert (proctab, node);
600 /* This is called when we reap a random process. We don't know
601 whether we have reaped a compression process or a decompression
602 process until we look in the table. If there's an ALIVE entry for
603 it, then we have reaped a compression process, so change the state
604 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
607 update_proc (pid_t pid)
609 struct procnode test, *node;
612 node = hash_lookup (proctab, &test);
614 node->state = ZOMBIE;
617 /* This is for when we need to wait for a compression process to exit.
618 If it has a ZOMBIE entry in the table then it's already dead and has
619 been reaped. Note that if there's an ALIVE entry for it, it still may
620 already have died and been reaped if a second process was created with
621 the same PID. This is probably exceedingly rare, but to be on the safe
622 side we will have to wait for any compression process with this PID. */
625 wait_proc (pid_t pid)
627 struct procnode test, *node;
630 node = hash_lookup (proctab, &test);
631 if (node->state == ALIVE)
634 node->state = ZOMBIE;
637 hash_delete (proctab, node);
642 /* Keep reaping finished children as long as there are more to reap.
643 This doesn't block waiting for any of them, it only reaps those
644 that are already dead. */
651 while (0 < nprocs && (pid = reap (-1)))
655 /* Clean up any remaining temporary files. */
660 struct tempnode const *node;
662 for (node = temphead; node; node = node->next)
667 /* Cleanup actions to take when exiting. */
674 /* Clean up any remaining temporary files in a critical section so
675 that a signal handler does not try to clean them too. */
676 struct cs_status cs = cs_enter ();
684 /* Create a new temporary file, returning its newly allocated tempnode.
685 Store into *PFD the file descriptor open for writing. */
687 static struct tempnode *
688 create_temp_file (int *pfd)
690 static char const slashbase[] = "/sortXXXXXX";
691 static size_t temp_dir_index;
694 char const *temp_dir = temp_dirs[temp_dir_index];
695 size_t len = strlen (temp_dir);
696 struct tempnode *node =
697 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
698 char *file = node->name;
701 memcpy (file, temp_dir, len);
702 memcpy (file + len, slashbase, sizeof slashbase);
705 if (++temp_dir_index == temp_dir_count)
708 /* Create the temporary file in a critical section, to avoid races. */
714 temptail = &node->next;
721 die (_("cannot create temporary file"), file);
727 /* Return a stream for FILE, opened with mode HOW. A null FILE means
728 standard output; HOW should be "w". When opening for input, "-"
729 means standard input. To avoid confusion, do not return file
730 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
731 opening an ordinary FILE. */
734 xfopen (const char *file, const char *how)
740 else if (STREQ (file, "-") && *how == 'r')
742 have_read_stdin = true;
747 fp = fopen (file, how);
749 die (_("open failed"), file);
755 /* Close FP, whose name is FILE, and report any errors. */
758 xfclose (FILE *fp, char const *file)
763 /* Allow reading stdin from tty more than once. */
769 /* Don't close stdout just yet. close_stdout does that. */
770 if (fflush (fp) != 0)
771 die (_("fflush failed"), file);
775 if (fclose (fp) != 0)
776 die (_("close failed"), file);
782 dup2_or_die (int oldfd, int newfd)
784 if (dup2 (oldfd, newfd) < 0)
785 error (SORT_FAILURE, errno, _("dup2 failed"));
788 /* Fork a child process for piping to and do common cleanup. The
789 TRIES parameter tells us how many times to try to fork before
790 giving up. Return the PID of the child or -1 if fork failed. */
793 pipe_fork (int pipefds[2], size_t tries)
795 #if HAVE_WORKING_FORK
796 struct tempnode *saved_temphead;
798 unsigned int wait_retry = 1;
799 pid_t pid IF_LINT (= -1);
802 if (pipe (pipefds) < 0)
807 /* This is so the child process won't delete our temp files
808 if it receives a signal before exec-ing. */
810 saved_temphead = temphead;
816 temphead = saved_temphead;
821 if (0 <= pid || errno != EAGAIN)
838 close (STDIN_FILENO);
839 close (STDOUT_FILENO);
846 #else /* ! HAVE_WORKING_FORK */
851 /* Create a temporary file and start a compression program to filter output
852 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
853 set it to the PID of the newly-created process. */
856 create_temp (FILE **pfp, pid_t *ppid)
859 struct tempnode *node = create_temp_file (&tempfd);
860 char *name = node->name;
862 if (compress_program)
866 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
873 register_proc (node->pid);
875 else if (node->pid == 0)
878 dup2_or_die (tempfd, STDOUT_FILENO);
880 dup2_or_die (pipefds[0], STDIN_FILENO);
883 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
884 error (SORT_FAILURE, errno, _("couldn't execute %s"),
891 *pfp = fdopen (tempfd, "w");
893 die (_("couldn't create temporary file"), name);
901 /* Open a compressed temp file and start a decompression process through
902 which to filter the input. PID must be the valid processes ID of the
903 process used to compress the file. */
906 open_temp (const char *name, pid_t pid)
908 int tempfd, pipefds[2];
914 tempfd = open (name, O_RDONLY);
916 die (_("couldn't open temporary file"), name);
918 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
924 else if (child_pid == 0)
927 dup2_or_die (tempfd, STDIN_FILENO);
929 dup2_or_die (pipefds[1], STDOUT_FILENO);
932 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
933 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
937 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
940 fp = fdopen (pipefds[0], "r");
942 die (_("couldn't create temporary file"), name);
948 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
950 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
951 die (_("write failed"), output_file);
954 /* Append DIR to the array of temporary directory names. */
956 add_temp_dir (char const *dir)
958 if (temp_dir_count == temp_dir_alloc)
959 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
961 temp_dirs[temp_dir_count++] = dir;
964 /* Remove NAME from the list of temporary files. */
967 zaptemp (const char *name)
969 struct tempnode *volatile *pnode;
970 struct tempnode *node;
971 struct tempnode *next;
973 int unlink_errno = 0;
976 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
979 /* Unlink the temporary file in a critical section to avoid races. */
982 unlink_status = unlink (name);
983 unlink_errno = errno;
987 if (unlink_status != 0)
988 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
997 struct_month_cmp (const void *m1, const void *m2)
999 struct month const *month1 = m1;
1000 struct month const *month2 = m2;
1001 return strcmp (month1->name, month2->name);
1006 /* Initialize the character class tables. */
1013 for (i = 0; i < UCHAR_LIM; ++i)
1015 blanks[i] = !! isblank (i);
1016 nonprinting[i] = ! isprint (i);
1017 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1018 fold_toupper[i] = toupper (i);
1021 #if HAVE_NL_LANGINFO
1022 /* If we're not in the "C" locale, read different names for months. */
1025 for (i = 0; i < MONTHS_PER_YEAR; i++)
1032 s = (char *) nl_langinfo (ABMON_1 + i);
1034 monthtab[i].name = name = xmalloc (s_len + 1);
1035 monthtab[i].val = i + 1;
1037 for (j = 0; j < s_len; j++)
1038 name[j] = fold_toupper[to_uchar (s[j])];
1041 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1042 sizeof *monthtab, struct_month_cmp);
1047 /* Specify the amount of main memory to use when sorting. */
1049 specify_sort_size (int oi, char c, char const *s)
1053 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1055 /* The default unit is KiB. */
1056 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1058 if (n <= UINTMAX_MAX / 1024)
1061 e = LONGINT_OVERFLOW;
1064 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1065 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1074 double mem = physmem_total () * n / 100;
1076 /* Use "<", not "<=", to avoid problems with rounding. */
1077 if (mem < UINTMAX_MAX)
1083 e = LONGINT_OVERFLOW;
1088 if (e == LONGINT_OK)
1090 /* If multiple sort sizes are specified, take the maximum, so
1091 that option order does not matter. */
1098 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1102 e = LONGINT_OVERFLOW;
1105 xstrtol_fatal (e, oi, c, long_options, s);
1108 /* Return the default sort size. */
1110 default_sort_size (void)
1112 /* Let MEM be available memory or 1/8 of total memory, whichever
1114 double avail = physmem_available ();
1115 double total = physmem_total ();
1116 double mem = MAX (avail, total / 8);
1117 struct rlimit rlimit;
1119 /* Let SIZE be MEM, but no more than the maximum object size or
1120 system resource limits. Avoid the MIN macro here, as it is not
1121 quite right when only one argument is floating point. Don't
1122 bother to check for values like RLIM_INFINITY since in practice
1123 they are not much less than SIZE_MAX. */
1124 size_t size = SIZE_MAX;
1127 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1128 size = rlimit.rlim_cur;
1130 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1131 size = rlimit.rlim_cur;
1134 /* Leave a large safety margin for the above limits, as failure can
1135 occur when they are exceeded. */
1139 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1140 Exceeding RSS is not fatal, but can be quite slow. */
1141 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1142 size = rlimit.rlim_cur / 16 * 15;
1145 /* Use no less than the minimum. */
1146 return MAX (size, MIN_SORT_SIZE);
1149 /* Return the sort buffer size to use with the input files identified
1150 by FPS and FILES, which are alternate names of the same files.
1151 NFILES gives the number of input files; NFPS may be less. Assume
1152 that each input line requires LINE_BYTES extra bytes' worth of line
1153 information. Do not exceed the size bound specified by the user
1154 (or a default size bound, if the user does not specify one). */
1157 sort_buffer_size (FILE *const *fps, size_t nfps,
1158 char *const *files, size_t nfiles,
1161 /* A bound on the input size. If zero, the bound hasn't been
1163 static size_t size_bound;
1165 /* In the worst case, each input byte is a newline. */
1166 size_t worst_case_per_input_byte = line_bytes + 1;
1168 /* Keep enough room for one extra input line and an extra byte.
1169 This extra room might be needed when preparing to read EOF. */
1170 size_t size = worst_case_per_input_byte + 1;
1174 for (i = 0; i < nfiles; i++)
1180 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1181 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1182 : stat (files[i], &st))
1184 die (_("stat failed"), files[i]);
1186 if (S_ISREG (st.st_mode))
1187 file_size = st.st_size;
1190 /* The file has unknown size. If the user specified a sort
1191 buffer size, use that; otherwise, guess the size. */
1194 file_size = INPUT_FILE_SIZE_GUESS;
1199 size_bound = sort_size;
1201 size_bound = default_sort_size ();
1204 /* Add the amount of memory needed to represent the worst case
1205 where the input consists entirely of newlines followed by a
1206 single non-newline. Check for overflow. */
1207 worst_case = file_size * worst_case_per_input_byte + 1;
1208 if (file_size != worst_case / worst_case_per_input_byte
1209 || size_bound - size <= worst_case)
1217 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1218 must be at least sizeof (struct line). Allocate ALLOC bytes
1222 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1224 /* Ensure that the line array is properly aligned. If the desired
1225 size cannot be allocated, repeatedly halve it until allocation
1226 succeeds. The smaller allocation may hurt overall performance,
1227 but that's better than failing. */
1230 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1231 buf->buf = malloc (alloc);
1235 if (alloc <= line_bytes + 1)
1239 buf->line_bytes = line_bytes;
1241 buf->used = buf->left = buf->nlines = 0;
1245 /* Return one past the limit of the line array. */
1247 static inline struct line *
1248 buffer_linelim (struct buffer const *buf)
1250 return (struct line *) (buf->buf + buf->alloc);
1253 /* Return a pointer to the first character of the field specified
1257 begfield (const struct line *line, const struct keyfield *key)
1259 char *ptr = line->text, *lim = ptr + line->length - 1;
1260 size_t sword = key->sword;
1261 size_t schar = key->schar;
1262 size_t remaining_bytes;
1264 /* The leading field separator itself is included in a field when -t
1267 if (tab != TAB_DEFAULT)
1268 while (ptr < lim && sword--)
1270 while (ptr < lim && *ptr != tab)
1276 while (ptr < lim && sword--)
1278 while (ptr < lim && blanks[to_uchar (*ptr)])
1280 while (ptr < lim && !blanks[to_uchar (*ptr)])
1284 if (key->skipsblanks)
1285 while (ptr < lim && blanks[to_uchar (*ptr)])
1288 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1289 remaining_bytes = lim - ptr;
1290 if (schar < remaining_bytes)
1298 /* Return the limit of (a pointer to the first character after) the field
1299 in LINE specified by KEY. */
1302 limfield (const struct line *line, const struct keyfield *key)
1304 char *ptr = line->text, *lim = ptr + line->length - 1;
1305 size_t eword = key->eword, echar = key->echar;
1306 size_t remaining_bytes;
1308 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1309 whichever comes first. If there are more than EWORD fields, leave
1310 PTR pointing at the beginning of the field having zero-based index,
1311 EWORD. If a delimiter character was specified (via -t), then that
1312 `beginning' is the first character following the delimiting TAB.
1313 Otherwise, leave PTR pointing at the first `blank' character after
1314 the preceding field. */
1315 if (tab != TAB_DEFAULT)
1316 while (ptr < lim && eword--)
1318 while (ptr < lim && *ptr != tab)
1320 if (ptr < lim && (eword | echar))
1324 while (ptr < lim && eword--)
1326 while (ptr < lim && blanks[to_uchar (*ptr)])
1328 while (ptr < lim && !blanks[to_uchar (*ptr)])
1332 #ifdef POSIX_UNSPECIFIED
1333 /* The following block of code makes GNU sort incompatible with
1334 standard Unix sort, so it's ifdef'd out for now.
1335 The POSIX spec isn't clear on how to interpret this.
1336 FIXME: request clarification.
1338 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1339 Date: Thu, 30 May 96 12:20:41 -0400
1340 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1342 [...]I believe I've found another bug in `sort'.
1347 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1350 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1354 Unix sort produced the answer I expected: sort on the single character
1355 in column 7. GNU sort produced different results, because it disagrees
1356 on the interpretation of the key-end spec "M.N". Unix sort reads this
1357 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1358 "skip M-1 fields, then either N-1 characters or the rest of the current
1359 field, whichever comes first". This extra clause applies only to
1360 key-ends, not key-starts.
1363 /* Make LIM point to the end of (one byte past) the current field. */
1364 if (tab != TAB_DEFAULT)
1367 newlim = memchr (ptr, tab, lim - ptr);
1375 while (newlim < lim && blanks[to_uchar (*newlim)])
1377 while (newlim < lim && !blanks[to_uchar (*newlim)])
1383 /* If we're ignoring leading blanks when computing the End
1384 of the field, don't start counting bytes until after skipping
1385 past any leading blanks. */
1386 if (key->skipeblanks)
1387 while (ptr < lim && blanks[to_uchar (*ptr)])
1390 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1391 remaining_bytes = lim - ptr;
1392 if (echar < remaining_bytes)
1400 /* Fill BUF reading from FP, moving buf->left bytes from the end
1401 of buf->buf to the beginning first. If EOF is reached and the
1402 file wasn't terminated by a newline, supply one. Set up BUF's line
1403 table too. FILE is the name of the file corresponding to FP.
1404 Return true if some input was read. */
1407 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1409 struct keyfield const *key = keylist;
1411 size_t line_bytes = buf->line_bytes;
1412 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1417 if (buf->used != buf->left)
1419 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1420 buf->used = buf->left;
1426 char *ptr = buf->buf + buf->used;
1427 struct line *linelim = buffer_linelim (buf);
1428 struct line *line = linelim - buf->nlines;
1429 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1430 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1432 while (line_bytes + 1 < avail)
1434 /* Read as many bytes as possible, but do not read so many
1435 bytes that there might not be enough room for the
1436 corresponding line array. The worst case is when the
1437 rest of the input file consists entirely of newlines,
1438 except that the last byte is not a newline. */
1439 size_t readsize = (avail - 1) / (line_bytes + 1);
1440 size_t bytes_read = fread (ptr, 1, readsize, fp);
1441 char *ptrlim = ptr + bytes_read;
1443 avail -= bytes_read;
1445 if (bytes_read != readsize)
1448 die (_("read failed"), file);
1452 if (buf->buf == ptrlim)
1454 if (ptrlim[-1] != eol)
1459 /* Find and record each line in the just-read input. */
1460 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1464 line->text = line_start;
1465 line->length = ptr - line_start;
1466 mergesize = MAX (mergesize, line->length);
1467 avail -= line_bytes;
1471 /* Precompute the position of the first key for
1473 line->keylim = (key->eword == SIZE_MAX
1475 : limfield (line, key));
1477 if (key->sword != SIZE_MAX)
1478 line->keybeg = begfield (line, key);
1481 if (key->skipsblanks)
1482 while (blanks[to_uchar (*line_start)])
1484 line->keybeg = line_start;
1496 buf->used = ptr - buf->buf;
1497 buf->nlines = buffer_linelim (buf) - line;
1498 if (buf->nlines != 0)
1500 buf->left = ptr - line_start;
1501 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1506 /* The current input line is too long to fit in the buffer.
1507 Double the buffer size and try again, keeping it properly
1509 size_t line_alloc = buf->alloc / sizeof (struct line);
1510 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1511 buf->alloc = line_alloc * sizeof (struct line);
1516 /* Compare strings A and B as numbers without explicitly converting them to
1517 machine numbers. Comparatively slow for short strings, but asymptotically
1521 numcompare (const char *a, const char *b)
1523 while (blanks[to_uchar (*a)])
1525 while (blanks[to_uchar (*b)])
1528 return strnumcmp (a, b, decimal_point, thousands_sep);
1532 general_numcompare (const char *sa, const char *sb)
1534 /* FIXME: add option to warn about failed conversions. */
1535 /* FIXME: maybe add option to try expensive FP conversion
1536 only if A and B can't be compared more cheaply/accurately. */
1540 double a = strtod (sa, &ea);
1541 double b = strtod (sb, &eb);
1543 /* Put conversion errors at the start of the collating sequence. */
1545 return sb == eb ? 0 : -1;
1549 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1550 conversion errors but before numbers; sort them by internal
1551 bit-pattern, for lack of a more portable alternative. */
1557 : memcmp ((char *) &a, (char *) &b, sizeof a));
1560 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1561 Return 0 if the name in S is not recognized. */
1564 getmonth (char const *month, size_t len)
1567 size_t hi = MONTHS_PER_YEAR;
1568 char const *monthlim = month + len;
1572 if (month == monthlim)
1574 if (!blanks[to_uchar (*month)])
1581 size_t ix = (lo + hi) / 2;
1582 char const *m = month;
1583 char const *n = monthtab[ix].name;
1588 return monthtab[ix].val;
1589 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1594 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1606 /* A source of random data. */
1607 static struct randread_source *randread_source;
1609 /* Return the Ith randomly-generated state. The caller must invoke
1610 random_state (H) for all H less than I before invoking random_state
1613 static struct md5_ctx
1614 random_state (size_t i)
1616 /* An array of states resulting from the random data, and counts of
1617 its used and allocated members. */
1618 static struct md5_ctx *state;
1620 static size_t allocated;
1622 struct md5_ctx *s = &state[i];
1626 unsigned char buf[MD5_DIGEST_SIZE];
1632 state = X2NREALLOC (state, &allocated);
1636 randread (randread_source, buf, sizeof buf);
1638 md5_process_bytes (buf, sizeof buf, s);
1644 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1645 with length LENGTHB. Return negative if less, zero if equal,
1646 positive if greater. */
1649 cmp_hashes (char const *texta, size_t lena,
1650 char const *textb, size_t lenb)
1652 /* Try random hashes until a pair of hashes disagree. But if the
1653 first pair of random hashes agree, check whether the keys are
1654 identical and if so report no difference. */
1659 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1660 struct md5_ctx s[2];
1661 s[0] = s[1] = random_state (i);
1662 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1663 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1664 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1667 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1674 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1675 using one or more random hash functions. */
1678 compare_random (char *restrict texta, size_t lena,
1679 char *restrict textb, size_t lenb)
1683 if (! hard_LC_COLLATE)
1684 diff = cmp_hashes (texta, lena, textb, lenb);
1687 /* Transform the text into the basis of comparison, so that byte
1688 strings that would otherwise considered to be equal are
1689 considered equal here even if their bytes differ. */
1692 char stackbuf[4000];
1693 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1694 bool a_fits = tlena <= sizeof stackbuf;
1695 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1696 (a_fits ? sizeof stackbuf - tlena : 0),
1699 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1703 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1704 faster by avoiding the need for an extra buffer copy. */
1705 buf = xmalloc (tlena + tlenb + 1);
1706 xmemxfrm (buf, tlena + 1, texta, lena);
1707 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1710 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1712 if (buf != stackbuf)
1719 /* Compare two lines A and B trying every key in sequence until there
1720 are no more keys or a difference is found. */
1723 keycompare (const struct line *a, const struct line *b)
1725 struct keyfield const *key = keylist;
1727 /* For the first iteration only, the key positions have been
1728 precomputed for us. */
1729 char *texta = a->keybeg;
1730 char *textb = b->keybeg;
1731 char *lima = a->keylim;
1732 char *limb = b->keylim;
1738 char const *translate = key->translate;
1739 bool const *ignore = key->ignore;
1741 /* Find the lengths. */
1742 size_t lena = lima <= texta ? 0 : lima - texta;
1743 size_t lenb = limb <= textb ? 0 : limb - textb;
1745 /* Actually compare the fields. */
1748 diff = compare_random (texta, lena, textb, lenb);
1749 else if (key->numeric | key->general_numeric)
1751 char savea = *lima, saveb = *limb;
1753 *lima = *limb = '\0';
1754 diff = ((key->numeric ? numcompare : general_numcompare)
1756 *lima = savea, *limb = saveb;
1758 else if (key->month)
1759 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1760 /* Sorting like this may become slow, so in a simple locale the user
1761 can select a faster sort that is similar to ascii sort. */
1762 else if (hard_LC_COLLATE)
1764 if (ignore || translate)
1767 size_t size = lena + 1 + lenb + 1;
1768 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1769 char *copy_b = copy_a + lena + 1;
1770 size_t new_len_a, new_len_b, i;
1772 /* Ignore and/or translate chars before comparing. */
1773 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1777 copy_a[new_len_a] = (translate
1778 ? translate[to_uchar (texta[i])]
1780 if (!ignore || !ignore[to_uchar (texta[i])])
1785 copy_b[new_len_b] = (translate
1786 ? translate[to_uchar (textb[i])]
1788 if (!ignore || !ignore[to_uchar (textb[i])])
1793 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1795 if (sizeof buf < size)
1799 diff = - NONZERO (lenb);
1803 diff = xmemcoll (texta, lena, textb, lenb);
1807 #define CMP_WITH_IGNORE(A, B) \
1812 while (texta < lima && ignore[to_uchar (*texta)]) \
1814 while (textb < limb && ignore[to_uchar (*textb)]) \
1816 if (! (texta < lima && textb < limb)) \
1818 diff = to_uchar (A) - to_uchar (B); \
1825 diff = (texta < lima) - (textb < limb); \
1830 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1831 translate[to_uchar (*textb)]);
1833 CMP_WITH_IGNORE (*texta, *textb);
1836 diff = - NONZERO (lenb);
1843 while (texta < lima && textb < limb)
1845 diff = (to_uchar (translate[to_uchar (*texta++)])
1846 - to_uchar (translate[to_uchar (*textb++)]));
1853 diff = memcmp (texta, textb, MIN (lena, lenb));
1857 diff = lena < lenb ? -1 : lena != lenb;
1867 /* Find the beginning and limit of the next field. */
1868 if (key->eword != SIZE_MAX)
1869 lima = limfield (a, key), limb = limfield (b, key);
1871 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1873 if (key->sword != SIZE_MAX)
1874 texta = begfield (a, key), textb = begfield (b, key);
1877 texta = a->text, textb = b->text;
1878 if (key->skipsblanks)
1880 while (texta < lima && blanks[to_uchar (*texta)])
1882 while (textb < limb && blanks[to_uchar (*textb)])
1893 return key->reverse ? -diff : diff;
1896 /* Compare two lines A and B, returning negative, zero, or positive
1897 depending on whether A compares less than, equal to, or greater than B. */
1900 compare (const struct line *a, const struct line *b)
1905 /* First try to compare on the specified keys (if any).
1906 The only two cases with no key at all are unadorned sort,
1907 and unadorned sort -r. */
1910 diff = keycompare (a, b);
1911 if (diff | unique | stable)
1915 /* If the keys all compare equal (or no keys were specified)
1916 fall through to the default comparison. */
1917 alen = a->length - 1, blen = b->length - 1;
1920 diff = - NONZERO (blen);
1923 else if (hard_LC_COLLATE)
1924 diff = xmemcoll (a->text, alen, b->text, blen);
1925 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1926 diff = alen < blen ? -1 : alen != blen;
1928 return reverse ? -diff : diff;
1931 /* Check that the lines read from FILE_NAME come in order. Return
1932 true if they are in order. If CHECKONLY == 'c', also print a
1933 diagnostic (FILE_NAME, line number, contents of line) to stderr if
1934 they are not in order. */
1937 check (char const *file_name, char checkonly)
1939 FILE *fp = xfopen (file_name, "r");
1940 struct buffer buf; /* Input buffer. */
1941 struct line temp; /* Copy of previous line. */
1943 uintmax_t line_number = 0;
1944 struct keyfield const *key = keylist;
1945 bool nonunique = ! unique;
1946 bool ordered = true;
1948 initbuf (&buf, sizeof (struct line),
1949 MAX (merge_buffer_size, sort_size));
1952 while (fillbuf (&buf, fp, file_name))
1954 struct line const *line = buffer_linelim (&buf);
1955 struct line const *linebase = line - buf.nlines;
1957 /* Make sure the line saved from the old buffer contents is
1958 less than or equal to the first line of the new buffer. */
1959 if (alloc && nonunique <= compare (&temp, line - 1))
1963 if (checkonly == 'c')
1965 struct line const *disorder_line = line - 1;
1966 uintmax_t disorder_line_number =
1967 buffer_linelim (&buf) - disorder_line + line_number;
1968 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1969 fprintf (stderr, _("%s: %s:%s: disorder: "),
1970 program_name, file_name,
1971 umaxtostr (disorder_line_number, hr_buf));
1972 write_bytes (disorder_line->text, disorder_line->length,
1973 stderr, _("standard error"));
1981 /* Compare each line in the buffer with its successor. */
1982 while (linebase < --line)
1983 if (nonunique <= compare (line, line - 1))
1984 goto found_disorder;
1986 line_number += buf.nlines;
1988 /* Save the last line of the buffer. */
1989 if (alloc < line->length)
1996 alloc = line->length;
2000 while (alloc < line->length);
2002 temp.text = xrealloc (temp.text, alloc);
2004 memcpy (temp.text, line->text, line->length);
2005 temp.length = line->length;
2008 temp.keybeg = temp.text + (line->keybeg - line->text);
2009 temp.keylim = temp.text + (line->keylim - line->text);
2013 xfclose (fp, file_name);
2019 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2020 files (all of which are at the start of the FILES array), and
2021 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2022 Close input and output files before returning.
2023 OUTPUT_FILE gives the name of the output file. If it is NULL,
2024 the output file is standard output. If OFP is NULL, the output
2025 file has not been opened yet (or written to, if standard output). */
2028 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2029 FILE *ofp, char const *output_file)
2031 FILE *fps[NMERGE]; /* Input streams for each file. */
2032 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2033 struct line saved; /* Saved line storage for unique check. */
2034 struct line const *savedline = NULL;
2035 /* &saved if there is a saved line. */
2036 size_t savealloc = 0; /* Size allocated for the saved line. */
2037 struct line const *cur[NMERGE]; /* Current line in each line table. */
2038 struct line const *base[NMERGE]; /* Base of each line table. */
2039 size_t ord[NMERGE]; /* Table representing a permutation of fps,
2040 such that cur[ord[0]] is the smallest line
2041 and will be next output. */
2045 struct keyfield const *key = keylist;
2048 /* Read initial lines from each input file. */
2049 for (i = 0; i < nfiles; )
2051 fps[i] = (files[i].pid
2052 ? open_temp (files[i].name, files[i].pid)
2053 : xfopen (files[i].name, "r"));
2054 initbuf (&buffer[i], sizeof (struct line),
2055 MAX (merge_buffer_size, sort_size / nfiles));
2056 if (fillbuf (&buffer[i], fps[i], files[i].name))
2058 struct line const *linelim = buffer_linelim (&buffer[i]);
2059 cur[i] = linelim - 1;
2060 base[i] = linelim - buffer[i].nlines;
2065 /* fps[i] is empty; eliminate it from future consideration. */
2066 xfclose (fps[i], files[i].name);
2070 zaptemp (files[i].name);
2072 free (buffer[i].buf);
2074 for (j = i; j < nfiles; ++j)
2075 files[j] = files[j + 1];
2080 ofp = xfopen (output_file, "w");
2082 /* Set up the ord table according to comparisons among input lines.
2083 Since this only reorders two items if one is strictly greater than
2084 the other, it is stable. */
2085 for (i = 0; i < nfiles; ++i)
2087 for (i = 1; i < nfiles; ++i)
2088 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2089 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2091 /* Repeatedly output the smallest line until no input remains. */
2094 struct line const *smallest = cur[ord[0]];
2096 /* If uniquified output is turned on, output only the first of
2097 an identical series of lines. */
2100 if (savedline && compare (savedline, smallest))
2103 write_bytes (saved.text, saved.length, ofp, output_file);
2108 if (savealloc < smallest->length)
2113 savealloc = smallest->length;
2116 while ((savealloc *= 2) < smallest->length);
2118 saved.text = xrealloc (saved.text, savealloc);
2120 saved.length = smallest->length;
2121 memcpy (saved.text, smallest->text, saved.length);
2125 saved.text + (smallest->keybeg - smallest->text);
2127 saved.text + (smallest->keylim - smallest->text);
2132 write_bytes (smallest->text, smallest->length, ofp, output_file);
2134 /* Check if we need to read more lines into core. */
2135 if (base[ord[0]] < smallest)
2136 cur[ord[0]] = smallest - 1;
2139 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2141 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2142 cur[ord[0]] = linelim - 1;
2143 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2147 /* We reached EOF on fps[ord[0]]. */
2148 for (i = 1; i < nfiles; ++i)
2149 if (ord[i] > ord[0])
2152 xfclose (fps[ord[0]], files[ord[0]].name);
2153 if (ord[0] < ntemps)
2156 zaptemp (files[ord[0]].name);
2158 free (buffer[ord[0]].buf);
2159 for (i = ord[0]; i < nfiles; ++i)
2161 fps[i] = fps[i + 1];
2162 files[i] = files[i + 1];
2163 buffer[i] = buffer[i + 1];
2164 cur[i] = cur[i + 1];
2165 base[i] = base[i + 1];
2167 for (i = 0; i < nfiles; ++i)
2168 ord[i] = ord[i + 1];
2173 /* The new line just read in may be larger than other lines
2174 already in main memory; push it back in the queue until we
2175 encounter a line larger than it. Optimize for the common
2176 case where the new line is smallest. */
2181 size_t ord0 = ord[0];
2182 size_t count_of_smaller_lines;
2186 int cmp = compare (cur[ord0], cur[ord[probe]]);
2187 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2191 probe = (lo + hi) / 2;
2194 count_of_smaller_lines = lo - 1;
2195 for (j = 0; j < count_of_smaller_lines; j++)
2196 ord[j] = ord[j + 1];
2197 ord[count_of_smaller_lines] = ord0;
2200 /* Free up some resources every once in a while. */
2201 if (MAX_PROCS_BEFORE_REAP < nprocs)
2205 if (unique && savedline)
2207 write_bytes (saved.text, saved.length, ofp, output_file);
2211 xfclose (ofp, output_file);
2214 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2215 and HI (with NHI members). T, LO, and HI point just past their
2216 respective arrays, and the arrays are in reverse order. NLO and
2217 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2220 mergelines (struct line *t,
2221 struct line const *lo, size_t nlo,
2222 struct line const *hi, size_t nhi)
2225 if (compare (lo - 1, hi - 1) <= 0)
2230 /* HI - NHI equalled T - (NLO + NHI) when this function
2231 began. Therefore HI must equal T now, and there is no
2232 need to copy from HI to T. */
2250 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2251 NLINES must be at least 2.
2252 The input and output arrays are in reverse order, and LINES and
2253 TEMP point just past the end of their respective arrays.
2255 Use a recursive divide-and-conquer algorithm, in the style
2256 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2257 the optimization suggested by exercise 5.2.4-10; this requires room
2258 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2259 writes that this memory optimization was originally published by
2260 D. A. Bell, Comp J. 1 (1958), 75. */
2263 sortlines (struct line *lines, size_t nlines, struct line *temp)
2267 if (0 < compare (&lines[-1], &lines[-2]))
2269 struct line tmp = lines[-1];
2270 lines[-1] = lines[-2];
2276 size_t nlo = nlines / 2;
2277 size_t nhi = nlines - nlo;
2278 struct line *lo = lines;
2279 struct line *hi = lines - nlo;
2280 struct line *sorted_lo = temp;
2282 sortlines (hi, nhi, temp);
2284 sortlines_temp (lo, nlo, sorted_lo);
2286 sorted_lo[-1] = lo[-1];
2288 mergelines (lines, sorted_lo, nlo, hi, nhi);
2292 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2293 rather than sorting in place. */
2296 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2300 /* Declare `swap' as int, not bool, to work around a bug
2301 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2302 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2303 int swap = (0 < compare (&lines[-1], &lines[-2]));
2304 temp[-1] = lines[-1 - swap];
2305 temp[-2] = lines[-2 + swap];
2309 size_t nlo = nlines / 2;
2310 size_t nhi = nlines - nlo;
2311 struct line *lo = lines;
2312 struct line *hi = lines - nlo;
2313 struct line *sorted_hi = temp - nlo;
2315 sortlines_temp (hi, nhi, sorted_hi);
2317 sortlines (lo, nlo, temp);
2319 mergelines (temp, lo, nlo, sorted_hi, nhi);
2323 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2324 the same as OUTFILE. If found, merge the found instances (and perhaps
2325 some other files) into a temporary file so that it can in turn be
2326 merged into OUTFILE without destroying OUTFILE before it is completely
2327 read. Return the new value of NFILES, which differs from the old if
2328 some merging occurred.
2330 This test ensures that an otherwise-erroneous use like
2331 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2332 It's not clear that POSIX requires this nicety.
2333 Detect common error cases, but don't try to catch obscure cases like
2334 "cat ... FILE ... | sort -m -o FILE"
2335 where traditional "sort" doesn't copy the input and where
2336 people should know that they're getting into trouble anyway.
2337 Catching these obscure cases would slow down performance in
2341 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2342 size_t nfiles, char const *outfile)
2345 bool got_outstat = false;
2346 struct stat outstat;
2348 for (i = ntemps; i < nfiles; i++)
2350 bool is_stdin = STREQ (files[i].name, "-");
2354 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2361 ? stat (outfile, &outstat)
2362 : fstat (STDOUT_FILENO, &outstat))
2369 ? fstat (STDIN_FILENO, &instat)
2370 : stat (files[i].name, &instat))
2372 && SAME_INODE (instat, outstat));
2379 char *temp = create_temp (&tftp, &pid);
2380 mergefps (&files[i],0, nfiles - i, tftp, temp);
2381 files[i].name = temp;
2390 /* Merge the input FILES. NTEMPS is the number of files at the
2391 start of FILES that are temporary; it is zero at the top level.
2392 NFILES is the total number of files. Put the output in
2393 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2396 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2397 char const *output_file)
2399 while (NMERGE < nfiles)
2401 /* Number of input files processed so far. */
2404 /* Number of output files generated so far. */
2407 /* nfiles % NMERGE; this counts input files that are left over
2408 after all full-sized merges have been done. */
2411 /* Number of easily-available slots at the next loop iteration. */
2414 /* Do as many NMERGE-size merges as possible. */
2415 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2419 char *temp = create_temp (&tfp, &pid);
2420 size_t nt = MIN (ntemps, NMERGE);
2422 mergefps (&files[in], nt, NMERGE, tfp, temp);
2423 files[out].name = temp;
2424 files[out].pid = pid;
2427 remainder = nfiles - in;
2428 cheap_slots = NMERGE - out % NMERGE;
2430 if (cheap_slots < remainder)
2432 /* So many files remain that they can't all be put into the last
2433 NMERGE-sized output window. Do one more merge. Merge as few
2434 files as possible, to avoid needless I/O. */
2435 size_t nshortmerge = remainder - cheap_slots + 1;
2438 char *temp = create_temp (&tfp, &pid);
2439 size_t nt = MIN (ntemps, nshortmerge);
2441 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2442 files[out].name = temp;
2443 files[out++].pid = pid;
2447 /* Put the remaining input files into the last NMERGE-sized output
2448 window, so they will be merged in the next pass. */
2449 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2454 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2455 mergefps (files, ntemps, nfiles, NULL, output_file);
2458 /* Sort NFILES FILES onto OUTPUT_FILE. */
2461 sort (char * const *files, size_t nfiles, char const *output_file)
2465 bool output_file_created = false;
2471 char const *temp_output;
2472 char const *file = *files;
2473 FILE *fp = xfopen (file, "r");
2475 size_t bytes_per_line = (2 * sizeof (struct line)
2476 - sizeof (struct line) / 2);
2479 initbuf (&buf, bytes_per_line,
2480 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2485 while (fillbuf (&buf, fp, file))
2488 struct line *linebase;
2490 if (buf.eof && nfiles
2491 && (bytes_per_line + 1
2492 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2494 /* End of file, but there is more input and buffer room.
2495 Concatenate the next input file; this is faster in
2497 buf.left = buf.used;
2501 line = buffer_linelim (&buf);
2502 linebase = line - buf.nlines;
2504 sortlines (line, buf.nlines, linebase);
2505 if (buf.eof && !nfiles && !ntemps && !buf.left)
2508 tfp = xfopen (output_file, "w");
2509 temp_output = output_file;
2510 output_file_created = true;
2515 temp_output = create_temp (&tfp, NULL);
2521 write_bytes (line->text, line->length, tfp, temp_output);
2523 while (linebase < line && compare (line, line - 1) == 0)
2526 while (linebase < line);
2528 xfclose (tfp, temp_output);
2530 /* Free up some resources every once in a while. */
2531 if (MAX_PROCS_BEFORE_REAP < nprocs)
2534 if (output_file_created)
2543 if (! output_file_created)
2546 struct tempnode *node = temphead;
2547 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2548 for (i = 0; node; i++)
2550 tempfiles[i].name = node->name;
2551 tempfiles[i].pid = node->pid;
2554 merge (tempfiles, ntemps, ntemps, output_file);
2559 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2562 insertkey (struct keyfield *key_arg)
2564 struct keyfield **p;
2565 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2567 for (p = &keylist; *p; p = &(*p)->next)
2573 /* Report a bad field specification SPEC, with extra info MSGID. */
2575 static void badfieldspec (char const *, char const *)
2578 badfieldspec (char const *spec, char const *msgid)
2580 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2581 _(msgid), quote (spec));
2585 /* Report incompatible options. */
2587 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2589 incompatible_options (char const *opts)
2591 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2595 /* Check compatibility of ordering options. */
2598 check_ordering_compatibility (void)
2600 struct keyfield const *key;
2602 for (key = keylist; key; key = key->next)
2603 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2605 || (key->random && key->translate))
2609 if (key->ignore == nondictionary)
2613 if (key->general_numeric)
2615 if (key->ignore == nonprinting)
2624 incompatible_options (opts);
2628 /* Parse the leading integer in STRING and store the resulting value
2629 (which must fit into size_t) into *VAL. Return the address of the
2630 suffix after the integer. If the value is too large, silently
2631 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2632 failure; otherwise, report MSGID and exit on failure. */
2635 parse_field_count (char const *string, size_t *val, char const *msgid)
2640 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2643 case LONGINT_INVALID_SUFFIX_CHAR:
2648 case LONGINT_OVERFLOW:
2649 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2653 case LONGINT_INVALID:
2655 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2656 _(msgid), quote (string));
2663 /* Handle interrupts and hangups. */
2666 sighandler (int sig)
2669 signal (sig, SIG_IGN);
2673 signal (sig, SIG_DFL);
2677 /* Set the ordering options for KEY specified in S.
2678 Return the address of the first character in S that
2679 is not a valid ordering option.
2680 BLANKTYPE is the kind of blanks that 'b' should skip. */
2683 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2690 if (blanktype == bl_start || blanktype == bl_both)
2691 key->skipsblanks = true;
2692 if (blanktype == bl_end || blanktype == bl_both)
2693 key->skipeblanks = true;
2696 key->ignore = nondictionary;
2699 key->translate = fold_toupper;
2702 key->general_numeric = true;
2705 /* Option order should not matter, so don't let -i override
2706 -d. -d implies -i, but -i does not imply -d. */
2708 key->ignore = nonprinting;
2714 key->numeric = true;
2720 key->reverse = true;
2730 static struct keyfield *
2731 key_init (struct keyfield *key)
2733 memset (key, 0, sizeof *key);
2734 key->eword = SIZE_MAX;
2739 main (int argc, char **argv)
2741 struct keyfield *key;
2742 struct keyfield key_buf;
2743 struct keyfield gkey;
2747 bool mergeonly = false;
2748 char *random_source = NULL;
2749 bool need_random = false;
2751 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2752 bool obsolete_usage = (posix2_version () < 200112);
2754 char const *outfile = NULL;
2756 initialize_main (&argc, &argv);
2757 set_program_name (argv[0]);
2758 setlocale (LC_ALL, "");
2759 bindtextdomain (PACKAGE, LOCALEDIR);
2760 textdomain (PACKAGE);
2762 initialize_exit_failure (SORT_FAILURE);
2764 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2765 #if HAVE_NL_LANGINFO
2766 hard_LC_TIME = hard_locale (LC_TIME);
2769 /* Get locale's representation of the decimal point. */
2771 struct lconv const *locale = localeconv ();
2773 /* If the locale doesn't define a decimal point, or if the decimal
2774 point is multibyte, use the C locale's decimal point. FIXME:
2775 add support for multibyte decimal points. */
2776 decimal_point = to_uchar (locale->decimal_point[0]);
2777 if (! decimal_point || locale->decimal_point[1])
2778 decimal_point = '.';
2780 /* FIXME: add support for multibyte thousands separators. */
2781 thousands_sep = to_uchar (*locale->thousands_sep);
2782 if (! thousands_sep || locale->thousands_sep[1])
2786 have_read_stdin = false;
2791 static int const sig[] =
2793 /* The usual suspects. */
2794 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2811 enum { nsigs = sizeof sig / sizeof sig[0] };
2814 struct sigaction act;
2816 sigemptyset (&caught_signals);
2817 for (i = 0; i < nsigs; i++)
2819 sigaction (sig[i], NULL, &act);
2820 if (act.sa_handler != SIG_IGN)
2821 sigaddset (&caught_signals, sig[i]);
2824 act.sa_handler = sighandler;
2825 act.sa_mask = caught_signals;
2828 for (i = 0; i < nsigs; i++)
2829 if (sigismember (&caught_signals, sig[i]))
2830 sigaction (sig[i], &act, NULL);
2832 for (i = 0; i < nsigs; i++)
2833 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2835 signal (sig[i], sighandler);
2836 siginterrupt (sig[i], 1);
2841 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2842 atexit (exit_cleanup);
2844 gkey.sword = gkey.eword = SIZE_MAX;
2846 gkey.translate = NULL;
2847 gkey.numeric = gkey.general_numeric = gkey.random = false;
2848 gkey.month = gkey.reverse = false;
2849 gkey.skipsblanks = gkey.skipeblanks = false;
2851 files = xnmalloc (argc, sizeof *files);
2855 /* Parse an operand as a file after "--" was seen; or if
2856 pedantic and a file was seen, unless the POSIX version
2857 predates 1003.1-2001 and -c was not seen and the operand is
2858 "-o FILE" or "-oFILE". */
2862 || (posixly_correct && nfiles != 0
2863 && ! (obsolete_usage
2866 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2867 && (argv[optind][2] || optind + 1 != argc)))
2868 || ((c = getopt_long (argc, argv, short_options,
2874 files[nfiles++] = argv[optind++];
2880 if (optarg[0] == '+')
2882 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2883 && ISDIGIT (argv[optind][1]));
2884 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2887 /* Treat +POS1 [-POS2] as a key if possible; but silently
2888 treat an operand as a file if it is not a valid +POS1. */
2889 key = key_init (&key_buf);
2890 s = parse_field_count (optarg + 1, &key->sword, NULL);
2892 s = parse_field_count (s + 1, &key->schar, NULL);
2893 if (! (key->sword | key->schar))
2894 key->sword = SIZE_MAX;
2895 if (! s || *set_ordering (s, key, bl_start))
2899 if (minus_pos_usage)
2901 char const *optarg1 = argv[optind++];
2902 s = parse_field_count (optarg1 + 1, &key->eword,
2903 N_("invalid number after `-'"));
2905 s = parse_field_count (s + 1, &key->echar,
2906 N_("invalid number after `.'"));
2907 if (*set_ordering (s, key, bl_end))
2908 badfieldspec (optarg1,
2909 N_("stray character in field spec"));
2916 files[nfiles++] = optarg;
2920 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
2935 set_ordering (str, &gkey, bl_both);
2941 ? XARGMATCH ("--check", optarg, check_args, check_types)
2946 if (checkonly && checkonly != c)
2947 incompatible_options ("cC");
2951 case COMPRESS_PROGRAM_OPTION:
2952 if (compress_program && !STREQ (compress_program, optarg))
2953 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2954 compress_program = optarg;
2958 key = key_init (&key_buf);
2961 s = parse_field_count (optarg, &key->sword,
2962 N_("invalid number at field start"));
2965 /* Provoke with `sort -k0' */
2966 badfieldspec (optarg, N_("field number is zero"));
2970 s = parse_field_count (s + 1, &key->schar,
2971 N_("invalid number after `.'"));
2974 /* Provoke with `sort -k1.0' */
2975 badfieldspec (optarg, N_("character offset is zero"));
2978 if (! (key->sword | key->schar))
2979 key->sword = SIZE_MAX;
2980 s = set_ordering (s, key, bl_start);
2983 key->eword = SIZE_MAX;
2989 s = parse_field_count (s + 1, &key->eword,
2990 N_("invalid number after `,'"));
2993 /* Provoke with `sort -k1,0' */
2994 badfieldspec (optarg, N_("field number is zero"));
2997 s = parse_field_count (s + 1, &key->echar,
2998 N_("invalid number after `.'"));
3001 /* `-k 2,3' is equivalent to `+1 -3'. */
3004 s = set_ordering (s, key, bl_end);
3007 badfieldspec (optarg, N_("stray character in field spec"));
3016 if (outfile && !STREQ (outfile, optarg))
3017 error (SORT_FAILURE, 0, _("multiple output files specified"));
3021 case RANDOM_SOURCE_OPTION:
3022 if (random_source && !STREQ (random_source, optarg))
3023 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3024 random_source = optarg;
3032 specify_sort_size (oi, c, optarg);
3037 char newtab = optarg[0];
3039 error (SORT_FAILURE, 0, _("empty tab"));
3042 if (STREQ (optarg, "\\0"))
3046 /* Provoke with `sort -txx'. Complain about
3047 "multi-character tab" instead of "multibyte tab", so
3048 that the diagnostic's wording does not need to be
3049 changed once multibyte characters are supported. */
3050 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3054 if (tab != TAB_DEFAULT && tab != newtab)
3055 error (SORT_FAILURE, 0, _("incompatible tabs"));
3061 add_temp_dir (optarg);
3069 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3070 through Solaris 7. It is also accepted by many non-Solaris
3071 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3072 -y is marked as obsolete starting with Solaris 8 (1999), but is
3073 still accepted as of Solaris 10 prerelease (2004).
3075 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3076 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3077 and which in general ignores the argument after "-y" if it
3078 consists entirely of digits (it can even be empty). */
3079 if (optarg == argv[optind - 1])
3082 for (p = optarg; ISDIGIT (*p); p++)
3084 optind -= (*p != '\0');
3092 case_GETOPT_HELP_CHAR;
3094 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3097 usage (SORT_FAILURE);
3101 /* Inheritance of global options to individual keys. */
3102 for (key = keylist; key; key = key->next)
3104 if (! (key->ignore || key->translate
3105 || (key->skipsblanks | key->reverse
3106 | key->skipeblanks | key->month | key->numeric
3107 | key->general_numeric
3110 key->ignore = gkey.ignore;
3111 key->translate = gkey.translate;
3112 key->skipsblanks = gkey.skipsblanks;
3113 key->skipeblanks = gkey.skipeblanks;
3114 key->month = gkey.month;
3115 key->numeric = gkey.numeric;
3116 key->general_numeric = gkey.general_numeric;
3117 key->random = gkey.random;
3118 key->reverse = gkey.reverse;
3121 need_random |= key->random;
3124 if (!keylist && (gkey.ignore || gkey.translate
3125 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3126 | gkey.numeric | gkey.general_numeric
3130 need_random |= gkey.random;
3133 check_ordering_compatibility ();
3135 reverse = gkey.reverse;
3139 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3140 if (! randread_source)
3141 die (_("open failed"), random_source);
3144 if (temp_dir_count == 0)
3146 char const *tmp_dir = getenv ("TMPDIR");
3147 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3152 static char *minus = "-";
3161 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3162 quote (files[1]), checkonly);
3166 static char opts[] = {0, 'o', 0};
3167 opts[0] = checkonly;
3168 incompatible_options (opts);
3171 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3172 input is not properly sorted. */
3173 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3178 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3181 for (i = 0; i < nfiles; ++i)
3182 sortfiles[i].name = files[i];
3184 merge (sortfiles, 0, nfiles, outfile);
3185 IF_LINT (free (sortfiles));
3188 sort (files, nfiles, outfile);
3190 if (have_read_stdin && fclose (stdin) == EOF)
3191 die (_("close failed"), "-");
3193 exit (EXIT_SUCCESS);