1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
33 #include "hard-locale.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
59 #define AUTHORS "Mike Haertel", "Paul Eggert"
61 #if HAVE_LANGINFO_CODESET
62 # include <langinfo.h>
65 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
68 # define SA_NOCLDSTOP 0
69 /* No sigprocmask. Always 'return' zero. */
70 # define sigprocmask(How, Set, Oset) (0)
72 # if ! HAVE_SIGINTERRUPT
73 # define siginterrupt(sig, flag) /* empty */
81 #define UCHAR_LIM (UCHAR_MAX + 1)
83 #ifndef DEFAULT_TMPDIR
84 # define DEFAULT_TMPDIR "/tmp"
90 /* POSIX says to exit with status 1 if invoked with -c and the
91 input is not properly sorted. */
92 SORT_OUT_OF_ORDER = 1,
94 /* POSIX says any other irregular exit must exit with a status
95 code greater than 1. */
101 /* The number of times we should try to fork a compression process
102 (we retry if the fork call fails). We don't _need_ to compress
103 temp files, this is just to reduce disk access, so this number
105 MAX_FORK_TRIES_COMPRESS = 2,
107 /* The number of times we should try to fork a decompression process.
108 If we can't fork a decompression process, we can't sort, so this
109 number should be big. */
110 MAX_FORK_TRIES_DECOMPRESS = 8
113 /* The representation of the decimal point in the current locale. */
114 static int decimal_point;
116 /* Thousands separator; if -1, then there isn't one. */
117 static int thousands_sep;
119 /* Nonzero if the corresponding locales are hard. */
120 static bool hard_LC_COLLATE;
122 static bool hard_LC_TIME;
125 #define NONZERO(x) ((x) != 0)
127 /* The kind of blanks for '-b' to skip in various options. */
128 enum blanktype { bl_start, bl_end, bl_both };
130 /* The character marking end of line. Default to \n. */
131 static char eolchar = '\n';
133 /* Lines are held in core as counted strings. */
136 char *text; /* Text of the line. */
137 size_t length; /* Length including final newline. */
138 char *keybeg; /* Start of first key. */
139 char *keylim; /* Limit of first key. */
145 char *buf; /* Dynamically allocated buffer,
146 partitioned into 3 regions:
149 - an array of lines, in reverse order. */
150 size_t used; /* Number of bytes used for input data. */
151 size_t nlines; /* Number of lines in the line array. */
152 size_t alloc; /* Number of bytes allocated. */
153 size_t left; /* Number of bytes left from previous reads. */
154 size_t line_bytes; /* Number of bytes to reserve for each line. */
155 bool eof; /* An EOF has been read. */
160 size_t sword; /* Zero-origin 'word' to start at. */
161 size_t schar; /* Additional characters to skip. */
162 size_t eword; /* Zero-origin first word after field. */
163 size_t echar; /* Additional characters in field. */
164 bool const *ignore; /* Boolean array of characters to ignore. */
165 char const *translate; /* Translation applied to characters. */
166 bool skipsblanks; /* Skip leading blanks when finding start. */
167 bool skipeblanks; /* Skip leading blanks when finding end. */
168 bool numeric; /* Flag for numeric comparison. Handle
169 strings of digits with optional decimal
170 point, but no exponential notation. */
171 bool random; /* Sort by random hash of key. */
172 bool general_numeric; /* Flag for general, numeric comparison.
173 Handle numbers in exponential notation. */
174 bool month; /* Flag for comparison by month name. */
175 bool reverse; /* Reverse the sense of comparison. */
176 struct keyfield *next; /* Next keyfield to try. */
185 /* The name this program was run with. */
188 /* FIXME: None of these tables work with multibyte character sets.
189 Also, there are many other bugs when handling multibyte characters.
190 One way to fix this is to rewrite `sort' to use wide characters
191 internally, but doing this with good performance is a bit
194 /* Table of blanks. */
195 static bool blanks[UCHAR_LIM];
197 /* Table of non-printing characters. */
198 static bool nonprinting[UCHAR_LIM];
200 /* Table of non-dictionary characters (not letters, digits, or blanks). */
201 static bool nondictionary[UCHAR_LIM];
203 /* Translation table folding lower case to upper. */
204 static char fold_toupper[UCHAR_LIM];
206 #define MONTHS_PER_YEAR 12
208 /* Table mapping month names to integers.
209 Alphabetic order allows binary search. */
210 static struct month monthtab[] =
226 /* During the merge phase, the number of files to merge at once. */
229 /* Minimum size for a merge or check buffer. */
230 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
232 /* Minimum sort size; the code might not work with smaller sizes. */
233 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
235 /* The number of bytes needed for a merge or check buffer, which can
236 function relatively efficiently even if it holds only one line. If
237 a longer line is seen, this value is increased. */
238 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
240 /* The approximate maximum number of bytes of main memory to use, as
241 specified by the user. Zero if the user has not specified a size. */
242 static size_t sort_size;
244 /* The guessed size for non-regular files. */
245 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
247 /* Array of directory names in which any temporary files are to be created. */
248 static char const **temp_dirs;
250 /* Number of temporary directory names used. */
251 static size_t temp_dir_count;
253 /* Number of allocated slots in temp_dirs. */
254 static size_t temp_dir_alloc;
256 /* Flag to reverse the order of all comparisons. */
259 /* Flag for stable sort. This turns off the last ditch bytewise
260 comparison of lines, and instead leaves lines in the same order
261 they were read if all keys compare equal. */
264 /* If TAB has this value, blanks separate fields. */
265 enum { TAB_DEFAULT = CHAR_MAX + 1 };
267 /* Tab character separating fields. If TAB_DEFAULT, then fields are
268 separated by the empty string between a non-blank character and a blank
270 static int tab = TAB_DEFAULT;
272 /* Flag to remove consecutive duplicate lines from the output.
273 Only the last of a sequence of equal lines will be output. */
276 /* Nonzero if any of the input files are the standard input. */
277 static bool have_read_stdin;
279 /* List of key field comparisons to be tried. */
280 static struct keyfield *keylist;
282 /* Program used to (de)compress temp files. Must accept -d. */
283 static char const *compress_program;
285 static void sortlines_temp (struct line *, size_t, struct line *);
287 /* Report MESSAGE for FILE, then clean up and exit.
288 If FILE is null, it represents standard output. */
290 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
292 die (char const *message, char const *file)
294 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
301 if (status != EXIT_SUCCESS)
302 fprintf (stderr, _("Try `%s --help' for more information.\n"),
307 Usage: %s [OPTION]... [FILE]...\n\
311 Write sorted concatenation of all FILE(s) to standard output.\n\
315 Mandatory arguments to long options are mandatory for short options too.\n\
322 -b, --ignore-leading-blanks ignore leading blanks\n\
323 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
324 -f, --ignore-case fold lower case to upper case characters\n\
327 -g, --general-numeric-sort compare according to general numerical value\n\
328 -i, --ignore-nonprinting consider only printable characters\n\
329 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
330 -n, --numeric-sort compare according to string numerical value\n\
331 -R, --random-sort sort by random hash of keys\n\
332 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
333 -r, --reverse reverse the result of comparisons\n\
339 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
340 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
341 --compress-program=PROG compress temporaries with PROG;\n\
342 decompress them with PROG -d\n\
343 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
344 -m, --merge merge already sorted files; do not sort\n\
347 -o, --output=FILE write result to FILE instead of standard output\n\
348 -s, --stable stabilize sort by disabling last-resort comparison\n\
349 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
352 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
353 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
354 multiple options specify multiple directories\n\
355 -u, --unique with -c, check for strict ordering;\n\
356 without -c, output only the first of an equal run\n\
359 -z, --zero-terminated end lines with 0 byte, not newline\n\
361 fputs (HELP_OPTION_DESCRIPTION, stdout);
362 fputs (VERSION_OPTION_DESCRIPTION, stdout);
365 POS is F[.C][OPTS], where F is the field number and C the character position\n\
366 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
367 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
368 one or more single-letter ordering options, which override global ordering\n\
369 options for that key. If no key is given, use the entire line as the key.\n\
371 SIZE may be followed by the following multiplicative suffixes:\n\
374 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
376 With no FILE, or when FILE is -, read standard input.\n\
379 The locale specified by the environment affects sort order.\n\
380 Set LC_ALL=C to get the traditional sort order that uses\n\
381 native byte values.\n\
383 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
389 /* For long options that have no equivalent short option, use a
390 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
393 CHECK_OPTION = CHAR_MAX + 1,
394 COMPRESS_PROGRAM_OPTION,
398 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
400 static struct option const long_options[] =
402 {"ignore-leading-blanks", no_argument, NULL, 'b'},
403 {"check", optional_argument, NULL, CHECK_OPTION},
404 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
405 {"dictionary-order", no_argument, NULL, 'd'},
406 {"ignore-case", no_argument, NULL, 'f'},
407 {"general-numeric-sort", no_argument, NULL, 'g'},
408 {"ignore-nonprinting", no_argument, NULL, 'i'},
409 {"key", required_argument, NULL, 'k'},
410 {"merge", no_argument, NULL, 'm'},
411 {"month-sort", no_argument, NULL, 'M'},
412 {"numeric-sort", no_argument, NULL, 'n'},
413 {"random-sort", no_argument, NULL, 'R'},
414 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
415 {"output", required_argument, NULL, 'o'},
416 {"reverse", no_argument, NULL, 'r'},
417 {"stable", no_argument, NULL, 's'},
418 {"buffer-size", required_argument, NULL, 'S'},
419 {"field-separator", required_argument, NULL, 't'},
420 {"temporary-directory", required_argument, NULL, 'T'},
421 {"unique", no_argument, NULL, 'u'},
422 {"zero-terminated", no_argument, NULL, 'z'},
423 {GETOPT_HELP_OPTION_DECL},
424 {GETOPT_VERSION_OPTION_DECL},
428 static char const *const check_args[] =
430 "quiet", "silent", "diagnose-first", NULL
432 static char const check_types[] =
436 ARGMATCH_VERIFY (check_args, check_types);
438 /* The set of signals that are caught. */
439 static sigset_t caught_signals;
441 /* Critical section status. */
448 /* Enter a critical section. */
449 static struct cs_status
452 struct cs_status status;
453 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
457 /* Leave a critical section. */
459 cs_leave (struct cs_status status)
463 /* Ignore failure when restoring the signal mask. */
464 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
468 /* The list of temporary files. */
471 struct tempnode *volatile next;
472 pid_t pid; /* If compressed, the pid of compressor, else zero */
473 char name[1]; /* Actual size is 1 + file name length. */
475 static struct tempnode *volatile temphead;
476 static struct tempnode *volatile *temptail = &temphead;
481 pid_t pid; /* If compressed, the pid of compressor, else zero */
484 /* A table where we store compression process states. We clean up all
485 processes in a timely manner so as not to exhaust system resources,
486 so we store the info on whether the process is still running, or has
488 static Hash_table *proctab;
490 enum { INIT_PROCTAB_SIZE = 47 };
492 enum procstate { ALIVE, ZOMBIE };
494 /* A proctab entry. The COUNT field is there in case we fork a new
495 compression process that has the same PID as an old zombie process
496 that is still in the table (because the process to decompress the
497 temp file it was associated with hasn't started yet). */
501 enum procstate state;
506 proctab_hasher (const void *entry, size_t tabsize)
508 const struct procnode *node = entry;
509 return node->pid % tabsize;
513 proctab_comparator (const void *e1, const void *e2)
515 const struct procnode *n1 = e1, *n2 = e2;
516 return n1->pid == n2->pid;
519 /* The total number of forked processes (compressors and decompressors)
520 that have not been reaped yet. */
521 static size_t nprocs;
523 /* The number of child processes we'll allow before we try to reap some. */
524 enum { MAX_PROCS_BEFORE_REAP = 2 };
526 /* If 0 < PID, wait for the child process with that PID to exit.
527 If PID is -1, clean up a random child process which has finished and
528 return the process ID of that child. If PID is -1 and no processes
529 have quit yet, return 0 without waiting. */
535 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
538 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
542 if (! WIFEXITED (status) || WEXITSTATUS (status))
543 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
551 /* Add the PID of a running compression process to proctab, or update
552 the entry COUNT and STATE fields if it's already there. This also
553 creates the table for us the first time it's called. */
556 register_proc (pid_t pid)
558 struct procnode test, *node;
562 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
571 node = hash_lookup (proctab, &test);
579 node = xmalloc (sizeof *node);
583 hash_insert (proctab, node);
587 /* This is called when we reap a random process. We don't know
588 whether we have reaped a compression process or a decompression
589 process until we look in the table. If there's an ALIVE entry for
590 it, then we have reaped a compression process, so change the state
591 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
594 update_proc (pid_t pid)
596 struct procnode test, *node;
599 node = hash_lookup (proctab, &test);
601 node->state = ZOMBIE;
604 /* This is for when we need to wait for a compression process to exit.
605 If it has a ZOMBIE entry in the table then it's already dead and has
606 been reaped. Note that if there's an ALIVE entry for it, it still may
607 already have died and been reaped if a second process was created with
608 the same PID. This is probably exceedingly rare, but to be on the safe
609 side we will have to wait for any compression process with this PID. */
612 wait_proc (pid_t pid)
614 struct procnode test, *node;
617 node = hash_lookup (proctab, &test);
618 if (node->state == ALIVE)
621 node->state = ZOMBIE;
624 hash_delete (proctab, node);
629 /* Keep reaping finished children as long as there are more to reap.
630 This doesn't block waiting for any of them, it only reaps those
631 that are already dead. */
638 while (0 < nprocs && (pid = reap (-1)))
642 /* Clean up any remaining temporary files. */
647 struct tempnode const *node;
649 for (node = temphead; node; node = node->next)
654 /* Cleanup actions to take when exiting. */
661 /* Clean up any remaining temporary files in a critical section so
662 that a signal handler does not try to clean them too. */
663 struct cs_status cs = cs_enter ();
671 /* Create a new temporary file, returning its newly allocated tempnode.
672 Store into *PFD the file descriptor open for writing. */
674 static struct tempnode *
675 create_temp_file (int *pfd)
677 static char const slashbase[] = "/sortXXXXXX";
678 static size_t temp_dir_index;
681 char const *temp_dir = temp_dirs[temp_dir_index];
682 size_t len = strlen (temp_dir);
683 struct tempnode *node =
684 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
685 char *file = node->name;
688 memcpy (file, temp_dir, len);
689 memcpy (file + len, slashbase, sizeof slashbase);
692 if (++temp_dir_index == temp_dir_count)
695 /* Create the temporary file in a critical section, to avoid races. */
701 temptail = &node->next;
708 die (_("cannot create temporary file"), file);
714 /* Return a stream for FILE, opened with mode HOW. A null FILE means
715 standard output; HOW should be "w". When opening for input, "-"
716 means standard input. To avoid confusion, do not return file
717 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
718 opening an ordinary FILE. */
721 xfopen (const char *file, const char *how)
727 else if (STREQ (file, "-") && *how == 'r')
729 have_read_stdin = true;
734 fp = fopen (file, how);
736 die (_("open failed"), file);
742 /* Close FP, whose name is FILE, and report any errors. */
745 xfclose (FILE *fp, char const *file)
750 /* Allow reading stdin from tty more than once. */
756 /* Don't close stdout just yet. close_stdout does that. */
757 if (fflush (fp) != 0)
758 die (_("fflush failed"), file);
762 if (fclose (fp) != 0)
763 die (_("close failed"), file);
769 dup2_or_die (int oldfd, int newfd)
771 if (dup2 (oldfd, newfd) < 0)
772 error (SORT_FAILURE, errno, _("dup2 failed"));
775 /* Fork a child process for piping to and do common cleanup. The
776 TRIES parameter tells us how many times to try to fork before
777 giving up. Return the PID of the child or -1 if fork failed. */
780 pipe_fork (int pipefds[2], size_t tries)
782 #if HAVE_WORKING_FORK
783 struct tempnode *saved_temphead;
785 unsigned int wait_retry = 1;
786 pid_t pid IF_LINT (= -1);
789 if (pipe (pipefds) < 0)
794 /* This is so the child process won't delete our temp files
795 if it receives a signal before exec-ing. */
797 saved_temphead = temphead;
803 temphead = saved_temphead;
808 if (0 <= pid || errno != EAGAIN)
825 close (STDIN_FILENO);
826 close (STDOUT_FILENO);
833 #else /* ! HAVE_WORKING_FORK */
838 /* Create a temporary file and start a compression program to filter output
839 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
840 set it to the PID of the newly-created process. */
843 create_temp (FILE **pfp, pid_t *ppid)
846 struct tempnode *node = create_temp_file (&tempfd);
847 char *name = node->name;
849 if (compress_program)
853 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
860 register_proc (node->pid);
862 else if (node->pid == 0)
865 dup2_or_die (tempfd, STDOUT_FILENO);
867 dup2_or_die (pipefds[0], STDIN_FILENO);
870 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
871 error (SORT_FAILURE, errno, _("couldn't execute %s"),
878 *pfp = fdopen (tempfd, "w");
880 die (_("couldn't create temporary file"), name);
888 /* Open a compressed temp file and start a decompression process through
889 which to filter the input. PID must be the valid processes ID of the
890 process used to compress the file. */
893 open_temp (const char *name, pid_t pid)
895 int tempfd, pipefds[2];
901 tempfd = open (name, O_RDONLY);
903 die (_("couldn't open temporary file"), name);
905 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
911 else if (child_pid == 0)
914 dup2_or_die (tempfd, STDIN_FILENO);
916 dup2_or_die (pipefds[1], STDOUT_FILENO);
919 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
920 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
924 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
927 fp = fdopen (pipefds[0], "r");
929 die (_("couldn't create temporary file"), name);
935 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
937 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
938 die (_("write failed"), output_file);
941 /* Append DIR to the array of temporary directory names. */
943 add_temp_dir (char const *dir)
945 if (temp_dir_count == temp_dir_alloc)
946 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
948 temp_dirs[temp_dir_count++] = dir;
951 /* Remove NAME from the list of temporary files. */
954 zaptemp (const char *name)
956 struct tempnode *volatile *pnode;
957 struct tempnode *node;
958 struct tempnode *next;
960 int unlink_errno = 0;
963 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
966 /* Unlink the temporary file in a critical section to avoid races. */
969 unlink_status = unlink (name);
970 unlink_errno = errno;
974 if (unlink_status != 0)
975 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
984 struct_month_cmp (const void *m1, const void *m2)
986 struct month const *month1 = m1;
987 struct month const *month2 = m2;
988 return strcmp (month1->name, month2->name);
993 /* Initialize the character class tables. */
1000 for (i = 0; i < UCHAR_LIM; ++i)
1002 blanks[i] = !! isblank (i);
1003 nonprinting[i] = ! isprint (i);
1004 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1005 fold_toupper[i] = toupper (i);
1008 #if HAVE_NL_LANGINFO
1009 /* If we're not in the "C" locale, read different names for months. */
1012 for (i = 0; i < MONTHS_PER_YEAR; i++)
1019 s = (char *) nl_langinfo (ABMON_1 + i);
1021 monthtab[i].name = name = xmalloc (s_len + 1);
1022 monthtab[i].val = i + 1;
1024 for (j = 0; j < s_len; j++)
1025 name[j] = fold_toupper[to_uchar (s[j])];
1028 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1029 sizeof *monthtab, struct_month_cmp);
1034 /* Specify the amount of main memory to use when sorting. */
1036 specify_sort_size (char const *s)
1040 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1042 /* The default unit is KiB. */
1043 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1045 if (n <= UINTMAX_MAX / 1024)
1048 e = LONGINT_OVERFLOW;
1051 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1052 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1061 double mem = physmem_total () * n / 100;
1063 /* Use "<", not "<=", to avoid problems with rounding. */
1064 if (mem < UINTMAX_MAX)
1070 e = LONGINT_OVERFLOW;
1075 if (e == LONGINT_OK)
1077 /* If multiple sort sizes are specified, take the maximum, so
1078 that option order does not matter. */
1085 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1089 e = LONGINT_OVERFLOW;
1092 STRTOL_FATAL_ERROR (s, _("sort size"), e);
1095 /* Return the default sort size. */
1097 default_sort_size (void)
1099 /* Let MEM be available memory or 1/8 of total memory, whichever
1101 double avail = physmem_available ();
1102 double total = physmem_total ();
1103 double mem = MAX (avail, total / 8);
1104 struct rlimit rlimit;
1106 /* Let SIZE be MEM, but no more than the maximum object size or
1107 system resource limits. Avoid the MIN macro here, as it is not
1108 quite right when only one argument is floating point. Don't
1109 bother to check for values like RLIM_INFINITY since in practice
1110 they are not much less than SIZE_MAX. */
1111 size_t size = SIZE_MAX;
1114 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1115 size = rlimit.rlim_cur;
1117 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1118 size = rlimit.rlim_cur;
1121 /* Leave a large safety margin for the above limits, as failure can
1122 occur when they are exceeded. */
1126 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1127 Exceeding RSS is not fatal, but can be quite slow. */
1128 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1129 size = rlimit.rlim_cur / 16 * 15;
1132 /* Use no less than the minimum. */
1133 return MAX (size, MIN_SORT_SIZE);
1136 /* Return the sort buffer size to use with the input files identified
1137 by FPS and FILES, which are alternate names of the same files.
1138 NFILES gives the number of input files; NFPS may be less. Assume
1139 that each input line requires LINE_BYTES extra bytes' worth of line
1140 information. Do not exceed the size bound specified by the user
1141 (or a default size bound, if the user does not specify one). */
1144 sort_buffer_size (FILE *const *fps, size_t nfps,
1145 char *const *files, size_t nfiles,
1148 /* A bound on the input size. If zero, the bound hasn't been
1150 static size_t size_bound;
1152 /* In the worst case, each input byte is a newline. */
1153 size_t worst_case_per_input_byte = line_bytes + 1;
1155 /* Keep enough room for one extra input line and an extra byte.
1156 This extra room might be needed when preparing to read EOF. */
1157 size_t size = worst_case_per_input_byte + 1;
1161 for (i = 0; i < nfiles; i++)
1167 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1168 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1169 : stat (files[i], &st))
1171 die (_("stat failed"), files[i]);
1173 if (S_ISREG (st.st_mode))
1174 file_size = st.st_size;
1177 /* The file has unknown size. If the user specified a sort
1178 buffer size, use that; otherwise, guess the size. */
1181 file_size = INPUT_FILE_SIZE_GUESS;
1186 size_bound = sort_size;
1188 size_bound = default_sort_size ();
1191 /* Add the amount of memory needed to represent the worst case
1192 where the input consists entirely of newlines followed by a
1193 single non-newline. Check for overflow. */
1194 worst_case = file_size * worst_case_per_input_byte + 1;
1195 if (file_size != worst_case / worst_case_per_input_byte
1196 || size_bound - size <= worst_case)
1204 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1205 must be at least sizeof (struct line). Allocate ALLOC bytes
1209 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1211 /* Ensure that the line array is properly aligned. If the desired
1212 size cannot be allocated, repeatedly halve it until allocation
1213 succeeds. The smaller allocation may hurt overall performance,
1214 but that's better than failing. */
1217 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1218 buf->buf = malloc (alloc);
1222 if (alloc <= line_bytes + 1)
1226 buf->line_bytes = line_bytes;
1228 buf->used = buf->left = buf->nlines = 0;
1232 /* Return one past the limit of the line array. */
1234 static inline struct line *
1235 buffer_linelim (struct buffer const *buf)
1237 return (struct line *) (buf->buf + buf->alloc);
1240 /* Return a pointer to the first character of the field specified
1244 begfield (const struct line *line, const struct keyfield *key)
1246 char *ptr = line->text, *lim = ptr + line->length - 1;
1247 size_t sword = key->sword;
1248 size_t schar = key->schar;
1249 size_t remaining_bytes;
1251 /* The leading field separator itself is included in a field when -t
1254 if (tab != TAB_DEFAULT)
1255 while (ptr < lim && sword--)
1257 while (ptr < lim && *ptr != tab)
1263 while (ptr < lim && sword--)
1265 while (ptr < lim && blanks[to_uchar (*ptr)])
1267 while (ptr < lim && !blanks[to_uchar (*ptr)])
1271 if (key->skipsblanks)
1272 while (ptr < lim && blanks[to_uchar (*ptr)])
1275 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1276 remaining_bytes = lim - ptr;
1277 if (schar < remaining_bytes)
1285 /* Return the limit of (a pointer to the first character after) the field
1286 in LINE specified by KEY. */
1289 limfield (const struct line *line, const struct keyfield *key)
1291 char *ptr = line->text, *lim = ptr + line->length - 1;
1292 size_t eword = key->eword, echar = key->echar;
1293 size_t remaining_bytes;
1295 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1296 whichever comes first. If there are more than EWORD fields, leave
1297 PTR pointing at the beginning of the field having zero-based index,
1298 EWORD. If a delimiter character was specified (via -t), then that
1299 `beginning' is the first character following the delimiting TAB.
1300 Otherwise, leave PTR pointing at the first `blank' character after
1301 the preceding field. */
1302 if (tab != TAB_DEFAULT)
1303 while (ptr < lim && eword--)
1305 while (ptr < lim && *ptr != tab)
1307 if (ptr < lim && (eword | echar))
1311 while (ptr < lim && eword--)
1313 while (ptr < lim && blanks[to_uchar (*ptr)])
1315 while (ptr < lim && !blanks[to_uchar (*ptr)])
1319 #ifdef POSIX_UNSPECIFIED
1320 /* The following block of code makes GNU sort incompatible with
1321 standard Unix sort, so it's ifdef'd out for now.
1322 The POSIX spec isn't clear on how to interpret this.
1323 FIXME: request clarification.
1325 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1326 Date: Thu, 30 May 96 12:20:41 -0400
1327 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1329 [...]I believe I've found another bug in `sort'.
1334 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1337 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1341 Unix sort produced the answer I expected: sort on the single character
1342 in column 7. GNU sort produced different results, because it disagrees
1343 on the interpretation of the key-end spec "M.N". Unix sort reads this
1344 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1345 "skip M-1 fields, then either N-1 characters or the rest of the current
1346 field, whichever comes first". This extra clause applies only to
1347 key-ends, not key-starts.
1350 /* Make LIM point to the end of (one byte past) the current field. */
1351 if (tab != TAB_DEFAULT)
1354 newlim = memchr (ptr, tab, lim - ptr);
1362 while (newlim < lim && blanks[to_uchar (*newlim)])
1364 while (newlim < lim && !blanks[to_uchar (*newlim)])
1370 /* If we're ignoring leading blanks when computing the End
1371 of the field, don't start counting bytes until after skipping
1372 past any leading blanks. */
1373 if (key->skipeblanks)
1374 while (ptr < lim && blanks[to_uchar (*ptr)])
1377 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1378 remaining_bytes = lim - ptr;
1379 if (echar < remaining_bytes)
1387 /* Fill BUF reading from FP, moving buf->left bytes from the end
1388 of buf->buf to the beginning first. If EOF is reached and the
1389 file wasn't terminated by a newline, supply one. Set up BUF's line
1390 table too. FILE is the name of the file corresponding to FP.
1391 Return true if some input was read. */
1394 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1396 struct keyfield const *key = keylist;
1398 size_t line_bytes = buf->line_bytes;
1399 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1404 if (buf->used != buf->left)
1406 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1407 buf->used = buf->left;
1413 char *ptr = buf->buf + buf->used;
1414 struct line *linelim = buffer_linelim (buf);
1415 struct line *line = linelim - buf->nlines;
1416 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1417 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1419 while (line_bytes + 1 < avail)
1421 /* Read as many bytes as possible, but do not read so many
1422 bytes that there might not be enough room for the
1423 corresponding line array. The worst case is when the
1424 rest of the input file consists entirely of newlines,
1425 except that the last byte is not a newline. */
1426 size_t readsize = (avail - 1) / (line_bytes + 1);
1427 size_t bytes_read = fread (ptr, 1, readsize, fp);
1428 char *ptrlim = ptr + bytes_read;
1430 avail -= bytes_read;
1432 if (bytes_read != readsize)
1435 die (_("read failed"), file);
1439 if (buf->buf == ptrlim)
1441 if (ptrlim[-1] != eol)
1446 /* Find and record each line in the just-read input. */
1447 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1451 line->text = line_start;
1452 line->length = ptr - line_start;
1453 mergesize = MAX (mergesize, line->length);
1454 avail -= line_bytes;
1458 /* Precompute the position of the first key for
1460 line->keylim = (key->eword == SIZE_MAX
1462 : limfield (line, key));
1464 if (key->sword != SIZE_MAX)
1465 line->keybeg = begfield (line, key);
1468 if (key->skipsblanks)
1469 while (blanks[to_uchar (*line_start)])
1471 line->keybeg = line_start;
1483 buf->used = ptr - buf->buf;
1484 buf->nlines = buffer_linelim (buf) - line;
1485 if (buf->nlines != 0)
1487 buf->left = ptr - line_start;
1488 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1492 /* The current input line is too long to fit in the buffer.
1493 Double the buffer size and try again. */
1494 buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1498 /* Compare strings A and B as numbers without explicitly converting them to
1499 machine numbers. Comparatively slow for short strings, but asymptotically
1503 numcompare (const char *a, const char *b)
1505 while (blanks[to_uchar (*a)])
1507 while (blanks[to_uchar (*b)])
1510 return strnumcmp (a, b, decimal_point, thousands_sep);
1514 general_numcompare (const char *sa, const char *sb)
1516 /* FIXME: add option to warn about failed conversions. */
1517 /* FIXME: maybe add option to try expensive FP conversion
1518 only if A and B can't be compared more cheaply/accurately. */
1522 double a = strtod (sa, &ea);
1523 double b = strtod (sb, &eb);
1525 /* Put conversion errors at the start of the collating sequence. */
1527 return sb == eb ? 0 : -1;
1531 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1532 conversion errors but before numbers; sort them by internal
1533 bit-pattern, for lack of a more portable alternative. */
1539 : memcmp ((char *) &a, (char *) &b, sizeof a));
1542 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1543 Return 0 if the name in S is not recognized. */
1546 getmonth (char const *month, size_t len)
1549 size_t hi = MONTHS_PER_YEAR;
1550 char const *monthlim = month + len;
1554 if (month == monthlim)
1556 if (!blanks[to_uchar (*month)])
1563 size_t ix = (lo + hi) / 2;
1564 char const *m = month;
1565 char const *n = monthtab[ix].name;
1570 return monthtab[ix].val;
1571 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1576 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1588 /* A source of random data. */
1589 static struct randread_source *randread_source;
1591 /* Return the Ith randomly-generated state. The caller must invoke
1592 random_state (H) for all H less than I before invoking random_state
1595 static struct md5_ctx
1596 random_state (size_t i)
1598 /* An array of states resulting from the random data, and counts of
1599 its used and allocated members. */
1600 static struct md5_ctx *state;
1602 static size_t allocated;
1604 struct md5_ctx *s = &state[i];
1608 unsigned char buf[MD5_DIGEST_SIZE];
1614 state = X2NREALLOC (state, &allocated);
1618 randread (randread_source, buf, sizeof buf);
1620 md5_process_bytes (buf, sizeof buf, s);
1626 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1627 with length LENGTHB. Return negative if less, zero if equal,
1628 positive if greater. */
1631 cmp_hashes (char const *texta, size_t lena,
1632 char const *textb, size_t lenb)
1634 /* Try random hashes until a pair of hashes disagree. But if the
1635 first pair of random hashes agree, check whether the keys are
1636 identical and if so report no difference. */
1641 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1642 struct md5_ctx s[2];
1643 s[0] = s[1] = random_state (i);
1644 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1645 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1646 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1649 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1656 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1657 using one or more random hash functions. */
1660 compare_random (char *restrict texta, size_t lena,
1661 char *restrict textb, size_t lenb)
1665 if (! hard_LC_COLLATE)
1666 diff = cmp_hashes (texta, lena, textb, lenb);
1669 /* Transform the text into the basis of comparison, so that byte
1670 strings that would otherwise considered to be equal are
1671 considered equal here even if their bytes differ. */
1674 char stackbuf[4000];
1675 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1676 bool a_fits = tlena <= sizeof stackbuf;
1677 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1678 (a_fits ? sizeof stackbuf - tlena : 0),
1681 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1685 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1686 faster by avoiding the need for an extra buffer copy. */
1687 buf = xmalloc (tlena + tlenb + 1);
1688 xmemxfrm (buf, tlena + 1, texta, lena);
1689 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1692 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1694 if (buf != stackbuf)
1701 /* Compare two lines A and B trying every key in sequence until there
1702 are no more keys or a difference is found. */
1705 keycompare (const struct line *a, const struct line *b)
1707 struct keyfield const *key = keylist;
1709 /* For the first iteration only, the key positions have been
1710 precomputed for us. */
1711 char *texta = a->keybeg;
1712 char *textb = b->keybeg;
1713 char *lima = a->keylim;
1714 char *limb = b->keylim;
1720 char const *translate = key->translate;
1721 bool const *ignore = key->ignore;
1723 /* Find the lengths. */
1724 size_t lena = lima <= texta ? 0 : lima - texta;
1725 size_t lenb = limb <= textb ? 0 : limb - textb;
1727 /* Actually compare the fields. */
1730 diff = compare_random (texta, lena, textb, lenb);
1731 else if (key->numeric | key->general_numeric)
1733 char savea = *lima, saveb = *limb;
1735 *lima = *limb = '\0';
1736 diff = ((key->numeric ? numcompare : general_numcompare)
1738 *lima = savea, *limb = saveb;
1740 else if (key->month)
1741 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1742 /* Sorting like this may become slow, so in a simple locale the user
1743 can select a faster sort that is similar to ascii sort. */
1744 else if (hard_LC_COLLATE)
1746 if (ignore || translate)
1749 size_t size = lena + 1 + lenb + 1;
1750 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1751 char *copy_b = copy_a + lena + 1;
1752 size_t new_len_a, new_len_b, i;
1754 /* Ignore and/or translate chars before comparing. */
1755 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1759 copy_a[new_len_a] = (translate
1760 ? translate[to_uchar (texta[i])]
1762 if (!ignore || !ignore[to_uchar (texta[i])])
1767 copy_b[new_len_b] = (translate
1768 ? translate[to_uchar (textb[i])]
1770 if (!ignore || !ignore[to_uchar (textb[i])])
1775 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1777 if (sizeof buf < size)
1781 diff = - NONZERO (lenb);
1785 diff = xmemcoll (texta, lena, textb, lenb);
1789 #define CMP_WITH_IGNORE(A, B) \
1794 while (texta < lima && ignore[to_uchar (*texta)]) \
1796 while (textb < limb && ignore[to_uchar (*textb)]) \
1798 if (! (texta < lima && textb < limb)) \
1800 diff = to_uchar (A) - to_uchar (B); \
1807 diff = (texta < lima) - (textb < limb); \
1812 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1813 translate[to_uchar (*textb)]);
1815 CMP_WITH_IGNORE (*texta, *textb);
1818 diff = - NONZERO (lenb);
1825 while (texta < lima && textb < limb)
1827 diff = (to_uchar (translate[to_uchar (*texta++)])
1828 - to_uchar (translate[to_uchar (*textb++)]));
1835 diff = memcmp (texta, textb, MIN (lena, lenb));
1839 diff = lena < lenb ? -1 : lena != lenb;
1849 /* Find the beginning and limit of the next field. */
1850 if (key->eword != SIZE_MAX)
1851 lima = limfield (a, key), limb = limfield (b, key);
1853 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1855 if (key->sword != SIZE_MAX)
1856 texta = begfield (a, key), textb = begfield (b, key);
1859 texta = a->text, textb = b->text;
1860 if (key->skipsblanks)
1862 while (texta < lima && blanks[to_uchar (*texta)])
1864 while (textb < limb && blanks[to_uchar (*textb)])
1875 return key->reverse ? -diff : diff;
1878 /* Compare two lines A and B, returning negative, zero, or positive
1879 depending on whether A compares less than, equal to, or greater than B. */
1882 compare (const struct line *a, const struct line *b)
1887 /* First try to compare on the specified keys (if any).
1888 The only two cases with no key at all are unadorned sort,
1889 and unadorned sort -r. */
1892 diff = keycompare (a, b);
1893 if (diff | unique | stable)
1897 /* If the keys all compare equal (or no keys were specified)
1898 fall through to the default comparison. */
1899 alen = a->length - 1, blen = b->length - 1;
1902 diff = - NONZERO (blen);
1905 else if (hard_LC_COLLATE)
1906 diff = xmemcoll (a->text, alen, b->text, blen);
1907 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1908 diff = alen < blen ? -1 : alen != blen;
1910 return reverse ? -diff : diff;
1913 /* Check that the lines read from FILE_NAME come in order. Return
1914 true if they are in order. If CHECKONLY == 'c', also print a
1915 diagnostic (FILE_NAME, line number, contents of line) to stderr if
1916 they are not in order. */
1919 check (char const *file_name, char checkonly)
1921 FILE *fp = xfopen (file_name, "r");
1922 struct buffer buf; /* Input buffer. */
1923 struct line temp; /* Copy of previous line. */
1925 uintmax_t line_number = 0;
1926 struct keyfield const *key = keylist;
1927 bool nonunique = ! unique;
1928 bool ordered = true;
1930 initbuf (&buf, sizeof (struct line),
1931 MAX (merge_buffer_size, sort_size));
1934 while (fillbuf (&buf, fp, file_name))
1936 struct line const *line = buffer_linelim (&buf);
1937 struct line const *linebase = line - buf.nlines;
1939 /* Make sure the line saved from the old buffer contents is
1940 less than or equal to the first line of the new buffer. */
1941 if (alloc && nonunique <= compare (&temp, line - 1))
1945 if (checkonly == 'c')
1947 struct line const *disorder_line = line - 1;
1948 uintmax_t disorder_line_number =
1949 buffer_linelim (&buf) - disorder_line + line_number;
1950 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1951 fprintf (stderr, _("%s: %s:%s: disorder: "),
1952 program_name, file_name,
1953 umaxtostr (disorder_line_number, hr_buf));
1954 write_bytes (disorder_line->text, disorder_line->length,
1955 stderr, _("standard error"));
1963 /* Compare each line in the buffer with its successor. */
1964 while (linebase < --line)
1965 if (nonunique <= compare (line, line - 1))
1966 goto found_disorder;
1968 line_number += buf.nlines;
1970 /* Save the last line of the buffer. */
1971 if (alloc < line->length)
1978 alloc = line->length;
1982 while (alloc < line->length);
1984 temp.text = xrealloc (temp.text, alloc);
1986 memcpy (temp.text, line->text, line->length);
1987 temp.length = line->length;
1990 temp.keybeg = temp.text + (line->keybeg - line->text);
1991 temp.keylim = temp.text + (line->keylim - line->text);
1995 xfclose (fp, file_name);
2001 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2002 files (all of which are at the start of the FILES array), and
2003 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2004 Close input and output files before returning.
2005 OUTPUT_FILE gives the name of the output file. If it is NULL,
2006 the output file is standard output. If OFP is NULL, the output
2007 file has not been opened yet (or written to, if standard output). */
2010 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2011 FILE *ofp, char const *output_file)
2013 FILE *fps[NMERGE]; /* Input streams for each file. */
2014 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2015 struct line saved; /* Saved line storage for unique check. */
2016 struct line const *savedline = NULL;
2017 /* &saved if there is a saved line. */
2018 size_t savealloc = 0; /* Size allocated for the saved line. */
2019 struct line const *cur[NMERGE]; /* Current line in each line table. */
2020 struct line const *base[NMERGE]; /* Base of each line table. */
2021 size_t ord[NMERGE]; /* Table representing a permutation of fps,
2022 such that cur[ord[0]] is the smallest line
2023 and will be next output. */
2027 struct keyfield const *key = keylist;
2030 /* Read initial lines from each input file. */
2031 for (i = 0; i < nfiles; )
2033 fps[i] = (files[i].pid
2034 ? open_temp (files[i].name, files[i].pid)
2035 : xfopen (files[i].name, "r"));
2036 initbuf (&buffer[i], sizeof (struct line),
2037 MAX (merge_buffer_size, sort_size / nfiles));
2038 if (fillbuf (&buffer[i], fps[i], files[i].name))
2040 struct line const *linelim = buffer_linelim (&buffer[i]);
2041 cur[i] = linelim - 1;
2042 base[i] = linelim - buffer[i].nlines;
2047 /* fps[i] is empty; eliminate it from future consideration. */
2048 xfclose (fps[i], files[i].name);
2052 zaptemp (files[i].name);
2054 free (buffer[i].buf);
2056 for (j = i; j < nfiles; ++j)
2057 files[j] = files[j + 1];
2062 ofp = xfopen (output_file, "w");
2064 /* Set up the ord table according to comparisons among input lines.
2065 Since this only reorders two items if one is strictly greater than
2066 the other, it is stable. */
2067 for (i = 0; i < nfiles; ++i)
2069 for (i = 1; i < nfiles; ++i)
2070 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2071 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2073 /* Repeatedly output the smallest line until no input remains. */
2076 struct line const *smallest = cur[ord[0]];
2078 /* If uniquified output is turned on, output only the first of
2079 an identical series of lines. */
2082 if (savedline && compare (savedline, smallest))
2085 write_bytes (saved.text, saved.length, ofp, output_file);
2090 if (savealloc < smallest->length)
2095 savealloc = smallest->length;
2098 while ((savealloc *= 2) < smallest->length);
2100 saved.text = xrealloc (saved.text, savealloc);
2102 saved.length = smallest->length;
2103 memcpy (saved.text, smallest->text, saved.length);
2107 saved.text + (smallest->keybeg - smallest->text);
2109 saved.text + (smallest->keylim - smallest->text);
2114 write_bytes (smallest->text, smallest->length, ofp, output_file);
2116 /* Check if we need to read more lines into core. */
2117 if (base[ord[0]] < smallest)
2118 cur[ord[0]] = smallest - 1;
2121 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2123 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2124 cur[ord[0]] = linelim - 1;
2125 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2129 /* We reached EOF on fps[ord[0]]. */
2130 for (i = 1; i < nfiles; ++i)
2131 if (ord[i] > ord[0])
2134 xfclose (fps[ord[0]], files[ord[0]].name);
2135 if (ord[0] < ntemps)
2138 zaptemp (files[ord[0]].name);
2140 free (buffer[ord[0]].buf);
2141 for (i = ord[0]; i < nfiles; ++i)
2143 fps[i] = fps[i + 1];
2144 files[i] = files[i + 1];
2145 buffer[i] = buffer[i + 1];
2146 cur[i] = cur[i + 1];
2147 base[i] = base[i + 1];
2149 for (i = 0; i < nfiles; ++i)
2150 ord[i] = ord[i + 1];
2155 /* The new line just read in may be larger than other lines
2156 already in main memory; push it back in the queue until we
2157 encounter a line larger than it. Optimize for the common
2158 case where the new line is smallest. */
2163 size_t ord0 = ord[0];
2164 size_t count_of_smaller_lines;
2168 int cmp = compare (cur[ord0], cur[ord[probe]]);
2169 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2173 probe = (lo + hi) / 2;
2176 count_of_smaller_lines = lo - 1;
2177 for (j = 0; j < count_of_smaller_lines; j++)
2178 ord[j] = ord[j + 1];
2179 ord[count_of_smaller_lines] = ord0;
2182 /* Free up some resources every once in a while. */
2183 if (MAX_PROCS_BEFORE_REAP < nprocs)
2187 if (unique && savedline)
2189 write_bytes (saved.text, saved.length, ofp, output_file);
2193 xfclose (ofp, output_file);
2196 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2197 and HI (with NHI members). T, LO, and HI point just past their
2198 respective arrays, and the arrays are in reverse order. NLO and
2199 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2202 mergelines (struct line *t,
2203 struct line const *lo, size_t nlo,
2204 struct line const *hi, size_t nhi)
2207 if (compare (lo - 1, hi - 1) <= 0)
2212 /* HI - NHI equalled T - (NLO + NHI) when this function
2213 began. Therefore HI must equal T now, and there is no
2214 need to copy from HI to T. */
2232 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2233 NLINES must be at least 2.
2234 The input and output arrays are in reverse order, and LINES and
2235 TEMP point just past the end of their respective arrays.
2237 Use a recursive divide-and-conquer algorithm, in the style
2238 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2239 the optimization suggested by exercise 5.2.4-10; this requires room
2240 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2241 writes that this memory optimization was originally published by
2242 D. A. Bell, Comp J. 1 (1958), 75. */
2245 sortlines (struct line *lines, size_t nlines, struct line *temp)
2249 if (0 < compare (&lines[-1], &lines[-2]))
2251 struct line tmp = lines[-1];
2252 lines[-1] = lines[-2];
2258 size_t nlo = nlines / 2;
2259 size_t nhi = nlines - nlo;
2260 struct line *lo = lines;
2261 struct line *hi = lines - nlo;
2262 struct line *sorted_lo = temp;
2264 sortlines (hi, nhi, temp);
2266 sortlines_temp (lo, nlo, sorted_lo);
2268 sorted_lo[-1] = lo[-1];
2270 mergelines (lines, sorted_lo, nlo, hi, nhi);
2274 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2275 rather than sorting in place. */
2278 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2282 /* Declare `swap' as int, not bool, to work around a bug
2283 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2284 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2285 int swap = (0 < compare (&lines[-1], &lines[-2]));
2286 temp[-1] = lines[-1 - swap];
2287 temp[-2] = lines[-2 + swap];
2291 size_t nlo = nlines / 2;
2292 size_t nhi = nlines - nlo;
2293 struct line *lo = lines;
2294 struct line *hi = lines - nlo;
2295 struct line *sorted_hi = temp - nlo;
2297 sortlines_temp (hi, nhi, sorted_hi);
2299 sortlines (lo, nlo, temp);
2301 mergelines (temp, lo, nlo, sorted_hi, nhi);
2305 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2306 the same as OUTFILE. If found, merge the found instances (and perhaps
2307 some other files) into a temporary file so that it can in turn be
2308 merged into OUTFILE without destroying OUTFILE before it is completely
2309 read. Return the new value of NFILES, which differs from the old if
2310 some merging occurred.
2312 This test ensures that an otherwise-erroneous use like
2313 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2314 It's not clear that POSIX requires this nicety.
2315 Detect common error cases, but don't try to catch obscure cases like
2316 "cat ... FILE ... | sort -m -o FILE"
2317 where traditional "sort" doesn't copy the input and where
2318 people should know that they're getting into trouble anyway.
2319 Catching these obscure cases would slow down performance in
2323 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2324 size_t nfiles, char const *outfile)
2327 bool got_outstat = false;
2328 struct stat outstat;
2330 for (i = ntemps; i < nfiles; i++)
2332 bool is_stdin = STREQ (files[i].name, "-");
2336 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2343 ? stat (outfile, &outstat)
2344 : fstat (STDOUT_FILENO, &outstat))
2351 ? fstat (STDIN_FILENO, &instat)
2352 : stat (files[i].name, &instat))
2354 && SAME_INODE (instat, outstat));
2361 char *temp = create_temp (&tftp, &pid);
2362 mergefps (&files[i],0, nfiles - i, tftp, temp);
2363 files[i].name = temp;
2372 /* Merge the input FILES. NTEMPS is the number of files at the
2373 start of FILES that are temporary; it is zero at the top level.
2374 NFILES is the total number of files. Put the output in
2375 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2378 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2379 char const *output_file)
2381 while (NMERGE < nfiles)
2383 /* Number of input files processed so far. */
2386 /* Number of output files generated so far. */
2389 /* nfiles % NMERGE; this counts input files that are left over
2390 after all full-sized merges have been done. */
2393 /* Number of easily-available slots at the next loop iteration. */
2396 /* Do as many NMERGE-size merges as possible. */
2397 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2401 char *temp = create_temp (&tfp, &pid);
2402 size_t nt = MIN (ntemps, NMERGE);
2404 mergefps (&files[in], nt, NMERGE, tfp, temp);
2405 files[out].name = temp;
2406 files[out].pid = pid;
2409 remainder = nfiles - in;
2410 cheap_slots = NMERGE - out % NMERGE;
2412 if (cheap_slots < remainder)
2414 /* So many files remain that they can't all be put into the last
2415 NMERGE-sized output window. Do one more merge. Merge as few
2416 files as possible, to avoid needless I/O. */
2417 size_t nshortmerge = remainder - cheap_slots + 1;
2420 char *temp = create_temp (&tfp, &pid);
2421 size_t nt = MIN (ntemps, nshortmerge);
2423 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2424 files[out].name = temp;
2425 files[out++].pid = pid;
2429 /* Put the remaining input files into the last NMERGE-sized output
2430 window, so they will be merged in the next pass. */
2431 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2436 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2437 mergefps (files, ntemps, nfiles, NULL, output_file);
2440 /* Sort NFILES FILES onto OUTPUT_FILE. */
2443 sort (char * const *files, size_t nfiles, char const *output_file)
2447 bool output_file_created = false;
2453 char const *temp_output;
2454 char const *file = *files;
2455 FILE *fp = xfopen (file, "r");
2457 size_t bytes_per_line = (2 * sizeof (struct line)
2458 - sizeof (struct line) / 2);
2461 initbuf (&buf, bytes_per_line,
2462 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2467 while (fillbuf (&buf, fp, file))
2470 struct line *linebase;
2472 if (buf.eof && nfiles
2473 && (bytes_per_line + 1
2474 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2476 /* End of file, but there is more input and buffer room.
2477 Concatenate the next input file; this is faster in
2479 buf.left = buf.used;
2483 line = buffer_linelim (&buf);
2484 linebase = line - buf.nlines;
2486 sortlines (line, buf.nlines, linebase);
2487 if (buf.eof && !nfiles && !ntemps && !buf.left)
2490 tfp = xfopen (output_file, "w");
2491 temp_output = output_file;
2492 output_file_created = true;
2497 temp_output = create_temp (&tfp, NULL);
2503 write_bytes (line->text, line->length, tfp, temp_output);
2505 while (linebase < line && compare (line, line - 1) == 0)
2508 while (linebase < line);
2510 xfclose (tfp, temp_output);
2512 /* Free up some resources every once in a while. */
2513 if (MAX_PROCS_BEFORE_REAP < nprocs)
2516 if (output_file_created)
2525 if (! output_file_created)
2528 struct tempnode *node = temphead;
2529 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2530 for (i = 0; node; i++)
2532 tempfiles[i].name = node->name;
2533 tempfiles[i].pid = node->pid;
2536 merge (tempfiles, ntemps, ntemps, output_file);
2541 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2544 insertkey (struct keyfield *key_arg)
2546 struct keyfield **p;
2547 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2549 for (p = &keylist; *p; p = &(*p)->next)
2555 /* Report a bad field specification SPEC, with extra info MSGID. */
2557 static void badfieldspec (char const *, char const *)
2560 badfieldspec (char const *spec, char const *msgid)
2562 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2563 _(msgid), quote (spec));
2567 /* Report incompatible options. */
2569 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2571 incompatible_options (char const *opts)
2573 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2577 /* Check compatibility of ordering options. */
2580 check_ordering_compatibility (void)
2582 struct keyfield const *key;
2584 for (key = keylist; key; key = key->next)
2585 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2587 || (key->random && key->translate))
2591 if (key->ignore == nondictionary)
2595 if (key->general_numeric)
2597 if (key->ignore == nonprinting)
2606 incompatible_options (opts);
2610 /* Parse the leading integer in STRING and store the resulting value
2611 (which must fit into size_t) into *VAL. Return the address of the
2612 suffix after the integer. If the value is too large, silently
2613 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2614 failure; otherwise, report MSGID and exit on failure. */
2617 parse_field_count (char const *string, size_t *val, char const *msgid)
2622 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2625 case LONGINT_INVALID_SUFFIX_CHAR:
2630 case LONGINT_OVERFLOW:
2631 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2635 case LONGINT_INVALID:
2637 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2638 _(msgid), quote (string));
2645 /* Handle interrupts and hangups. */
2648 sighandler (int sig)
2651 signal (sig, SIG_IGN);
2655 signal (sig, SIG_DFL);
2659 /* Set the ordering options for KEY specified in S.
2660 Return the address of the first character in S that
2661 is not a valid ordering option.
2662 BLANKTYPE is the kind of blanks that 'b' should skip. */
2665 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2672 if (blanktype == bl_start || blanktype == bl_both)
2673 key->skipsblanks = true;
2674 if (blanktype == bl_end || blanktype == bl_both)
2675 key->skipeblanks = true;
2678 key->ignore = nondictionary;
2681 key->translate = fold_toupper;
2684 key->general_numeric = true;
2687 /* Option order should not matter, so don't let -i override
2688 -d. -d implies -i, but -i does not imply -d. */
2690 key->ignore = nonprinting;
2696 key->numeric = true;
2702 key->reverse = true;
2712 static struct keyfield *
2713 key_init (struct keyfield *key)
2715 memset (key, 0, sizeof *key);
2716 key->eword = SIZE_MAX;
2721 main (int argc, char **argv)
2723 struct keyfield *key;
2724 struct keyfield key_buf;
2725 struct keyfield gkey;
2729 bool mergeonly = false;
2730 char *random_source = NULL;
2731 bool need_random = false;
2733 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2734 bool obsolete_usage = (posix2_version () < 200112);
2736 char const *outfile = NULL;
2738 initialize_main (&argc, &argv);
2739 program_name = argv[0];
2740 setlocale (LC_ALL, "");
2741 bindtextdomain (PACKAGE, LOCALEDIR);
2742 textdomain (PACKAGE);
2744 initialize_exit_failure (SORT_FAILURE);
2746 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2747 #if HAVE_NL_LANGINFO
2748 hard_LC_TIME = hard_locale (LC_TIME);
2751 /* Get locale's representation of the decimal point. */
2753 struct lconv const *locale = localeconv ();
2755 /* If the locale doesn't define a decimal point, or if the decimal
2756 point is multibyte, use the C locale's decimal point. FIXME:
2757 add support for multibyte decimal points. */
2758 decimal_point = to_uchar (locale->decimal_point[0]);
2759 if (! decimal_point || locale->decimal_point[1])
2760 decimal_point = '.';
2762 /* FIXME: add support for multibyte thousands separators. */
2763 thousands_sep = to_uchar (*locale->thousands_sep);
2764 if (! thousands_sep || locale->thousands_sep[1])
2768 have_read_stdin = false;
2773 static int const sig[] =
2775 /* The usual suspects. */
2776 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2793 enum { nsigs = sizeof sig / sizeof sig[0] };
2796 struct sigaction act;
2798 sigemptyset (&caught_signals);
2799 for (i = 0; i < nsigs; i++)
2801 sigaction (sig[i], NULL, &act);
2802 if (act.sa_handler != SIG_IGN)
2803 sigaddset (&caught_signals, sig[i]);
2806 act.sa_handler = sighandler;
2807 act.sa_mask = caught_signals;
2810 for (i = 0; i < nsigs; i++)
2811 if (sigismember (&caught_signals, sig[i]))
2812 sigaction (sig[i], &act, NULL);
2814 for (i = 0; i < nsigs; i++)
2815 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2817 signal (sig[i], sighandler);
2818 siginterrupt (sig[i], 1);
2823 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2824 atexit (exit_cleanup);
2826 gkey.sword = gkey.eword = SIZE_MAX;
2828 gkey.translate = NULL;
2829 gkey.numeric = gkey.general_numeric = gkey.random = false;
2830 gkey.month = gkey.reverse = false;
2831 gkey.skipsblanks = gkey.skipeblanks = false;
2833 files = xnmalloc (argc, sizeof *files);
2837 /* Parse an operand as a file after "--" was seen; or if
2838 pedantic and a file was seen, unless the POSIX version
2839 predates 1003.1-2001 and -c was not seen and the operand is
2840 "-o FILE" or "-oFILE". */
2843 || (posixly_correct && nfiles != 0
2844 && ! (obsolete_usage
2847 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2848 && (argv[optind][2] || optind + 1 != argc)))
2849 || ((c = getopt_long (argc, argv, short_options,
2850 long_options, NULL))
2855 files[nfiles++] = argv[optind++];
2861 if (optarg[0] == '+')
2863 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2864 && ISDIGIT (argv[optind][1]));
2865 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2868 /* Treat +POS1 [-POS2] as a key if possible; but silently
2869 treat an operand as a file if it is not a valid +POS1. */
2870 key = key_init (&key_buf);
2871 s = parse_field_count (optarg + 1, &key->sword, NULL);
2873 s = parse_field_count (s + 1, &key->schar, NULL);
2874 if (! (key->sword | key->schar))
2875 key->sword = SIZE_MAX;
2876 if (! s || *set_ordering (s, key, bl_start))
2883 if (minus_pos_usage)
2885 char const *optarg1 = argv[optind++];
2886 s = parse_field_count (optarg1 + 1, &key->eword,
2887 N_("invalid number after `-'"));
2889 s = parse_field_count (s + 1, &key->echar,
2890 N_("invalid number after `.'"));
2891 if (*set_ordering (s, key, bl_end))
2892 badfieldspec (optarg1,
2893 N_("stray character in field spec"));
2900 files[nfiles++] = optarg;
2916 set_ordering (str, &gkey, bl_both);
2922 ? XARGMATCH ("--check", optarg, check_args, check_types)
2927 if (checkonly && checkonly != c)
2928 incompatible_options ("cC");
2932 case COMPRESS_PROGRAM_OPTION:
2933 if (compress_program && strcmp (compress_program, optarg) != 0)
2934 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2935 compress_program = optarg;
2939 key = key_init (&key_buf);
2942 s = parse_field_count (optarg, &key->sword,
2943 N_("invalid number at field start"));
2946 /* Provoke with `sort -k0' */
2947 badfieldspec (optarg, N_("field number is zero"));
2951 s = parse_field_count (s + 1, &key->schar,
2952 N_("invalid number after `.'"));
2955 /* Provoke with `sort -k1.0' */
2956 badfieldspec (optarg, N_("character offset is zero"));
2959 if (! (key->sword | key->schar))
2960 key->sword = SIZE_MAX;
2961 s = set_ordering (s, key, bl_start);
2964 key->eword = SIZE_MAX;
2970 s = parse_field_count (s + 1, &key->eword,
2971 N_("invalid number after `,'"));
2974 /* Provoke with `sort -k1,0' */
2975 badfieldspec (optarg, N_("field number is zero"));
2978 s = parse_field_count (s + 1, &key->echar,
2979 N_("invalid number after `.'"));
2982 /* `-k 2,3' is equivalent to `+1 -3'. */
2985 s = set_ordering (s, key, bl_end);
2988 badfieldspec (optarg, N_("stray character in field spec"));
2997 if (outfile && !STREQ (outfile, optarg))
2998 error (SORT_FAILURE, 0, _("multiple output files specified"));
3002 case RANDOM_SOURCE_OPTION:
3003 if (random_source && !STREQ (random_source, optarg))
3004 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3005 random_source = optarg;
3013 specify_sort_size (optarg);
3018 char newtab = optarg[0];
3020 error (SORT_FAILURE, 0, _("empty tab"));
3023 if (STREQ (optarg, "\\0"))
3027 /* Provoke with `sort -txx'. Complain about
3028 "multi-character tab" instead of "multibyte tab", so
3029 that the diagnostic's wording does not need to be
3030 changed once multibyte characters are supported. */
3031 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3035 if (tab != TAB_DEFAULT && tab != newtab)
3036 error (SORT_FAILURE, 0, _("incompatible tabs"));
3042 add_temp_dir (optarg);
3050 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3051 through Solaris 7. It is also accepted by many non-Solaris
3052 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3053 -y is marked as obsolete starting with Solaris 8 (1999), but is
3054 still accepted as of Solaris 10 prerelease (2004).
3056 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3057 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3058 and which in general ignores the argument after "-y" if it
3059 consists entirely of digits (it can even be empty). */
3060 if (optarg == argv[optind - 1])
3063 for (p = optarg; ISDIGIT (*p); p++)
3065 optind -= (*p != '\0');
3073 case_GETOPT_HELP_CHAR;
3075 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3078 usage (SORT_FAILURE);
3082 /* Inheritance of global options to individual keys. */
3083 for (key = keylist; key; key = key->next)
3085 if (! (key->ignore || key->translate
3086 || (key->skipsblanks | key->reverse
3087 | key->skipeblanks | key->month | key->numeric
3088 | key->general_numeric
3091 key->ignore = gkey.ignore;
3092 key->translate = gkey.translate;
3093 key->skipsblanks = gkey.skipsblanks;
3094 key->skipeblanks = gkey.skipeblanks;
3095 key->month = gkey.month;
3096 key->numeric = gkey.numeric;
3097 key->general_numeric = gkey.general_numeric;
3098 key->random = gkey.random;
3099 key->reverse = gkey.reverse;
3102 need_random |= key->random;
3105 if (!keylist && (gkey.ignore || gkey.translate
3106 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3107 | gkey.numeric | gkey.general_numeric
3111 need_random |= gkey.random;
3114 check_ordering_compatibility ();
3116 reverse = gkey.reverse;
3120 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3121 if (! randread_source)
3122 die (_("open failed"), random_source);
3125 if (temp_dir_count == 0)
3127 char const *tmp_dir = getenv ("TMPDIR");
3128 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3133 static char *minus = "-";
3142 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3143 quote (files[1]), checkonly);
3147 static char opts[] = {0, 'o', 0};
3148 opts[0] = checkonly;
3149 incompatible_options (opts);
3152 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3153 input is not properly sorted. */
3154 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3159 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3162 for (i = 0; i < nfiles; ++i)
3163 sortfiles[i].name = files[i];
3165 merge (sortfiles, 0, nfiles, outfile);
3168 sort (files, nfiles, outfile);
3170 if (have_read_stdin && fclose (stdin) == EOF)
3171 die (_("close failed"), "-");
3173 exit (EXIT_SUCCESS);