1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
79 #if !defined OPEN_MAX && defined NR_OPEN
80 # define OPEN_MAX NR_OPEN
86 #define UCHAR_LIM (UCHAR_MAX + 1)
88 #ifndef DEFAULT_TMPDIR
89 # define DEFAULT_TMPDIR "/tmp"
95 /* POSIX says to exit with status 1 if invoked with -c and the
96 input is not properly sorted. */
97 SORT_OUT_OF_ORDER = 1,
99 /* POSIX says any other irregular exit must exit with a status
100 code greater than 1. */
106 /* The number of times we should try to fork a compression process
107 (we retry if the fork call fails). We don't _need_ to compress
108 temp files, this is just to reduce disk access, so this number
110 MAX_FORK_TRIES_COMPRESS = 2,
112 /* The number of times we should try to fork a decompression process.
113 If we can't fork a decompression process, we can't sort, so this
114 number should be big. */
115 MAX_FORK_TRIES_DECOMPRESS = 8
118 /* The representation of the decimal point in the current locale. */
119 static int decimal_point;
121 /* Thousands separator; if -1, then there isn't one. */
122 static int thousands_sep;
124 /* Nonzero if the corresponding locales are hard. */
125 static bool hard_LC_COLLATE;
127 static bool hard_LC_TIME;
130 #define NONZERO(x) ((x) != 0)
132 /* The kind of blanks for '-b' to skip in various options. */
133 enum blanktype { bl_start, bl_end, bl_both };
135 /* The character marking end of line. Default to \n. */
136 static char eolchar = '\n';
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 size_t length; /* Length including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
150 char *buf; /* Dynamically allocated buffer,
151 partitioned into 3 regions:
154 - an array of lines, in reverse order. */
155 size_t used; /* Number of bytes used for input data. */
156 size_t nlines; /* Number of lines in the line array. */
157 size_t alloc; /* Number of bytes allocated. */
158 size_t left; /* Number of bytes left from previous reads. */
159 size_t line_bytes; /* Number of bytes to reserve for each line. */
160 bool eof; /* An EOF has been read. */
165 size_t sword; /* Zero-origin 'word' to start at. */
166 size_t schar; /* Additional characters to skip. */
167 size_t eword; /* Zero-origin first word after field. */
168 size_t echar; /* Additional characters in field. */
169 bool const *ignore; /* Boolean array of characters to ignore. */
170 char const *translate; /* Translation applied to characters. */
171 bool skipsblanks; /* Skip leading blanks when finding start. */
172 bool skipeblanks; /* Skip leading blanks when finding end. */
173 bool numeric; /* Flag for numeric comparison. Handle
174 strings of digits with optional decimal
175 point, but no exponential notation. */
176 bool random; /* Sort by random hash of key. */
177 bool general_numeric; /* Flag for general, numeric comparison.
178 Handle numbers in exponential notation. */
179 bool month; /* Flag for comparison by month name. */
180 bool reverse; /* Reverse the sense of comparison. */
181 bool version; /* sort by version number */
182 struct keyfield *next; /* Next keyfield to try. */
191 /* FIXME: None of these tables work with multibyte character sets.
192 Also, there are many other bugs when handling multibyte characters.
193 One way to fix this is to rewrite `sort' to use wide characters
194 internally, but doing this with good performance is a bit
197 /* Table of blanks. */
198 static bool blanks[UCHAR_LIM];
200 /* Table of non-printing characters. */
201 static bool nonprinting[UCHAR_LIM];
203 /* Table of non-dictionary characters (not letters, digits, or blanks). */
204 static bool nondictionary[UCHAR_LIM];
206 /* Translation table folding lower case to upper. */
207 static char fold_toupper[UCHAR_LIM];
209 #define MONTHS_PER_YEAR 12
211 /* Table mapping month names to integers.
212 Alphabetic order allows binary search. */
213 static struct month monthtab[] =
229 /* During the merge phase, the number of files to merge at once. */
230 #define NMERGE_DEFAULT 16
232 /* Minimum size for a merge or check buffer. */
233 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
235 /* Minimum sort size; the code might not work with smaller sizes. */
236 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
238 /* The number of bytes needed for a merge or check buffer, which can
239 function relatively efficiently even if it holds only one line. If
240 a longer line is seen, this value is increased. */
241 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
243 /* The approximate maximum number of bytes of main memory to use, as
244 specified by the user. Zero if the user has not specified a size. */
245 static size_t sort_size;
247 /* The guessed size for non-regular files. */
248 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
250 /* Array of directory names in which any temporary files are to be created. */
251 static char const **temp_dirs;
253 /* Number of temporary directory names used. */
254 static size_t temp_dir_count;
256 /* Number of allocated slots in temp_dirs. */
257 static size_t temp_dir_alloc;
259 /* Flag to reverse the order of all comparisons. */
262 /* Flag for stable sort. This turns off the last ditch bytewise
263 comparison of lines, and instead leaves lines in the same order
264 they were read if all keys compare equal. */
267 /* If TAB has this value, blanks separate fields. */
268 enum { TAB_DEFAULT = CHAR_MAX + 1 };
270 /* Tab character separating fields. If TAB_DEFAULT, then fields are
271 separated by the empty string between a non-blank character and a blank
273 static int tab = TAB_DEFAULT;
275 /* Flag to remove consecutive duplicate lines from the output.
276 Only the last of a sequence of equal lines will be output. */
279 /* Nonzero if any of the input files are the standard input. */
280 static bool have_read_stdin;
282 /* List of key field comparisons to be tried. */
283 static struct keyfield *keylist;
285 /* Program used to (de)compress temp files. Must accept -d. */
286 static char const *compress_program;
288 /* Maximum number of files to merge in one go. If more than this
289 number are present, temp files will be used. */
290 static unsigned int nmerge = NMERGE_DEFAULT;
292 static void sortlines_temp (struct line *, size_t, struct line *);
294 /* Report MESSAGE for FILE, then clean up and exit.
295 If FILE is null, it represents standard output. */
297 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
299 die (char const *message, char const *file)
301 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
308 if (status != EXIT_SUCCESS)
309 fprintf (stderr, _("Try `%s --help' for more information.\n"),
314 Usage: %s [OPTION]... [FILE]...\n\
315 or: %s [OPTION]... --files0-from=F\n\
317 program_name, program_name);
319 Write sorted concatenation of all FILE(s) to standard output.\n\
323 Mandatory arguments to long options are mandatory for short options too.\n\
330 -b, --ignore-leading-blanks ignore leading blanks\n\
331 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
332 -f, --ignore-case fold lower case to upper case characters\n\
335 -g, --general-numeric-sort compare according to general numerical value\n\
336 -i, --ignore-nonprinting consider only printable characters\n\
337 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
338 -n, --numeric-sort compare according to string numerical value\n\
339 -R, --random-sort sort by random hash of keys\n\
340 -V, --version-sort sort by numeric version (see strverscmp(3C))\n\
341 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
342 --sort=WORD sort according to WORD:\n\
343 general-numeric -g, month -M, numeric -n,\n\
344 random -R, version -V\n\
345 -r, --reverse reverse the result of comparisons\n\
353 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
354 for more use temp files\n\
357 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
358 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
359 --compress-program=PROG compress temporaries with PROG;\n\
360 decompress them with PROG -d\n\
361 --files0-from=F read input from the files specified by\n\
362 NUL-terminated names in file F\n\
365 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
366 -m, --merge merge already sorted files; do not sort\n\
369 -o, --output=FILE write result to FILE instead of standard output\n\
370 -s, --stable stabilize sort by disabling last-resort comparison\n\
371 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
374 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
375 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
376 multiple options specify multiple directories\n\
377 -u, --unique with -c, check for strict ordering;\n\
378 without -c, output only the first of an equal run\n\
381 -z, --zero-terminated end lines with 0 byte, not newline\n\
383 fputs (HELP_OPTION_DESCRIPTION, stdout);
384 fputs (VERSION_OPTION_DESCRIPTION, stdout);
387 POS is F[.C][OPTS], where F is the field number and C the character position\n\
388 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
389 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
390 one or more single-letter ordering options, which override global ordering\n\
391 options for that key. If no key is given, use the entire line as the key.\n\
393 SIZE may be followed by the following multiplicative suffixes:\n\
396 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
398 With no FILE, or when FILE is -, read standard input.\n\
401 The locale specified by the environment affects sort order.\n\
402 Set LC_ALL=C to get the traditional sort order that uses\n\
403 native byte values.\n\
405 emit_bug_reporting_address ();
411 /* For long options that have no equivalent short option, use a
412 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
415 CHECK_OPTION = CHAR_MAX + 1,
416 COMPRESS_PROGRAM_OPTION,
419 RANDOM_SOURCE_OPTION,
423 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
425 static struct option const long_options[] =
427 {"ignore-leading-blanks", no_argument, NULL, 'b'},
428 {"check", optional_argument, NULL, CHECK_OPTION},
429 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
430 {"dictionary-order", no_argument, NULL, 'd'},
431 {"ignore-case", no_argument, NULL, 'f'},
432 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
433 {"general-numeric-sort", no_argument, NULL, 'g'},
434 {"ignore-nonprinting", no_argument, NULL, 'i'},
435 {"key", required_argument, NULL, 'k'},
436 {"merge", no_argument, NULL, 'm'},
437 {"month-sort", no_argument, NULL, 'M'},
438 {"numeric-sort", no_argument, NULL, 'n'},
439 {"version-sort", no_argument, NULL, 'V'},
440 {"random-sort", no_argument, NULL, 'R'},
441 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
442 {"sort", required_argument, NULL, SORT_OPTION},
443 {"output", required_argument, NULL, 'o'},
444 {"reverse", no_argument, NULL, 'r'},
445 {"stable", no_argument, NULL, 's'},
446 {"batch-size", required_argument, NULL, NMERGE_OPTION},
447 {"buffer-size", required_argument, NULL, 'S'},
448 {"field-separator", required_argument, NULL, 't'},
449 {"temporary-directory", required_argument, NULL, 'T'},
450 {"unique", no_argument, NULL, 'u'},
451 {"zero-terminated", no_argument, NULL, 'z'},
452 {GETOPT_HELP_OPTION_DECL},
453 {GETOPT_VERSION_OPTION_DECL},
457 #define CHECK_TABLE \
459 _ct_("silent", 'C') \
460 _ct_("diagnose-first", 'c')
462 static char const *const check_args[] =
464 #define _ct_(_s, _c) _s,
468 static char const check_types[] =
470 #define _ct_(_s, _c) _c,
476 _st_("general-numeric", 'g') \
478 _st_("numeric", 'n') \
479 _st_("random", 'R') \
482 static char const *const sort_args[] =
484 #define _st_(_s, _c) _s,
488 static char const sort_types[] =
490 #define _st_(_s, _c) _c,
495 /* The set of signals that are caught. */
496 static sigset_t caught_signals;
498 /* Critical section status. */
505 /* Enter a critical section. */
506 static struct cs_status
509 struct cs_status status;
510 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
514 /* Leave a critical section. */
516 cs_leave (struct cs_status status)
520 /* Ignore failure when restoring the signal mask. */
521 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
525 /* The list of temporary files. */
528 struct tempnode *volatile next;
529 pid_t pid; /* If compressed, the pid of compressor, else zero */
530 char name[1]; /* Actual size is 1 + file name length. */
532 static struct tempnode *volatile temphead;
533 static struct tempnode *volatile *temptail = &temphead;
538 pid_t pid; /* If compressed, the pid of compressor, else zero */
541 /* A table where we store compression process states. We clean up all
542 processes in a timely manner so as not to exhaust system resources,
543 so we store the info on whether the process is still running, or has
545 static Hash_table *proctab;
547 enum { INIT_PROCTAB_SIZE = 47 };
549 enum procstate { ALIVE, ZOMBIE };
551 /* A proctab entry. The COUNT field is there in case we fork a new
552 compression process that has the same PID as an old zombie process
553 that is still in the table (because the process to decompress the
554 temp file it was associated with hasn't started yet). */
558 enum procstate state;
563 proctab_hasher (const void *entry, size_t tabsize)
565 const struct procnode *node = entry;
566 return node->pid % tabsize;
570 proctab_comparator (const void *e1, const void *e2)
572 const struct procnode *n1 = e1, *n2 = e2;
573 return n1->pid == n2->pid;
576 /* The total number of forked processes (compressors and decompressors)
577 that have not been reaped yet. */
578 static size_t nprocs;
580 /* The number of child processes we'll allow before we try to reap some. */
581 enum { MAX_PROCS_BEFORE_REAP = 2 };
583 /* If 0 < PID, wait for the child process with that PID to exit.
584 If PID is -1, clean up a random child process which has finished and
585 return the process ID of that child. If PID is -1 and no processes
586 have quit yet, return 0 without waiting. */
592 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
595 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
599 if (! WIFEXITED (status) || WEXITSTATUS (status))
600 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
608 /* Add the PID of a running compression process to proctab, or update
609 the entry COUNT and STATE fields if it's already there. This also
610 creates the table for us the first time it's called. */
613 register_proc (pid_t pid)
615 struct procnode test, *node;
619 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
628 node = hash_lookup (proctab, &test);
636 node = xmalloc (sizeof *node);
640 hash_insert (proctab, node);
644 /* This is called when we reap a random process. We don't know
645 whether we have reaped a compression process or a decompression
646 process until we look in the table. If there's an ALIVE entry for
647 it, then we have reaped a compression process, so change the state
648 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
651 update_proc (pid_t pid)
653 struct procnode test, *node;
656 node = hash_lookup (proctab, &test);
658 node->state = ZOMBIE;
661 /* This is for when we need to wait for a compression process to exit.
662 If it has a ZOMBIE entry in the table then it's already dead and has
663 been reaped. Note that if there's an ALIVE entry for it, it still may
664 already have died and been reaped if a second process was created with
665 the same PID. This is probably exceedingly rare, but to be on the safe
666 side we will have to wait for any compression process with this PID. */
669 wait_proc (pid_t pid)
671 struct procnode test, *node;
674 node = hash_lookup (proctab, &test);
675 if (node->state == ALIVE)
678 node->state = ZOMBIE;
681 hash_delete (proctab, node);
686 /* Keep reaping finished children as long as there are more to reap.
687 This doesn't block waiting for any of them, it only reaps those
688 that are already dead. */
695 while (0 < nprocs && (pid = reap (-1)))
699 /* Clean up any remaining temporary files. */
704 struct tempnode const *node;
706 for (node = temphead; node; node = node->next)
711 /* Cleanup actions to take when exiting. */
718 /* Clean up any remaining temporary files in a critical section so
719 that a signal handler does not try to clean them too. */
720 struct cs_status cs = cs_enter ();
728 /* Create a new temporary file, returning its newly allocated tempnode.
729 Store into *PFD the file descriptor open for writing. */
731 static struct tempnode *
732 create_temp_file (int *pfd)
734 static char const slashbase[] = "/sortXXXXXX";
735 static size_t temp_dir_index;
738 char const *temp_dir = temp_dirs[temp_dir_index];
739 size_t len = strlen (temp_dir);
740 struct tempnode *node =
741 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
742 char *file = node->name;
745 memcpy (file, temp_dir, len);
746 memcpy (file + len, slashbase, sizeof slashbase);
749 if (++temp_dir_index == temp_dir_count)
752 /* Create the temporary file in a critical section, to avoid races. */
758 temptail = &node->next;
765 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
772 /* Return a stream for FILE, opened with mode HOW. A null FILE means
773 standard output; HOW should be "w". When opening for input, "-"
774 means standard input. To avoid confusion, do not return file
775 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
776 opening an ordinary FILE. */
779 xfopen (const char *file, const char *how)
785 else if (STREQ (file, "-") && *how == 'r')
787 have_read_stdin = true;
792 fp = fopen (file, how);
794 die (_("open failed"), file);
800 /* Close FP, whose name is FILE, and report any errors. */
803 xfclose (FILE *fp, char const *file)
808 /* Allow reading stdin from tty more than once. */
814 /* Don't close stdout just yet. close_stdout does that. */
815 if (fflush (fp) != 0)
816 die (_("fflush failed"), file);
820 if (fclose (fp) != 0)
821 die (_("close failed"), file);
827 dup2_or_die (int oldfd, int newfd)
829 if (dup2 (oldfd, newfd) < 0)
830 error (SORT_FAILURE, errno, _("dup2 failed"));
833 /* Fork a child process for piping to and do common cleanup. The
834 TRIES parameter tells us how many times to try to fork before
835 giving up. Return the PID of the child or -1 if fork failed. */
838 pipe_fork (int pipefds[2], size_t tries)
840 #if HAVE_WORKING_FORK
841 struct tempnode *saved_temphead;
843 unsigned int wait_retry = 1;
844 pid_t pid IF_LINT (= -1);
847 if (pipe (pipefds) < 0)
852 /* This is so the child process won't delete our temp files
853 if it receives a signal before exec-ing. */
855 saved_temphead = temphead;
861 temphead = saved_temphead;
866 if (0 <= pid || errno != EAGAIN)
883 close (STDIN_FILENO);
884 close (STDOUT_FILENO);
891 #else /* ! HAVE_WORKING_FORK */
896 /* Create a temporary file and start a compression program to filter output
897 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
898 set it to the PID of the newly-created process. */
901 create_temp (FILE **pfp, pid_t *ppid)
904 struct tempnode *node = create_temp_file (&tempfd);
905 char *name = node->name;
907 if (compress_program)
911 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
918 register_proc (node->pid);
920 else if (node->pid == 0)
923 dup2_or_die (tempfd, STDOUT_FILENO);
925 dup2_or_die (pipefds[0], STDIN_FILENO);
928 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
929 error (SORT_FAILURE, errno, _("couldn't execute %s"),
936 *pfp = fdopen (tempfd, "w");
938 die (_("couldn't create temporary file"), name);
946 /* Open a compressed temp file and start a decompression process through
947 which to filter the input. PID must be the valid processes ID of the
948 process used to compress the file. */
951 open_temp (const char *name, pid_t pid)
953 int tempfd, pipefds[2];
959 tempfd = open (name, O_RDONLY);
961 die (_("couldn't open temporary file"), name);
963 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
969 else if (child_pid == 0)
972 dup2_or_die (tempfd, STDIN_FILENO);
974 dup2_or_die (pipefds[1], STDOUT_FILENO);
977 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
978 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
982 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
985 fp = fdopen (pipefds[0], "r");
987 die (_("couldn't create temporary file"), name);
993 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
995 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
996 die (_("write failed"), output_file);
999 /* Append DIR to the array of temporary directory names. */
1001 add_temp_dir (char const *dir)
1003 if (temp_dir_count == temp_dir_alloc)
1004 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1006 temp_dirs[temp_dir_count++] = dir;
1009 /* Remove NAME from the list of temporary files. */
1012 zaptemp (const char *name)
1014 struct tempnode *volatile *pnode;
1015 struct tempnode *node;
1016 struct tempnode *next;
1018 int unlink_errno = 0;
1019 struct cs_status cs;
1021 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1024 /* Unlink the temporary file in a critical section to avoid races. */
1027 unlink_status = unlink (name);
1028 unlink_errno = errno;
1032 if (unlink_status != 0)
1033 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1039 #if HAVE_NL_LANGINFO
1042 struct_month_cmp (const void *m1, const void *m2)
1044 struct month const *month1 = m1;
1045 struct month const *month2 = m2;
1046 return strcmp (month1->name, month2->name);
1051 /* Initialize the character class tables. */
1058 for (i = 0; i < UCHAR_LIM; ++i)
1060 blanks[i] = !! isblank (i);
1061 nonprinting[i] = ! isprint (i);
1062 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1063 fold_toupper[i] = toupper (i);
1066 #if HAVE_NL_LANGINFO
1067 /* If we're not in the "C" locale, read different names for months. */
1070 for (i = 0; i < MONTHS_PER_YEAR; i++)
1077 s = (char *) nl_langinfo (ABMON_1 + i);
1079 monthtab[i].name = name = xmalloc (s_len + 1);
1080 monthtab[i].val = i + 1;
1082 for (j = 0; j < s_len; j++)
1083 name[j] = fold_toupper[to_uchar (s[j])];
1086 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1087 sizeof *monthtab, struct_month_cmp);
1092 /* Specify how many inputs may be merged at once.
1093 This may be set on the command-line with the
1094 --batch-size option. */
1096 specify_nmerge (int oi, char c, char const *s)
1099 struct rlimit rlimit;
1100 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1102 /* Try to find out how many file descriptors we'll be able
1103 to open. We need at least nmerge + 3 (STDIN_FILENO,
1104 STDOUT_FILENO and STDERR_FILENO). */
1105 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1110 if (e == LONGINT_OK)
1114 e = LONGINT_OVERFLOW;
1119 error (0, 0, _("invalid --%s argument %s"),
1120 long_options[oi].name, quote(s));
1121 error (SORT_FAILURE, 0,
1122 _("minimum --%s argument is %s"),
1123 long_options[oi].name, quote("2"));
1125 else if (max_nmerge < nmerge)
1127 e = LONGINT_OVERFLOW;
1134 if (e == LONGINT_OVERFLOW)
1136 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1137 error (0, 0, _("--%s argument %s too large"),
1138 long_options[oi].name, quote(s));
1139 error (SORT_FAILURE, 0,
1140 _("maximum --%s argument with current rlimit is %s"),
1141 long_options[oi].name,
1142 uinttostr (max_nmerge, max_nmerge_buf));
1145 xstrtol_fatal (e, oi, c, long_options, s);
1148 /* Specify the amount of main memory to use when sorting. */
1150 specify_sort_size (int oi, char c, char const *s)
1154 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1156 /* The default unit is KiB. */
1157 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1159 if (n <= UINTMAX_MAX / 1024)
1162 e = LONGINT_OVERFLOW;
1165 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1166 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1175 double mem = physmem_total () * n / 100;
1177 /* Use "<", not "<=", to avoid problems with rounding. */
1178 if (mem < UINTMAX_MAX)
1184 e = LONGINT_OVERFLOW;
1189 if (e == LONGINT_OK)
1191 /* If multiple sort sizes are specified, take the maximum, so
1192 that option order does not matter. */
1199 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1203 e = LONGINT_OVERFLOW;
1206 xstrtol_fatal (e, oi, c, long_options, s);
1209 /* Return the default sort size. */
1211 default_sort_size (void)
1213 /* Let MEM be available memory or 1/8 of total memory, whichever
1215 double avail = physmem_available ();
1216 double total = physmem_total ();
1217 double mem = MAX (avail, total / 8);
1218 struct rlimit rlimit;
1220 /* Let SIZE be MEM, but no more than the maximum object size or
1221 system resource limits. Avoid the MIN macro here, as it is not
1222 quite right when only one argument is floating point. Don't
1223 bother to check for values like RLIM_INFINITY since in practice
1224 they are not much less than SIZE_MAX. */
1225 size_t size = SIZE_MAX;
1228 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1229 size = rlimit.rlim_cur;
1231 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1232 size = rlimit.rlim_cur;
1235 /* Leave a large safety margin for the above limits, as failure can
1236 occur when they are exceeded. */
1240 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1241 Exceeding RSS is not fatal, but can be quite slow. */
1242 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1243 size = rlimit.rlim_cur / 16 * 15;
1246 /* Use no less than the minimum. */
1247 return MAX (size, MIN_SORT_SIZE);
1250 /* Return the sort buffer size to use with the input files identified
1251 by FPS and FILES, which are alternate names of the same files.
1252 NFILES gives the number of input files; NFPS may be less. Assume
1253 that each input line requires LINE_BYTES extra bytes' worth of line
1254 information. Do not exceed the size bound specified by the user
1255 (or a default size bound, if the user does not specify one). */
1258 sort_buffer_size (FILE *const *fps, size_t nfps,
1259 char *const *files, size_t nfiles,
1262 /* A bound on the input size. If zero, the bound hasn't been
1264 static size_t size_bound;
1266 /* In the worst case, each input byte is a newline. */
1267 size_t worst_case_per_input_byte = line_bytes + 1;
1269 /* Keep enough room for one extra input line and an extra byte.
1270 This extra room might be needed when preparing to read EOF. */
1271 size_t size = worst_case_per_input_byte + 1;
1275 for (i = 0; i < nfiles; i++)
1281 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1282 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1283 : stat (files[i], &st))
1285 die (_("stat failed"), files[i]);
1287 if (S_ISREG (st.st_mode))
1288 file_size = st.st_size;
1291 /* The file has unknown size. If the user specified a sort
1292 buffer size, use that; otherwise, guess the size. */
1295 file_size = INPUT_FILE_SIZE_GUESS;
1300 size_bound = sort_size;
1302 size_bound = default_sort_size ();
1305 /* Add the amount of memory needed to represent the worst case
1306 where the input consists entirely of newlines followed by a
1307 single non-newline. Check for overflow. */
1308 worst_case = file_size * worst_case_per_input_byte + 1;
1309 if (file_size != worst_case / worst_case_per_input_byte
1310 || size_bound - size <= worst_case)
1318 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1319 must be at least sizeof (struct line). Allocate ALLOC bytes
1323 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1325 /* Ensure that the line array is properly aligned. If the desired
1326 size cannot be allocated, repeatedly halve it until allocation
1327 succeeds. The smaller allocation may hurt overall performance,
1328 but that's better than failing. */
1331 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1332 buf->buf = malloc (alloc);
1336 if (alloc <= line_bytes + 1)
1340 buf->line_bytes = line_bytes;
1342 buf->used = buf->left = buf->nlines = 0;
1346 /* Return one past the limit of the line array. */
1348 static inline struct line *
1349 buffer_linelim (struct buffer const *buf)
1351 return (struct line *) (buf->buf + buf->alloc);
1354 /* Return a pointer to the first character of the field specified
1358 begfield (const struct line *line, const struct keyfield *key)
1360 char *ptr = line->text, *lim = ptr + line->length - 1;
1361 size_t sword = key->sword;
1362 size_t schar = key->schar;
1363 size_t remaining_bytes;
1365 /* The leading field separator itself is included in a field when -t
1368 if (tab != TAB_DEFAULT)
1369 while (ptr < lim && sword--)
1371 while (ptr < lim && *ptr != tab)
1377 while (ptr < lim && sword--)
1379 while (ptr < lim && blanks[to_uchar (*ptr)])
1381 while (ptr < lim && !blanks[to_uchar (*ptr)])
1385 if (key->skipsblanks)
1386 while (ptr < lim && blanks[to_uchar (*ptr)])
1389 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1390 remaining_bytes = lim - ptr;
1391 if (schar < remaining_bytes)
1399 /* Return the limit of (a pointer to the first character after) the field
1400 in LINE specified by KEY. */
1403 limfield (const struct line *line, const struct keyfield *key)
1405 char *ptr = line->text, *lim = ptr + line->length - 1;
1406 size_t eword = key->eword, echar = key->echar;
1407 size_t remaining_bytes;
1409 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1410 whichever comes first. If there are more than EWORD fields, leave
1411 PTR pointing at the beginning of the field having zero-based index,
1412 EWORD. If a delimiter character was specified (via -t), then that
1413 `beginning' is the first character following the delimiting TAB.
1414 Otherwise, leave PTR pointing at the first `blank' character after
1415 the preceding field. */
1416 if (tab != TAB_DEFAULT)
1417 while (ptr < lim && eword--)
1419 while (ptr < lim && *ptr != tab)
1421 if (ptr < lim && (eword | echar))
1425 while (ptr < lim && eword--)
1427 while (ptr < lim && blanks[to_uchar (*ptr)])
1429 while (ptr < lim && !blanks[to_uchar (*ptr)])
1433 #ifdef POSIX_UNSPECIFIED
1434 /* The following block of code makes GNU sort incompatible with
1435 standard Unix sort, so it's ifdef'd out for now.
1436 The POSIX spec isn't clear on how to interpret this.
1437 FIXME: request clarification.
1439 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1440 Date: Thu, 30 May 96 12:20:41 -0400
1441 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1443 [...]I believe I've found another bug in `sort'.
1448 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1451 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1455 Unix sort produced the answer I expected: sort on the single character
1456 in column 7. GNU sort produced different results, because it disagrees
1457 on the interpretation of the key-end spec "M.N". Unix sort reads this
1458 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1459 "skip M-1 fields, then either N-1 characters or the rest of the current
1460 field, whichever comes first". This extra clause applies only to
1461 key-ends, not key-starts.
1464 /* Make LIM point to the end of (one byte past) the current field. */
1465 if (tab != TAB_DEFAULT)
1468 newlim = memchr (ptr, tab, lim - ptr);
1476 while (newlim < lim && blanks[to_uchar (*newlim)])
1478 while (newlim < lim && !blanks[to_uchar (*newlim)])
1484 /* If we're ignoring leading blanks when computing the End
1485 of the field, don't start counting bytes until after skipping
1486 past any leading blanks. */
1487 if (key->skipeblanks)
1488 while (ptr < lim && blanks[to_uchar (*ptr)])
1491 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1492 remaining_bytes = lim - ptr;
1493 if (echar < remaining_bytes)
1501 /* Fill BUF reading from FP, moving buf->left bytes from the end
1502 of buf->buf to the beginning first. If EOF is reached and the
1503 file wasn't terminated by a newline, supply one. Set up BUF's line
1504 table too. FILE is the name of the file corresponding to FP.
1505 Return true if some input was read. */
1508 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1510 struct keyfield const *key = keylist;
1512 size_t line_bytes = buf->line_bytes;
1513 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1518 if (buf->used != buf->left)
1520 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1521 buf->used = buf->left;
1527 char *ptr = buf->buf + buf->used;
1528 struct line *linelim = buffer_linelim (buf);
1529 struct line *line = linelim - buf->nlines;
1530 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1531 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1533 while (line_bytes + 1 < avail)
1535 /* Read as many bytes as possible, but do not read so many
1536 bytes that there might not be enough room for the
1537 corresponding line array. The worst case is when the
1538 rest of the input file consists entirely of newlines,
1539 except that the last byte is not a newline. */
1540 size_t readsize = (avail - 1) / (line_bytes + 1);
1541 size_t bytes_read = fread (ptr, 1, readsize, fp);
1542 char *ptrlim = ptr + bytes_read;
1544 avail -= bytes_read;
1546 if (bytes_read != readsize)
1549 die (_("read failed"), file);
1553 if (buf->buf == ptrlim)
1555 if (ptrlim[-1] != eol)
1560 /* Find and record each line in the just-read input. */
1561 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1565 line->text = line_start;
1566 line->length = ptr - line_start;
1567 mergesize = MAX (mergesize, line->length);
1568 avail -= line_bytes;
1572 /* Precompute the position of the first key for
1574 line->keylim = (key->eword == SIZE_MAX
1576 : limfield (line, key));
1578 if (key->sword != SIZE_MAX)
1579 line->keybeg = begfield (line, key);
1582 if (key->skipsblanks)
1583 while (blanks[to_uchar (*line_start)])
1585 line->keybeg = line_start;
1597 buf->used = ptr - buf->buf;
1598 buf->nlines = buffer_linelim (buf) - line;
1599 if (buf->nlines != 0)
1601 buf->left = ptr - line_start;
1602 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1607 /* The current input line is too long to fit in the buffer.
1608 Double the buffer size and try again, keeping it properly
1610 size_t line_alloc = buf->alloc / sizeof (struct line);
1611 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1612 buf->alloc = line_alloc * sizeof (struct line);
1617 /* Compare strings A and B as numbers without explicitly converting them to
1618 machine numbers. Comparatively slow for short strings, but asymptotically
1622 numcompare (const char *a, const char *b)
1624 while (blanks[to_uchar (*a)])
1626 while (blanks[to_uchar (*b)])
1629 return strnumcmp (a, b, decimal_point, thousands_sep);
1633 general_numcompare (const char *sa, const char *sb)
1635 /* FIXME: add option to warn about failed conversions. */
1636 /* FIXME: maybe add option to try expensive FP conversion
1637 only if A and B can't be compared more cheaply/accurately. */
1641 double a = strtod (sa, &ea);
1642 double b = strtod (sb, &eb);
1644 /* Put conversion errors at the start of the collating sequence. */
1646 return sb == eb ? 0 : -1;
1650 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1651 conversion errors but before numbers; sort them by internal
1652 bit-pattern, for lack of a more portable alternative. */
1658 : memcmp ((char *) &a, (char *) &b, sizeof a));
1661 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1662 Return 0 if the name in S is not recognized. */
1665 getmonth (char const *month, size_t len)
1668 size_t hi = MONTHS_PER_YEAR;
1669 char const *monthlim = month + len;
1673 if (month == monthlim)
1675 if (!blanks[to_uchar (*month)])
1682 size_t ix = (lo + hi) / 2;
1683 char const *m = month;
1684 char const *n = monthtab[ix].name;
1689 return monthtab[ix].val;
1690 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1695 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1707 /* A source of random data. */
1708 static struct randread_source *randread_source;
1710 /* Return the Ith randomly-generated state. The caller must invoke
1711 random_state (H) for all H less than I before invoking random_state
1714 static struct md5_ctx
1715 random_state (size_t i)
1717 /* An array of states resulting from the random data, and counts of
1718 its used and allocated members. */
1719 static struct md5_ctx *state;
1721 static size_t allocated;
1723 struct md5_ctx *s = &state[i];
1727 unsigned char buf[MD5_DIGEST_SIZE];
1733 state = X2NREALLOC (state, &allocated);
1737 randread (randread_source, buf, sizeof buf);
1739 md5_process_bytes (buf, sizeof buf, s);
1745 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1746 with length LENGTHB. Return negative if less, zero if equal,
1747 positive if greater. */
1750 cmp_hashes (char const *texta, size_t lena,
1751 char const *textb, size_t lenb)
1753 /* Try random hashes until a pair of hashes disagree. But if the
1754 first pair of random hashes agree, check whether the keys are
1755 identical and if so report no difference. */
1760 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1761 struct md5_ctx s[2];
1762 s[0] = s[1] = random_state (i);
1763 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1764 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1765 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1768 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1775 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1776 using one or more random hash functions. */
1779 compare_random (char *restrict texta, size_t lena,
1780 char *restrict textb, size_t lenb)
1784 if (! hard_LC_COLLATE)
1785 diff = cmp_hashes (texta, lena, textb, lenb);
1788 /* Transform the text into the basis of comparison, so that byte
1789 strings that would otherwise considered to be equal are
1790 considered equal here even if their bytes differ. */
1793 char stackbuf[4000];
1794 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1795 bool a_fits = tlena <= sizeof stackbuf;
1796 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1797 (a_fits ? sizeof stackbuf - tlena : 0),
1800 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1804 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1805 faster by avoiding the need for an extra buffer copy. */
1806 buf = xmalloc (tlena + tlenb + 1);
1807 xmemxfrm (buf, tlena + 1, texta, lena);
1808 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1811 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1813 if (buf != stackbuf)
1820 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1821 using strverscmp. */
1824 compare_version (char *restrict texta, size_t lena,
1825 char *restrict textb, size_t lenb)
1830 * It is necessary to save the character after the end of the field.
1831 * "strverscmp" works with NUL terminated strings. Our blocks of
1832 * text are not necessarily terminated with a NUL byte.
1834 char sv_a = texta[lena];
1835 char sv_b = textb[lenb];
1837 texta[lena] = textb[lenb] = '\0';
1838 diff = strverscmp (texta, textb);
1846 /* Compare two lines A and B trying every key in sequence until there
1847 are no more keys or a difference is found. */
1850 keycompare (const struct line *a, const struct line *b)
1852 struct keyfield const *key = keylist;
1854 /* For the first iteration only, the key positions have been
1855 precomputed for us. */
1856 char *texta = a->keybeg;
1857 char *textb = b->keybeg;
1858 char *lima = a->keylim;
1859 char *limb = b->keylim;
1865 char const *translate = key->translate;
1866 bool const *ignore = key->ignore;
1868 /* Find the lengths. */
1869 size_t lena = lima <= texta ? 0 : lima - texta;
1870 size_t lenb = limb <= textb ? 0 : limb - textb;
1872 /* Actually compare the fields. */
1875 diff = compare_random (texta, lena, textb, lenb);
1876 else if (key->numeric | key->general_numeric)
1878 char savea = *lima, saveb = *limb;
1880 *lima = *limb = '\0';
1881 diff = ((key->numeric ? numcompare : general_numcompare)
1883 *lima = savea, *limb = saveb;
1886 else if (key->version)
1887 diff = compare_version (texta, lena, textb, lenb);
1889 else if (key->month)
1890 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1891 /* Sorting like this may become slow, so in a simple locale the user
1892 can select a faster sort that is similar to ascii sort. */
1893 else if (hard_LC_COLLATE)
1895 if (ignore || translate)
1898 size_t size = lena + 1 + lenb + 1;
1899 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1900 char *copy_b = copy_a + lena + 1;
1901 size_t new_len_a, new_len_b, i;
1903 /* Ignore and/or translate chars before comparing. */
1904 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1908 copy_a[new_len_a] = (translate
1909 ? translate[to_uchar (texta[i])]
1911 if (!ignore || !ignore[to_uchar (texta[i])])
1916 copy_b[new_len_b] = (translate
1917 ? translate[to_uchar (textb[i])]
1919 if (!ignore || !ignore[to_uchar (textb[i])])
1924 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1926 if (sizeof buf < size)
1930 diff = - NONZERO (lenb);
1934 diff = xmemcoll (texta, lena, textb, lenb);
1938 #define CMP_WITH_IGNORE(A, B) \
1943 while (texta < lima && ignore[to_uchar (*texta)]) \
1945 while (textb < limb && ignore[to_uchar (*textb)]) \
1947 if (! (texta < lima && textb < limb)) \
1949 diff = to_uchar (A) - to_uchar (B); \
1956 diff = (texta < lima) - (textb < limb); \
1961 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1962 translate[to_uchar (*textb)]);
1964 CMP_WITH_IGNORE (*texta, *textb);
1967 diff = - NONZERO (lenb);
1974 while (texta < lima && textb < limb)
1976 diff = (to_uchar (translate[to_uchar (*texta++)])
1977 - to_uchar (translate[to_uchar (*textb++)]));
1984 diff = memcmp (texta, textb, MIN (lena, lenb));
1988 diff = lena < lenb ? -1 : lena != lenb;
1998 /* Find the beginning and limit of the next field. */
1999 if (key->eword != SIZE_MAX)
2000 lima = limfield (a, key), limb = limfield (b, key);
2002 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2004 if (key->sword != SIZE_MAX)
2005 texta = begfield (a, key), textb = begfield (b, key);
2008 texta = a->text, textb = b->text;
2009 if (key->skipsblanks)
2011 while (texta < lima && blanks[to_uchar (*texta)])
2013 while (textb < limb && blanks[to_uchar (*textb)])
2024 return key->reverse ? -diff : diff;
2027 /* Compare two lines A and B, returning negative, zero, or positive
2028 depending on whether A compares less than, equal to, or greater than B. */
2031 compare (const struct line *a, const struct line *b)
2036 /* First try to compare on the specified keys (if any).
2037 The only two cases with no key at all are unadorned sort,
2038 and unadorned sort -r. */
2041 diff = keycompare (a, b);
2042 if (diff | unique | stable)
2046 /* If the keys all compare equal (or no keys were specified)
2047 fall through to the default comparison. */
2048 alen = a->length - 1, blen = b->length - 1;
2051 diff = - NONZERO (blen);
2054 else if (hard_LC_COLLATE)
2055 diff = xmemcoll (a->text, alen, b->text, blen);
2056 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2057 diff = alen < blen ? -1 : alen != blen;
2059 return reverse ? -diff : diff;
2062 /* Check that the lines read from FILE_NAME come in order. Return
2063 true if they are in order. If CHECKONLY == 'c', also print a
2064 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2065 they are not in order. */
2068 check (char const *file_name, char checkonly)
2070 FILE *fp = xfopen (file_name, "r");
2071 struct buffer buf; /* Input buffer. */
2072 struct line temp; /* Copy of previous line. */
2074 uintmax_t line_number = 0;
2075 struct keyfield const *key = keylist;
2076 bool nonunique = ! unique;
2077 bool ordered = true;
2079 initbuf (&buf, sizeof (struct line),
2080 MAX (merge_buffer_size, sort_size));
2083 while (fillbuf (&buf, fp, file_name))
2085 struct line const *line = buffer_linelim (&buf);
2086 struct line const *linebase = line - buf.nlines;
2088 /* Make sure the line saved from the old buffer contents is
2089 less than or equal to the first line of the new buffer. */
2090 if (alloc && nonunique <= compare (&temp, line - 1))
2094 if (checkonly == 'c')
2096 struct line const *disorder_line = line - 1;
2097 uintmax_t disorder_line_number =
2098 buffer_linelim (&buf) - disorder_line + line_number;
2099 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2100 fprintf (stderr, _("%s: %s:%s: disorder: "),
2101 program_name, file_name,
2102 umaxtostr (disorder_line_number, hr_buf));
2103 write_bytes (disorder_line->text, disorder_line->length,
2104 stderr, _("standard error"));
2112 /* Compare each line in the buffer with its successor. */
2113 while (linebase < --line)
2114 if (nonunique <= compare (line, line - 1))
2115 goto found_disorder;
2117 line_number += buf.nlines;
2119 /* Save the last line of the buffer. */
2120 if (alloc < line->length)
2127 alloc = line->length;
2131 while (alloc < line->length);
2133 temp.text = xrealloc (temp.text, alloc);
2135 memcpy (temp.text, line->text, line->length);
2136 temp.length = line->length;
2139 temp.keybeg = temp.text + (line->keybeg - line->text);
2140 temp.keylim = temp.text + (line->keylim - line->text);
2144 xfclose (fp, file_name);
2150 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2151 files (all of which are at the start of the FILES array), and
2152 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2153 Close input and output files before returning.
2154 OUTPUT_FILE gives the name of the output file. If it is NULL,
2155 the output file is standard output. If OFP is NULL, the output
2156 file has not been opened yet (or written to, if standard output). */
2159 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2160 FILE *ofp, char const *output_file)
2162 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2163 /* Input streams for each file. */
2164 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2165 /* Input buffers for each file. */
2166 struct line saved; /* Saved line storage for unique check. */
2167 struct line const *savedline = NULL;
2168 /* &saved if there is a saved line. */
2169 size_t savealloc = 0; /* Size allocated for the saved line. */
2170 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2171 /* Current line in each line table. */
2172 struct line const **base = xnmalloc (nmerge, sizeof *base);
2173 /* Base of each line table. */
2174 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2175 /* Table representing a permutation of fps,
2176 such that cur[ord[0]] is the smallest line
2177 and will be next output. */
2181 struct keyfield const *key = keylist;
2184 /* Read initial lines from each input file. */
2185 for (i = 0; i < nfiles; )
2187 fps[i] = (files[i].pid
2188 ? open_temp (files[i].name, files[i].pid)
2189 : xfopen (files[i].name, "r"));
2190 initbuf (&buffer[i], sizeof (struct line),
2191 MAX (merge_buffer_size, sort_size / nfiles));
2192 if (fillbuf (&buffer[i], fps[i], files[i].name))
2194 struct line const *linelim = buffer_linelim (&buffer[i]);
2195 cur[i] = linelim - 1;
2196 base[i] = linelim - buffer[i].nlines;
2201 /* fps[i] is empty; eliminate it from future consideration. */
2202 xfclose (fps[i], files[i].name);
2206 zaptemp (files[i].name);
2208 free (buffer[i].buf);
2210 for (j = i; j < nfiles; ++j)
2211 files[j] = files[j + 1];
2216 ofp = xfopen (output_file, "w");
2218 /* Set up the ord table according to comparisons among input lines.
2219 Since this only reorders two items if one is strictly greater than
2220 the other, it is stable. */
2221 for (i = 0; i < nfiles; ++i)
2223 for (i = 1; i < nfiles; ++i)
2224 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2225 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2227 /* Repeatedly output the smallest line until no input remains. */
2230 struct line const *smallest = cur[ord[0]];
2232 /* If uniquified output is turned on, output only the first of
2233 an identical series of lines. */
2236 if (savedline && compare (savedline, smallest))
2239 write_bytes (saved.text, saved.length, ofp, output_file);
2244 if (savealloc < smallest->length)
2249 savealloc = smallest->length;
2252 while ((savealloc *= 2) < smallest->length);
2254 saved.text = xrealloc (saved.text, savealloc);
2256 saved.length = smallest->length;
2257 memcpy (saved.text, smallest->text, saved.length);
2261 saved.text + (smallest->keybeg - smallest->text);
2263 saved.text + (smallest->keylim - smallest->text);
2268 write_bytes (smallest->text, smallest->length, ofp, output_file);
2270 /* Check if we need to read more lines into core. */
2271 if (base[ord[0]] < smallest)
2272 cur[ord[0]] = smallest - 1;
2275 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2277 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2278 cur[ord[0]] = linelim - 1;
2279 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2283 /* We reached EOF on fps[ord[0]]. */
2284 for (i = 1; i < nfiles; ++i)
2285 if (ord[i] > ord[0])
2288 xfclose (fps[ord[0]], files[ord[0]].name);
2289 if (ord[0] < ntemps)
2292 zaptemp (files[ord[0]].name);
2294 free (buffer[ord[0]].buf);
2295 for (i = ord[0]; i < nfiles; ++i)
2297 fps[i] = fps[i + 1];
2298 files[i] = files[i + 1];
2299 buffer[i] = buffer[i + 1];
2300 cur[i] = cur[i + 1];
2301 base[i] = base[i + 1];
2303 for (i = 0; i < nfiles; ++i)
2304 ord[i] = ord[i + 1];
2309 /* The new line just read in may be larger than other lines
2310 already in main memory; push it back in the queue until we
2311 encounter a line larger than it. Optimize for the common
2312 case where the new line is smallest. */
2317 size_t ord0 = ord[0];
2318 size_t count_of_smaller_lines;
2322 int cmp = compare (cur[ord0], cur[ord[probe]]);
2323 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2327 probe = (lo + hi) / 2;
2330 count_of_smaller_lines = lo - 1;
2331 for (j = 0; j < count_of_smaller_lines; j++)
2332 ord[j] = ord[j + 1];
2333 ord[count_of_smaller_lines] = ord0;
2336 /* Free up some resources every once in a while. */
2337 if (MAX_PROCS_BEFORE_REAP < nprocs)
2341 if (unique && savedline)
2343 write_bytes (saved.text, saved.length, ofp, output_file);
2347 xfclose (ofp, output_file);
2355 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2356 and HI (with NHI members). T, LO, and HI point just past their
2357 respective arrays, and the arrays are in reverse order. NLO and
2358 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2361 mergelines (struct line *t,
2362 struct line const *lo, size_t nlo,
2363 struct line const *hi, size_t nhi)
2366 if (compare (lo - 1, hi - 1) <= 0)
2371 /* HI - NHI equalled T - (NLO + NHI) when this function
2372 began. Therefore HI must equal T now, and there is no
2373 need to copy from HI to T. */
2391 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2392 NLINES must be at least 2.
2393 The input and output arrays are in reverse order, and LINES and
2394 TEMP point just past the end of their respective arrays.
2396 Use a recursive divide-and-conquer algorithm, in the style
2397 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2398 the optimization suggested by exercise 5.2.4-10; this requires room
2399 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2400 writes that this memory optimization was originally published by
2401 D. A. Bell, Comp J. 1 (1958), 75. */
2404 sortlines (struct line *lines, size_t nlines, struct line *temp)
2408 if (0 < compare (&lines[-1], &lines[-2]))
2410 struct line tmp = lines[-1];
2411 lines[-1] = lines[-2];
2417 size_t nlo = nlines / 2;
2418 size_t nhi = nlines - nlo;
2419 struct line *lo = lines;
2420 struct line *hi = lines - nlo;
2421 struct line *sorted_lo = temp;
2423 sortlines (hi, nhi, temp);
2425 sortlines_temp (lo, nlo, sorted_lo);
2427 sorted_lo[-1] = lo[-1];
2429 mergelines (lines, sorted_lo, nlo, hi, nhi);
2433 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2434 rather than sorting in place. */
2437 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2441 /* Declare `swap' as int, not bool, to work around a bug
2442 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2443 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2444 int swap = (0 < compare (&lines[-1], &lines[-2]));
2445 temp[-1] = lines[-1 - swap];
2446 temp[-2] = lines[-2 + swap];
2450 size_t nlo = nlines / 2;
2451 size_t nhi = nlines - nlo;
2452 struct line *lo = lines;
2453 struct line *hi = lines - nlo;
2454 struct line *sorted_hi = temp - nlo;
2456 sortlines_temp (hi, nhi, sorted_hi);
2458 sortlines (lo, nlo, temp);
2460 mergelines (temp, lo, nlo, sorted_hi, nhi);
2464 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2465 the same as OUTFILE. If found, merge the found instances (and perhaps
2466 some other files) into a temporary file so that it can in turn be
2467 merged into OUTFILE without destroying OUTFILE before it is completely
2468 read. Return the new value of NFILES, which differs from the old if
2469 some merging occurred.
2471 This test ensures that an otherwise-erroneous use like
2472 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2473 It's not clear that POSIX requires this nicety.
2474 Detect common error cases, but don't try to catch obscure cases like
2475 "cat ... FILE ... | sort -m -o FILE"
2476 where traditional "sort" doesn't copy the input and where
2477 people should know that they're getting into trouble anyway.
2478 Catching these obscure cases would slow down performance in
2482 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2483 size_t nfiles, char const *outfile)
2486 bool got_outstat = false;
2487 struct stat outstat;
2489 for (i = ntemps; i < nfiles; i++)
2491 bool is_stdin = STREQ (files[i].name, "-");
2495 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2502 ? stat (outfile, &outstat)
2503 : fstat (STDOUT_FILENO, &outstat))
2510 ? fstat (STDIN_FILENO, &instat)
2511 : stat (files[i].name, &instat))
2513 && SAME_INODE (instat, outstat));
2520 char *temp = create_temp (&tftp, &pid);
2521 mergefps (&files[i],0, nfiles - i, tftp, temp);
2522 files[i].name = temp;
2531 /* Merge the input FILES. NTEMPS is the number of files at the
2532 start of FILES that are temporary; it is zero at the top level.
2533 NFILES is the total number of files. Put the output in
2534 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2537 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2538 char const *output_file)
2540 while (nmerge < nfiles)
2542 /* Number of input files processed so far. */
2545 /* Number of output files generated so far. */
2548 /* nfiles % NMERGE; this counts input files that are left over
2549 after all full-sized merges have been done. */
2552 /* Number of easily-available slots at the next loop iteration. */
2555 /* Do as many NMERGE-size merges as possible. */
2556 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2560 char *temp = create_temp (&tfp, &pid);
2561 size_t nt = MIN (ntemps, nmerge);
2563 mergefps (&files[in], nt, nmerge, tfp, temp);
2564 files[out].name = temp;
2565 files[out].pid = pid;
2568 remainder = nfiles - in;
2569 cheap_slots = nmerge - out % nmerge;
2571 if (cheap_slots < remainder)
2573 /* So many files remain that they can't all be put into the last
2574 NMERGE-sized output window. Do one more merge. Merge as few
2575 files as possible, to avoid needless I/O. */
2576 size_t nshortmerge = remainder - cheap_slots + 1;
2579 char *temp = create_temp (&tfp, &pid);
2580 size_t nt = MIN (ntemps, nshortmerge);
2582 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2583 files[out].name = temp;
2584 files[out++].pid = pid;
2588 /* Put the remaining input files into the last NMERGE-sized output
2589 window, so they will be merged in the next pass. */
2590 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2595 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2596 mergefps (files, ntemps, nfiles, NULL, output_file);
2599 /* Sort NFILES FILES onto OUTPUT_FILE. */
2602 sort (char * const *files, size_t nfiles, char const *output_file)
2606 bool output_file_created = false;
2612 char const *temp_output;
2613 char const *file = *files;
2614 FILE *fp = xfopen (file, "r");
2616 size_t bytes_per_line = (2 * sizeof (struct line)
2617 - sizeof (struct line) / 2);
2620 initbuf (&buf, bytes_per_line,
2621 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2626 while (fillbuf (&buf, fp, file))
2629 struct line *linebase;
2631 if (buf.eof && nfiles
2632 && (bytes_per_line + 1
2633 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2635 /* End of file, but there is more input and buffer room.
2636 Concatenate the next input file; this is faster in
2638 buf.left = buf.used;
2642 line = buffer_linelim (&buf);
2643 linebase = line - buf.nlines;
2645 sortlines (line, buf.nlines, linebase);
2646 if (buf.eof && !nfiles && !ntemps && !buf.left)
2649 tfp = xfopen (output_file, "w");
2650 temp_output = output_file;
2651 output_file_created = true;
2656 temp_output = create_temp (&tfp, NULL);
2662 write_bytes (line->text, line->length, tfp, temp_output);
2664 while (linebase < line && compare (line, line - 1) == 0)
2667 while (linebase < line);
2669 xfclose (tfp, temp_output);
2671 /* Free up some resources every once in a while. */
2672 if (MAX_PROCS_BEFORE_REAP < nprocs)
2675 if (output_file_created)
2684 if (! output_file_created)
2687 struct tempnode *node = temphead;
2688 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2689 for (i = 0; node; i++)
2691 tempfiles[i].name = node->name;
2692 tempfiles[i].pid = node->pid;
2695 merge (tempfiles, ntemps, ntemps, output_file);
2700 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2703 insertkey (struct keyfield *key_arg)
2705 struct keyfield **p;
2706 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2708 for (p = &keylist; *p; p = &(*p)->next)
2714 /* Report a bad field specification SPEC, with extra info MSGID. */
2716 static void badfieldspec (char const *, char const *)
2719 badfieldspec (char const *spec, char const *msgid)
2721 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2722 _(msgid), quote (spec));
2726 /* Report incompatible options. */
2728 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2730 incompatible_options (char const *opts)
2732 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2736 /* Check compatibility of ordering options. */
2739 check_ordering_compatibility (void)
2741 struct keyfield const *key;
2743 for (key = keylist; key; key = key->next)
2744 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2745 + key->version + !!key->ignore))
2746 || (key->random && key->translate))
2748 /* The following is too big, but guaranteed to be "big enough". */
2749 char opts[sizeof short_options];
2751 if (key->ignore == nondictionary)
2755 if (key->general_numeric)
2757 if (key->ignore == nonprinting)
2768 incompatible_options (opts);
2772 /* Parse the leading integer in STRING and store the resulting value
2773 (which must fit into size_t) into *VAL. Return the address of the
2774 suffix after the integer. If the value is too large, silently
2775 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2776 failure; otherwise, report MSGID and exit on failure. */
2779 parse_field_count (char const *string, size_t *val, char const *msgid)
2784 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2787 case LONGINT_INVALID_SUFFIX_CHAR:
2792 case LONGINT_OVERFLOW:
2793 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2797 case LONGINT_INVALID:
2799 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2800 _(msgid), quote (string));
2807 /* Handle interrupts and hangups. */
2810 sighandler (int sig)
2813 signal (sig, SIG_IGN);
2817 signal (sig, SIG_DFL);
2821 /* Set the ordering options for KEY specified in S.
2822 Return the address of the first character in S that
2823 is not a valid ordering option.
2824 BLANKTYPE is the kind of blanks that 'b' should skip. */
2827 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2834 if (blanktype == bl_start || blanktype == bl_both)
2835 key->skipsblanks = true;
2836 if (blanktype == bl_end || blanktype == bl_both)
2837 key->skipeblanks = true;
2840 key->ignore = nondictionary;
2843 key->translate = fold_toupper;
2846 key->general_numeric = true;
2849 /* Option order should not matter, so don't let -i override
2850 -d. -d implies -i, but -i does not imply -d. */
2852 key->ignore = nonprinting;
2858 key->numeric = true;
2864 key->reverse = true;
2867 key->version = true;
2877 static struct keyfield *
2878 key_init (struct keyfield *key)
2880 memset (key, 0, sizeof *key);
2881 key->eword = SIZE_MAX;
2886 main (int argc, char **argv)
2888 struct keyfield *key;
2889 struct keyfield key_buf;
2890 struct keyfield gkey;
2894 bool mergeonly = false;
2895 char *random_source = NULL;
2896 bool need_random = false;
2898 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2899 bool obsolete_usage = (posix2_version () < 200112);
2901 char *files_from = NULL;
2903 char const *outfile = NULL;
2905 initialize_main (&argc, &argv);
2906 set_program_name (argv[0]);
2907 setlocale (LC_ALL, "");
2908 bindtextdomain (PACKAGE, LOCALEDIR);
2909 textdomain (PACKAGE);
2911 initialize_exit_failure (SORT_FAILURE);
2913 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2914 #if HAVE_NL_LANGINFO
2915 hard_LC_TIME = hard_locale (LC_TIME);
2918 /* Get locale's representation of the decimal point. */
2920 struct lconv const *locale = localeconv ();
2922 /* If the locale doesn't define a decimal point, or if the decimal
2923 point is multibyte, use the C locale's decimal point. FIXME:
2924 add support for multibyte decimal points. */
2925 decimal_point = to_uchar (locale->decimal_point[0]);
2926 if (! decimal_point || locale->decimal_point[1])
2927 decimal_point = '.';
2929 /* FIXME: add support for multibyte thousands separators. */
2930 thousands_sep = to_uchar (*locale->thousands_sep);
2931 if (! thousands_sep || locale->thousands_sep[1])
2935 have_read_stdin = false;
2940 static int const sig[] =
2942 /* The usual suspects. */
2943 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2960 enum { nsigs = sizeof sig / sizeof sig[0] };
2963 struct sigaction act;
2965 sigemptyset (&caught_signals);
2966 for (i = 0; i < nsigs; i++)
2968 sigaction (sig[i], NULL, &act);
2969 if (act.sa_handler != SIG_IGN)
2970 sigaddset (&caught_signals, sig[i]);
2973 act.sa_handler = sighandler;
2974 act.sa_mask = caught_signals;
2977 for (i = 0; i < nsigs; i++)
2978 if (sigismember (&caught_signals, sig[i]))
2979 sigaction (sig[i], &act, NULL);
2981 for (i = 0; i < nsigs; i++)
2982 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2984 signal (sig[i], sighandler);
2985 siginterrupt (sig[i], 1);
2990 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2991 atexit (exit_cleanup);
2993 gkey.sword = gkey.eword = SIZE_MAX;
2995 gkey.translate = NULL;
2996 gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
2997 gkey.month = gkey.reverse = false;
2998 gkey.skipsblanks = gkey.skipeblanks = false;
3000 files = xnmalloc (argc, sizeof *files);
3004 /* Parse an operand as a file after "--" was seen; or if
3005 pedantic and a file was seen, unless the POSIX version
3006 predates 1003.1-2001 and -c was not seen and the operand is
3007 "-o FILE" or "-oFILE". */
3011 || (posixly_correct && nfiles != 0
3012 && ! (obsolete_usage
3015 && argv[optind][0] == '-' && argv[optind][1] == 'o'
3016 && (argv[optind][2] || optind + 1 != argc)))
3017 || ((c = getopt_long (argc, argv, short_options,
3023 files[nfiles++] = argv[optind++];
3029 if (optarg[0] == '+')
3031 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3032 && ISDIGIT (argv[optind][1]));
3033 obsolete_usage |= minus_pos_usage & ~posixly_correct;
3036 /* Treat +POS1 [-POS2] as a key if possible; but silently
3037 treat an operand as a file if it is not a valid +POS1. */
3038 key = key_init (&key_buf);
3039 s = parse_field_count (optarg + 1, &key->sword, NULL);
3041 s = parse_field_count (s + 1, &key->schar, NULL);
3042 if (! (key->sword | key->schar))
3043 key->sword = SIZE_MAX;
3044 if (! s || *set_ordering (s, key, bl_start))
3048 if (minus_pos_usage)
3050 char const *optarg1 = argv[optind++];
3051 s = parse_field_count (optarg1 + 1, &key->eword,
3052 N_("invalid number after `-'"));
3054 s = parse_field_count (s + 1, &key->echar,
3055 N_("invalid number after `.'"));
3056 if (*set_ordering (s, key, bl_end))
3057 badfieldspec (optarg1,
3058 N_("stray character in field spec"));
3065 files[nfiles++] = optarg;
3069 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3085 set_ordering (str, &gkey, bl_both);
3091 ? XARGMATCH ("--check", optarg, check_args, check_types)
3096 if (checkonly && checkonly != c)
3097 incompatible_options ("cC");
3101 case COMPRESS_PROGRAM_OPTION:
3102 if (compress_program && !STREQ (compress_program, optarg))
3103 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3104 compress_program = optarg;
3107 case FILES0_FROM_OPTION:
3108 files_from = optarg;
3112 key = key_init (&key_buf);
3115 s = parse_field_count (optarg, &key->sword,
3116 N_("invalid number at field start"));
3119 /* Provoke with `sort -k0' */
3120 badfieldspec (optarg, N_("field number is zero"));
3124 s = parse_field_count (s + 1, &key->schar,
3125 N_("invalid number after `.'"));
3128 /* Provoke with `sort -k1.0' */
3129 badfieldspec (optarg, N_("character offset is zero"));
3132 if (! (key->sword | key->schar))
3133 key->sword = SIZE_MAX;
3134 s = set_ordering (s, key, bl_start);
3137 key->eword = SIZE_MAX;
3143 s = parse_field_count (s + 1, &key->eword,
3144 N_("invalid number after `,'"));
3147 /* Provoke with `sort -k1,0' */
3148 badfieldspec (optarg, N_("field number is zero"));
3151 s = parse_field_count (s + 1, &key->echar,
3152 N_("invalid number after `.'"));
3155 /* `-k 2,3' is equivalent to `+1 -3'. */
3158 s = set_ordering (s, key, bl_end);
3161 badfieldspec (optarg, N_("stray character in field spec"));
3170 specify_nmerge (oi, c, optarg);
3174 if (outfile && !STREQ (outfile, optarg))
3175 error (SORT_FAILURE, 0, _("multiple output files specified"));
3179 case RANDOM_SOURCE_OPTION:
3180 if (random_source && !STREQ (random_source, optarg))
3181 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3182 random_source = optarg;
3190 specify_sort_size (oi, c, optarg);
3195 char newtab = optarg[0];
3197 error (SORT_FAILURE, 0, _("empty tab"));
3200 if (STREQ (optarg, "\\0"))
3204 /* Provoke with `sort -txx'. Complain about
3205 "multi-character tab" instead of "multibyte tab", so
3206 that the diagnostic's wording does not need to be
3207 changed once multibyte characters are supported. */
3208 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3212 if (tab != TAB_DEFAULT && tab != newtab)
3213 error (SORT_FAILURE, 0, _("incompatible tabs"));
3219 add_temp_dir (optarg);
3227 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3228 through Solaris 7. It is also accepted by many non-Solaris
3229 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3230 -y is marked as obsolete starting with Solaris 8 (1999), but is
3231 still accepted as of Solaris 10 prerelease (2004).
3233 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3234 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3235 and which in general ignores the argument after "-y" if it
3236 consists entirely of digits (it can even be empty). */
3237 if (optarg == argv[optind - 1])
3240 for (p = optarg; ISDIGIT (*p); p++)
3242 optind -= (*p != '\0');
3250 case_GETOPT_HELP_CHAR;
3252 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3255 usage (SORT_FAILURE);
3263 /* When using --files0-from=F, you may not specify any files
3264 on the command-line. */
3267 error (0, 0, _("extra operand %s"), quote (files[0]));
3268 fprintf (stderr, "%s\n",
3269 _("file operands cannot be combined with --files0-from"));
3270 usage (SORT_FAILURE);
3273 if (STREQ (files_from, "-"))
3277 stream = fopen (files_from, "r");
3279 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3280 quote (files_from));
3283 readtokens0_init (&tok);
3285 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3286 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3287 quote (files_from));
3295 for (i = 0; i < nfiles; i++)
3297 if (STREQ (files[i], "-"))
3298 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3299 "no file name of %s allowed"),
3301 else if (files[i][0] == '\0')
3303 /* Using the standard `filename:line-number:' prefix here is
3304 not totally appropriate, since NUL is the separator, not NL,
3305 but it might be better than nothing. */
3306 unsigned long int file_number = i + 1;
3307 error (SORT_FAILURE, 0,
3308 _("%s:%lu: invalid zero-length file name"),
3309 quotearg_colon (files_from), file_number);
3314 error (SORT_FAILURE, 0, _("no input from %s"),
3315 quote (files_from));
3318 /* Inheritance of global options to individual keys. */
3319 for (key = keylist; key; key = key->next)
3323 || (key->skipsblanks
3329 | key->general_numeric
3332 key->ignore = gkey.ignore;
3333 key->translate = gkey.translate;
3334 key->skipsblanks = gkey.skipsblanks;
3335 key->skipeblanks = gkey.skipeblanks;
3336 key->month = gkey.month;
3337 key->numeric = gkey.numeric;
3338 key->general_numeric = gkey.general_numeric;
3339 key->random = gkey.random;
3340 key->reverse = gkey.reverse;
3341 key->version = gkey.version;
3344 need_random |= key->random;
3347 if (!keylist && (gkey.ignore
3349 || (gkey.skipsblanks
3353 | gkey.general_numeric
3358 need_random |= gkey.random;
3361 check_ordering_compatibility ();
3363 reverse = gkey.reverse;
3367 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3368 if (! randread_source)
3369 die (_("open failed"), random_source);
3372 if (temp_dir_count == 0)
3374 char const *tmp_dir = getenv ("TMPDIR");
3375 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3380 static char *minus = "-";
3386 /* Need to re-check that we meet the minimum requirement for memory
3387 usage with the final value for NMERGE. */
3389 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3394 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3395 quote (files[1]), checkonly);
3399 static char opts[] = {0, 'o', 0};
3400 opts[0] = checkonly;
3401 incompatible_options (opts);
3404 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3405 input is not properly sorted. */
3406 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3411 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3414 for (i = 0; i < nfiles; ++i)
3415 sortfiles[i].name = files[i];
3417 merge (sortfiles, 0, nfiles, outfile);
3418 IF_LINT (free (sortfiles));
3421 sort (files, nfiles, outfile);
3423 if (have_read_stdin && fclose (stdin) == EOF)
3424 die (_("close failed"), "-");
3426 exit (EXIT_SUCCESS);