1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
79 #if !defined OPEN_MAX && defined NR_OPEN
80 # define OPEN_MAX NR_OPEN
86 #define UCHAR_LIM (UCHAR_MAX + 1)
88 #ifndef DEFAULT_TMPDIR
89 # define DEFAULT_TMPDIR "/tmp"
95 /* POSIX says to exit with status 1 if invoked with -c and the
96 input is not properly sorted. */
97 SORT_OUT_OF_ORDER = 1,
99 /* POSIX says any other irregular exit must exit with a status
100 code greater than 1. */
106 /* The number of times we should try to fork a compression process
107 (we retry if the fork call fails). We don't _need_ to compress
108 temp files, this is just to reduce disk access, so this number
110 MAX_FORK_TRIES_COMPRESS = 2,
112 /* The number of times we should try to fork a decompression process.
113 If we can't fork a decompression process, we can't sort, so this
114 number should be big. */
115 MAX_FORK_TRIES_DECOMPRESS = 8
118 /* The representation of the decimal point in the current locale. */
119 static int decimal_point;
121 /* Thousands separator; if -1, then there isn't one. */
122 static int thousands_sep;
124 /* Nonzero if the corresponding locales are hard. */
125 static bool hard_LC_COLLATE;
127 static bool hard_LC_TIME;
130 #define NONZERO(x) ((x) != 0)
132 /* The kind of blanks for '-b' to skip in various options. */
133 enum blanktype { bl_start, bl_end, bl_both };
135 /* The character marking end of line. Default to \n. */
136 static char eolchar = '\n';
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 size_t length; /* Length including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
150 char *buf; /* Dynamically allocated buffer,
151 partitioned into 3 regions:
154 - an array of lines, in reverse order. */
155 size_t used; /* Number of bytes used for input data. */
156 size_t nlines; /* Number of lines in the line array. */
157 size_t alloc; /* Number of bytes allocated. */
158 size_t left; /* Number of bytes left from previous reads. */
159 size_t line_bytes; /* Number of bytes to reserve for each line. */
160 bool eof; /* An EOF has been read. */
165 size_t sword; /* Zero-origin 'word' to start at. */
166 size_t schar; /* Additional characters to skip. */
167 size_t eword; /* Zero-origin first word after field. */
168 size_t echar; /* Additional characters in field. */
169 bool const *ignore; /* Boolean array of characters to ignore. */
170 char const *translate; /* Translation applied to characters. */
171 bool skipsblanks; /* Skip leading blanks when finding start. */
172 bool skipeblanks; /* Skip leading blanks when finding end. */
173 bool numeric; /* Flag for numeric comparison. Handle
174 strings of digits with optional decimal
175 point, but no exponential notation. */
176 bool random; /* Sort by random hash of key. */
177 bool general_numeric; /* Flag for general, numeric comparison.
178 Handle numbers in exponential notation. */
179 bool month; /* Flag for comparison by month name. */
180 bool reverse; /* Reverse the sense of comparison. */
181 bool version; /* sort by version number */
182 struct keyfield *next; /* Next keyfield to try. */
191 /* FIXME: None of these tables work with multibyte character sets.
192 Also, there are many other bugs when handling multibyte characters.
193 One way to fix this is to rewrite `sort' to use wide characters
194 internally, but doing this with good performance is a bit
197 /* Table of blanks. */
198 static bool blanks[UCHAR_LIM];
200 /* Table of non-printing characters. */
201 static bool nonprinting[UCHAR_LIM];
203 /* Table of non-dictionary characters (not letters, digits, or blanks). */
204 static bool nondictionary[UCHAR_LIM];
206 /* Translation table folding lower case to upper. */
207 static char fold_toupper[UCHAR_LIM];
209 #define MONTHS_PER_YEAR 12
211 /* Table mapping month names to integers.
212 Alphabetic order allows binary search. */
213 static struct month monthtab[] =
229 /* During the merge phase, the number of files to merge at once. */
230 #define NMERGE_DEFAULT 16
232 /* Minimum size for a merge or check buffer. */
233 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
235 /* Minimum sort size; the code might not work with smaller sizes. */
236 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
238 /* The number of bytes needed for a merge or check buffer, which can
239 function relatively efficiently even if it holds only one line. If
240 a longer line is seen, this value is increased. */
241 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
243 /* The approximate maximum number of bytes of main memory to use, as
244 specified by the user. Zero if the user has not specified a size. */
245 static size_t sort_size;
247 /* The guessed size for non-regular files. */
248 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
250 /* Array of directory names in which any temporary files are to be created. */
251 static char const **temp_dirs;
253 /* Number of temporary directory names used. */
254 static size_t temp_dir_count;
256 /* Number of allocated slots in temp_dirs. */
257 static size_t temp_dir_alloc;
259 /* Flag to reverse the order of all comparisons. */
262 /* Flag for stable sort. This turns off the last ditch bytewise
263 comparison of lines, and instead leaves lines in the same order
264 they were read if all keys compare equal. */
267 /* If TAB has this value, blanks separate fields. */
268 enum { TAB_DEFAULT = CHAR_MAX + 1 };
270 /* Tab character separating fields. If TAB_DEFAULT, then fields are
271 separated by the empty string between a non-blank character and a blank
273 static int tab = TAB_DEFAULT;
275 /* Flag to remove consecutive duplicate lines from the output.
276 Only the last of a sequence of equal lines will be output. */
279 /* Nonzero if any of the input files are the standard input. */
280 static bool have_read_stdin;
282 /* List of key field comparisons to be tried. */
283 static struct keyfield *keylist;
285 /* Program used to (de)compress temp files. Must accept -d. */
286 static char const *compress_program;
288 /* Maximum number of files to merge in one go. If more than this
289 number are present, temp files will be used. */
290 static unsigned int nmerge = NMERGE_DEFAULT;
292 static void sortlines_temp (struct line *, size_t, struct line *);
294 /* Report MESSAGE for FILE, then clean up and exit.
295 If FILE is null, it represents standard output. */
297 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
299 die (char const *message, char const *file)
301 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
308 if (status != EXIT_SUCCESS)
309 fprintf (stderr, _("Try `%s --help' for more information.\n"),
314 Usage: %s [OPTION]... [FILE]...\n\
315 or: %s [OPTION]... --files0-from=F\n\
317 program_name, program_name);
319 Write sorted concatenation of all FILE(s) to standard output.\n\
323 Mandatory arguments to long options are mandatory for short options too.\n\
330 -b, --ignore-leading-blanks ignore leading blanks\n\
331 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
332 -f, --ignore-case fold lower case to upper case characters\n\
335 -g, --general-numeric-sort compare according to general numerical value\n\
336 -i, --ignore-nonprinting consider only printable characters\n\
337 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
338 -n, --numeric-sort compare according to string numerical value\n\
339 -R, --random-sort sort by random hash of keys\n\
340 -V, --version-sort sort by numeric version (see strverscmp(3C))\n\
341 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
342 --sort=WORD sort according to WORD:\n\
343 general-numeric -g, month -M, numeric -n,\n\
344 random -R, version -V\n\
345 -r, --reverse reverse the result of comparisons\n\
353 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
354 for more use temp files\n\
357 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
358 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
359 --compress-program=PROG compress temporaries with PROG;\n\
360 decompress them with PROG -d\n\
361 --files0-from=F read input from the files specified by\n\
362 NUL-terminated names in file F\n\
365 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
366 (default end of line)\n\
367 -m, --merge merge already sorted files; do not sort\n\
370 -o, --output=FILE write result to FILE instead of standard output\n\
371 -s, --stable stabilize sort by disabling last-resort comparison\n\
372 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
375 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
376 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
377 multiple options specify multiple directories\n\
378 -u, --unique with -c, check for strict ordering;\n\
379 without -c, output only the first of an equal run\n\
382 -z, --zero-terminated end lines with 0 byte, not newline\n\
384 fputs (HELP_OPTION_DESCRIPTION, stdout);
385 fputs (VERSION_OPTION_DESCRIPTION, stdout);
388 POS is F[.C][OPTS], where F is the field number and C the character position\n\
389 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
390 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
391 one or more single-letter ordering options, which override global ordering\n\
392 options for that key. If no key is given, use the entire line as the key.\n\
394 SIZE may be followed by the following multiplicative suffixes:\n\
397 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
399 With no FILE, or when FILE is -, read standard input.\n\
402 The locale specified by the environment affects sort order.\n\
403 Set LC_ALL=C to get the traditional sort order that uses\n\
404 native byte values.\n\
406 emit_bug_reporting_address ();
412 /* For long options that have no equivalent short option, use a
413 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
416 CHECK_OPTION = CHAR_MAX + 1,
417 COMPRESS_PROGRAM_OPTION,
420 RANDOM_SOURCE_OPTION,
424 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
426 static struct option const long_options[] =
428 {"ignore-leading-blanks", no_argument, NULL, 'b'},
429 {"check", optional_argument, NULL, CHECK_OPTION},
430 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
431 {"dictionary-order", no_argument, NULL, 'd'},
432 {"ignore-case", no_argument, NULL, 'f'},
433 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
434 {"general-numeric-sort", no_argument, NULL, 'g'},
435 {"ignore-nonprinting", no_argument, NULL, 'i'},
436 {"key", required_argument, NULL, 'k'},
437 {"merge", no_argument, NULL, 'm'},
438 {"month-sort", no_argument, NULL, 'M'},
439 {"numeric-sort", no_argument, NULL, 'n'},
440 {"version-sort", no_argument, NULL, 'V'},
441 {"random-sort", no_argument, NULL, 'R'},
442 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
443 {"sort", required_argument, NULL, SORT_OPTION},
444 {"output", required_argument, NULL, 'o'},
445 {"reverse", no_argument, NULL, 'r'},
446 {"stable", no_argument, NULL, 's'},
447 {"batch-size", required_argument, NULL, NMERGE_OPTION},
448 {"buffer-size", required_argument, NULL, 'S'},
449 {"field-separator", required_argument, NULL, 't'},
450 {"temporary-directory", required_argument, NULL, 'T'},
451 {"unique", no_argument, NULL, 'u'},
452 {"zero-terminated", no_argument, NULL, 'z'},
453 {GETOPT_HELP_OPTION_DECL},
454 {GETOPT_VERSION_OPTION_DECL},
458 #define CHECK_TABLE \
460 _ct_("silent", 'C') \
461 _ct_("diagnose-first", 'c')
463 static char const *const check_args[] =
465 #define _ct_(_s, _c) _s,
469 static char const check_types[] =
471 #define _ct_(_s, _c) _c,
477 _st_("general-numeric", 'g') \
479 _st_("numeric", 'n') \
480 _st_("random", 'R') \
483 static char const *const sort_args[] =
485 #define _st_(_s, _c) _s,
489 static char const sort_types[] =
491 #define _st_(_s, _c) _c,
496 /* The set of signals that are caught. */
497 static sigset_t caught_signals;
499 /* Critical section status. */
506 /* Enter a critical section. */
507 static struct cs_status
510 struct cs_status status;
511 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
515 /* Leave a critical section. */
517 cs_leave (struct cs_status status)
521 /* Ignore failure when restoring the signal mask. */
522 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
526 /* The list of temporary files. */
529 struct tempnode *volatile next;
530 pid_t pid; /* If compressed, the pid of compressor, else zero */
531 char name[1]; /* Actual size is 1 + file name length. */
533 static struct tempnode *volatile temphead;
534 static struct tempnode *volatile *temptail = &temphead;
539 pid_t pid; /* If compressed, the pid of compressor, else zero */
542 /* A table where we store compression process states. We clean up all
543 processes in a timely manner so as not to exhaust system resources,
544 so we store the info on whether the process is still running, or has
546 static Hash_table *proctab;
548 enum { INIT_PROCTAB_SIZE = 47 };
550 enum procstate { ALIVE, ZOMBIE };
552 /* A proctab entry. The COUNT field is there in case we fork a new
553 compression process that has the same PID as an old zombie process
554 that is still in the table (because the process to decompress the
555 temp file it was associated with hasn't started yet). */
559 enum procstate state;
564 proctab_hasher (const void *entry, size_t tabsize)
566 const struct procnode *node = entry;
567 return node->pid % tabsize;
571 proctab_comparator (const void *e1, const void *e2)
573 const struct procnode *n1 = e1, *n2 = e2;
574 return n1->pid == n2->pid;
577 /* The total number of forked processes (compressors and decompressors)
578 that have not been reaped yet. */
579 static size_t nprocs;
581 /* The number of child processes we'll allow before we try to reap some. */
582 enum { MAX_PROCS_BEFORE_REAP = 2 };
584 /* If 0 < PID, wait for the child process with that PID to exit.
585 If PID is -1, clean up a random child process which has finished and
586 return the process ID of that child. If PID is -1 and no processes
587 have quit yet, return 0 without waiting. */
593 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
596 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
600 if (! WIFEXITED (status) || WEXITSTATUS (status))
601 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
609 /* Add the PID of a running compression process to proctab, or update
610 the entry COUNT and STATE fields if it's already there. This also
611 creates the table for us the first time it's called. */
614 register_proc (pid_t pid)
616 struct procnode test, *node;
620 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
629 node = hash_lookup (proctab, &test);
637 node = xmalloc (sizeof *node);
641 hash_insert (proctab, node);
645 /* This is called when we reap a random process. We don't know
646 whether we have reaped a compression process or a decompression
647 process until we look in the table. If there's an ALIVE entry for
648 it, then we have reaped a compression process, so change the state
649 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
652 update_proc (pid_t pid)
654 struct procnode test, *node;
657 node = hash_lookup (proctab, &test);
659 node->state = ZOMBIE;
662 /* This is for when we need to wait for a compression process to exit.
663 If it has a ZOMBIE entry in the table then it's already dead and has
664 been reaped. Note that if there's an ALIVE entry for it, it still may
665 already have died and been reaped if a second process was created with
666 the same PID. This is probably exceedingly rare, but to be on the safe
667 side we will have to wait for any compression process with this PID. */
670 wait_proc (pid_t pid)
672 struct procnode test, *node;
675 node = hash_lookup (proctab, &test);
676 if (node->state == ALIVE)
679 node->state = ZOMBIE;
682 hash_delete (proctab, node);
687 /* Keep reaping finished children as long as there are more to reap.
688 This doesn't block waiting for any of them, it only reaps those
689 that are already dead. */
696 while (0 < nprocs && (pid = reap (-1)))
700 /* Clean up any remaining temporary files. */
705 struct tempnode const *node;
707 for (node = temphead; node; node = node->next)
712 /* Cleanup actions to take when exiting. */
719 /* Clean up any remaining temporary files in a critical section so
720 that a signal handler does not try to clean them too. */
721 struct cs_status cs = cs_enter ();
729 /* Create a new temporary file, returning its newly allocated tempnode.
730 Store into *PFD the file descriptor open for writing. */
732 static struct tempnode *
733 create_temp_file (int *pfd)
735 static char const slashbase[] = "/sortXXXXXX";
736 static size_t temp_dir_index;
739 char const *temp_dir = temp_dirs[temp_dir_index];
740 size_t len = strlen (temp_dir);
741 struct tempnode *node =
742 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
743 char *file = node->name;
746 memcpy (file, temp_dir, len);
747 memcpy (file + len, slashbase, sizeof slashbase);
750 if (++temp_dir_index == temp_dir_count)
753 /* Create the temporary file in a critical section, to avoid races. */
759 temptail = &node->next;
766 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
773 /* Return a stream for FILE, opened with mode HOW. A null FILE means
774 standard output; HOW should be "w". When opening for input, "-"
775 means standard input. To avoid confusion, do not return file
776 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
777 opening an ordinary FILE. */
780 xfopen (const char *file, const char *how)
786 else if (STREQ (file, "-") && *how == 'r')
788 have_read_stdin = true;
793 fp = fopen (file, how);
795 die (_("open failed"), file);
801 /* Close FP, whose name is FILE, and report any errors. */
804 xfclose (FILE *fp, char const *file)
809 /* Allow reading stdin from tty more than once. */
815 /* Don't close stdout just yet. close_stdout does that. */
816 if (fflush (fp) != 0)
817 die (_("fflush failed"), file);
821 if (fclose (fp) != 0)
822 die (_("close failed"), file);
828 dup2_or_die (int oldfd, int newfd)
830 if (dup2 (oldfd, newfd) < 0)
831 error (SORT_FAILURE, errno, _("dup2 failed"));
834 /* Fork a child process for piping to and do common cleanup. The
835 TRIES parameter tells us how many times to try to fork before
836 giving up. Return the PID of the child or -1 if fork failed. */
839 pipe_fork (int pipefds[2], size_t tries)
841 #if HAVE_WORKING_FORK
842 struct tempnode *saved_temphead;
844 unsigned int wait_retry = 1;
845 pid_t pid IF_LINT (= -1);
848 if (pipe (pipefds) < 0)
853 /* This is so the child process won't delete our temp files
854 if it receives a signal before exec-ing. */
856 saved_temphead = temphead;
862 temphead = saved_temphead;
867 if (0 <= pid || errno != EAGAIN)
884 close (STDIN_FILENO);
885 close (STDOUT_FILENO);
892 #else /* ! HAVE_WORKING_FORK */
897 /* Create a temporary file and start a compression program to filter output
898 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
899 set it to the PID of the newly-created process. */
902 create_temp (FILE **pfp, pid_t *ppid)
905 struct tempnode *node = create_temp_file (&tempfd);
906 char *name = node->name;
908 if (compress_program)
912 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
919 register_proc (node->pid);
921 else if (node->pid == 0)
924 dup2_or_die (tempfd, STDOUT_FILENO);
926 dup2_or_die (pipefds[0], STDIN_FILENO);
929 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
930 error (SORT_FAILURE, errno, _("couldn't execute %s"),
937 *pfp = fdopen (tempfd, "w");
939 die (_("couldn't create temporary file"), name);
947 /* Open a compressed temp file and start a decompression process through
948 which to filter the input. PID must be the valid processes ID of the
949 process used to compress the file. */
952 open_temp (const char *name, pid_t pid)
954 int tempfd, pipefds[2];
960 tempfd = open (name, O_RDONLY);
962 die (_("couldn't open temporary file"), name);
964 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
970 else if (child_pid == 0)
973 dup2_or_die (tempfd, STDIN_FILENO);
975 dup2_or_die (pipefds[1], STDOUT_FILENO);
978 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
979 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
983 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
986 fp = fdopen (pipefds[0], "r");
988 die (_("couldn't create temporary file"), name);
994 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
996 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
997 die (_("write failed"), output_file);
1000 /* Append DIR to the array of temporary directory names. */
1002 add_temp_dir (char const *dir)
1004 if (temp_dir_count == temp_dir_alloc)
1005 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1007 temp_dirs[temp_dir_count++] = dir;
1010 /* Remove NAME from the list of temporary files. */
1013 zaptemp (const char *name)
1015 struct tempnode *volatile *pnode;
1016 struct tempnode *node;
1017 struct tempnode *next;
1019 int unlink_errno = 0;
1020 struct cs_status cs;
1022 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1025 /* Unlink the temporary file in a critical section to avoid races. */
1028 unlink_status = unlink (name);
1029 unlink_errno = errno;
1033 if (unlink_status != 0)
1034 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1040 #if HAVE_NL_LANGINFO
1043 struct_month_cmp (const void *m1, const void *m2)
1045 struct month const *month1 = m1;
1046 struct month const *month2 = m2;
1047 return strcmp (month1->name, month2->name);
1052 /* Initialize the character class tables. */
1059 for (i = 0; i < UCHAR_LIM; ++i)
1061 blanks[i] = !! isblank (i);
1062 nonprinting[i] = ! isprint (i);
1063 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1064 fold_toupper[i] = toupper (i);
1067 #if HAVE_NL_LANGINFO
1068 /* If we're not in the "C" locale, read different names for months. */
1071 for (i = 0; i < MONTHS_PER_YEAR; i++)
1078 s = (char *) nl_langinfo (ABMON_1 + i);
1080 monthtab[i].name = name = xmalloc (s_len + 1);
1081 monthtab[i].val = i + 1;
1083 for (j = 0; j < s_len; j++)
1084 name[j] = fold_toupper[to_uchar (s[j])];
1087 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1088 sizeof *monthtab, struct_month_cmp);
1093 /* Specify how many inputs may be merged at once.
1094 This may be set on the command-line with the
1095 --batch-size option. */
1097 specify_nmerge (int oi, char c, char const *s)
1100 struct rlimit rlimit;
1101 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1103 /* Try to find out how many file descriptors we'll be able
1104 to open. We need at least nmerge + 3 (STDIN_FILENO,
1105 STDOUT_FILENO and STDERR_FILENO). */
1106 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1111 if (e == LONGINT_OK)
1115 e = LONGINT_OVERFLOW;
1120 error (0, 0, _("invalid --%s argument %s"),
1121 long_options[oi].name, quote(s));
1122 error (SORT_FAILURE, 0,
1123 _("minimum --%s argument is %s"),
1124 long_options[oi].name, quote("2"));
1126 else if (max_nmerge < nmerge)
1128 e = LONGINT_OVERFLOW;
1135 if (e == LONGINT_OVERFLOW)
1137 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1138 error (0, 0, _("--%s argument %s too large"),
1139 long_options[oi].name, quote(s));
1140 error (SORT_FAILURE, 0,
1141 _("maximum --%s argument with current rlimit is %s"),
1142 long_options[oi].name,
1143 uinttostr (max_nmerge, max_nmerge_buf));
1146 xstrtol_fatal (e, oi, c, long_options, s);
1149 /* Specify the amount of main memory to use when sorting. */
1151 specify_sort_size (int oi, char c, char const *s)
1155 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1157 /* The default unit is KiB. */
1158 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1160 if (n <= UINTMAX_MAX / 1024)
1163 e = LONGINT_OVERFLOW;
1166 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1167 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1176 double mem = physmem_total () * n / 100;
1178 /* Use "<", not "<=", to avoid problems with rounding. */
1179 if (mem < UINTMAX_MAX)
1185 e = LONGINT_OVERFLOW;
1190 if (e == LONGINT_OK)
1192 /* If multiple sort sizes are specified, take the maximum, so
1193 that option order does not matter. */
1200 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1204 e = LONGINT_OVERFLOW;
1207 xstrtol_fatal (e, oi, c, long_options, s);
1210 /* Return the default sort size. */
1212 default_sort_size (void)
1214 /* Let MEM be available memory or 1/8 of total memory, whichever
1216 double avail = physmem_available ();
1217 double total = physmem_total ();
1218 double mem = MAX (avail, total / 8);
1219 struct rlimit rlimit;
1221 /* Let SIZE be MEM, but no more than the maximum object size or
1222 system resource limits. Avoid the MIN macro here, as it is not
1223 quite right when only one argument is floating point. Don't
1224 bother to check for values like RLIM_INFINITY since in practice
1225 they are not much less than SIZE_MAX. */
1226 size_t size = SIZE_MAX;
1229 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1230 size = rlimit.rlim_cur;
1232 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1233 size = rlimit.rlim_cur;
1236 /* Leave a large safety margin for the above limits, as failure can
1237 occur when they are exceeded. */
1241 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1242 Exceeding RSS is not fatal, but can be quite slow. */
1243 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1244 size = rlimit.rlim_cur / 16 * 15;
1247 /* Use no less than the minimum. */
1248 return MAX (size, MIN_SORT_SIZE);
1251 /* Return the sort buffer size to use with the input files identified
1252 by FPS and FILES, which are alternate names of the same files.
1253 NFILES gives the number of input files; NFPS may be less. Assume
1254 that each input line requires LINE_BYTES extra bytes' worth of line
1255 information. Do not exceed the size bound specified by the user
1256 (or a default size bound, if the user does not specify one). */
1259 sort_buffer_size (FILE *const *fps, size_t nfps,
1260 char *const *files, size_t nfiles,
1263 /* A bound on the input size. If zero, the bound hasn't been
1265 static size_t size_bound;
1267 /* In the worst case, each input byte is a newline. */
1268 size_t worst_case_per_input_byte = line_bytes + 1;
1270 /* Keep enough room for one extra input line and an extra byte.
1271 This extra room might be needed when preparing to read EOF. */
1272 size_t size = worst_case_per_input_byte + 1;
1276 for (i = 0; i < nfiles; i++)
1282 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1283 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1284 : stat (files[i], &st))
1286 die (_("stat failed"), files[i]);
1288 if (S_ISREG (st.st_mode))
1289 file_size = st.st_size;
1292 /* The file has unknown size. If the user specified a sort
1293 buffer size, use that; otherwise, guess the size. */
1296 file_size = INPUT_FILE_SIZE_GUESS;
1301 size_bound = sort_size;
1303 size_bound = default_sort_size ();
1306 /* Add the amount of memory needed to represent the worst case
1307 where the input consists entirely of newlines followed by a
1308 single non-newline. Check for overflow. */
1309 worst_case = file_size * worst_case_per_input_byte + 1;
1310 if (file_size != worst_case / worst_case_per_input_byte
1311 || size_bound - size <= worst_case)
1319 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1320 must be at least sizeof (struct line). Allocate ALLOC bytes
1324 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1326 /* Ensure that the line array is properly aligned. If the desired
1327 size cannot be allocated, repeatedly halve it until allocation
1328 succeeds. The smaller allocation may hurt overall performance,
1329 but that's better than failing. */
1332 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1333 buf->buf = malloc (alloc);
1337 if (alloc <= line_bytes + 1)
1341 buf->line_bytes = line_bytes;
1343 buf->used = buf->left = buf->nlines = 0;
1347 /* Return one past the limit of the line array. */
1349 static inline struct line *
1350 buffer_linelim (struct buffer const *buf)
1352 return (struct line *) (buf->buf + buf->alloc);
1355 /* Return a pointer to the first character of the field specified
1359 begfield (const struct line *line, const struct keyfield *key)
1361 char *ptr = line->text, *lim = ptr + line->length - 1;
1362 size_t sword = key->sword;
1363 size_t schar = key->schar;
1364 size_t remaining_bytes;
1366 /* The leading field separator itself is included in a field when -t
1369 if (tab != TAB_DEFAULT)
1370 while (ptr < lim && sword--)
1372 while (ptr < lim && *ptr != tab)
1378 while (ptr < lim && sword--)
1380 while (ptr < lim && blanks[to_uchar (*ptr)])
1382 while (ptr < lim && !blanks[to_uchar (*ptr)])
1386 if (key->skipsblanks)
1387 while (ptr < lim && blanks[to_uchar (*ptr)])
1390 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1391 remaining_bytes = lim - ptr;
1392 if (schar < remaining_bytes)
1400 /* Return the limit of (a pointer to the first character after) the field
1401 in LINE specified by KEY. */
1404 limfield (const struct line *line, const struct keyfield *key)
1406 char *ptr = line->text, *lim = ptr + line->length - 1;
1407 size_t eword = key->eword, echar = key->echar;
1408 size_t remaining_bytes;
1410 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1411 whichever comes first. If there are more than EWORD fields, leave
1412 PTR pointing at the beginning of the field having zero-based index,
1413 EWORD. If a delimiter character was specified (via -t), then that
1414 `beginning' is the first character following the delimiting TAB.
1415 Otherwise, leave PTR pointing at the first `blank' character after
1416 the preceding field. */
1417 if (tab != TAB_DEFAULT)
1418 while (ptr < lim && eword--)
1420 while (ptr < lim && *ptr != tab)
1422 if (ptr < lim && (eword | echar))
1426 while (ptr < lim && eword--)
1428 while (ptr < lim && blanks[to_uchar (*ptr)])
1430 while (ptr < lim && !blanks[to_uchar (*ptr)])
1434 #ifdef POSIX_UNSPECIFIED
1435 /* The following block of code makes GNU sort incompatible with
1436 standard Unix sort, so it's ifdef'd out for now.
1437 The POSIX spec isn't clear on how to interpret this.
1438 FIXME: request clarification.
1440 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1441 Date: Thu, 30 May 96 12:20:41 -0400
1442 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1444 [...]I believe I've found another bug in `sort'.
1449 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1452 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1456 Unix sort produced the answer I expected: sort on the single character
1457 in column 7. GNU sort produced different results, because it disagrees
1458 on the interpretation of the key-end spec "M.N". Unix sort reads this
1459 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1460 "skip M-1 fields, then either N-1 characters or the rest of the current
1461 field, whichever comes first". This extra clause applies only to
1462 key-ends, not key-starts.
1465 /* Make LIM point to the end of (one byte past) the current field. */
1466 if (tab != TAB_DEFAULT)
1469 newlim = memchr (ptr, tab, lim - ptr);
1477 while (newlim < lim && blanks[to_uchar (*newlim)])
1479 while (newlim < lim && !blanks[to_uchar (*newlim)])
1485 /* If we're ignoring leading blanks when computing the End
1486 of the field, don't start counting bytes until after skipping
1487 past any leading blanks. */
1488 if (key->skipeblanks)
1489 while (ptr < lim && blanks[to_uchar (*ptr)])
1492 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1493 remaining_bytes = lim - ptr;
1494 if (echar < remaining_bytes)
1502 /* Fill BUF reading from FP, moving buf->left bytes from the end
1503 of buf->buf to the beginning first. If EOF is reached and the
1504 file wasn't terminated by a newline, supply one. Set up BUF's line
1505 table too. FILE is the name of the file corresponding to FP.
1506 Return true if some input was read. */
1509 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1511 struct keyfield const *key = keylist;
1513 size_t line_bytes = buf->line_bytes;
1514 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1519 if (buf->used != buf->left)
1521 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1522 buf->used = buf->left;
1528 char *ptr = buf->buf + buf->used;
1529 struct line *linelim = buffer_linelim (buf);
1530 struct line *line = linelim - buf->nlines;
1531 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1532 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1534 while (line_bytes + 1 < avail)
1536 /* Read as many bytes as possible, but do not read so many
1537 bytes that there might not be enough room for the
1538 corresponding line array. The worst case is when the
1539 rest of the input file consists entirely of newlines,
1540 except that the last byte is not a newline. */
1541 size_t readsize = (avail - 1) / (line_bytes + 1);
1542 size_t bytes_read = fread (ptr, 1, readsize, fp);
1543 char *ptrlim = ptr + bytes_read;
1545 avail -= bytes_read;
1547 if (bytes_read != readsize)
1550 die (_("read failed"), file);
1554 if (buf->buf == ptrlim)
1556 if (ptrlim[-1] != eol)
1561 /* Find and record each line in the just-read input. */
1562 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1566 line->text = line_start;
1567 line->length = ptr - line_start;
1568 mergesize = MAX (mergesize, line->length);
1569 avail -= line_bytes;
1573 /* Precompute the position of the first key for
1575 line->keylim = (key->eword == SIZE_MAX
1577 : limfield (line, key));
1579 if (key->sword != SIZE_MAX)
1580 line->keybeg = begfield (line, key);
1583 if (key->skipsblanks)
1584 while (blanks[to_uchar (*line_start)])
1586 line->keybeg = line_start;
1598 buf->used = ptr - buf->buf;
1599 buf->nlines = buffer_linelim (buf) - line;
1600 if (buf->nlines != 0)
1602 buf->left = ptr - line_start;
1603 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1608 /* The current input line is too long to fit in the buffer.
1609 Double the buffer size and try again, keeping it properly
1611 size_t line_alloc = buf->alloc / sizeof (struct line);
1612 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1613 buf->alloc = line_alloc * sizeof (struct line);
1618 /* Compare strings A and B as numbers without explicitly converting them to
1619 machine numbers. Comparatively slow for short strings, but asymptotically
1623 numcompare (const char *a, const char *b)
1625 while (blanks[to_uchar (*a)])
1627 while (blanks[to_uchar (*b)])
1630 return strnumcmp (a, b, decimal_point, thousands_sep);
1634 general_numcompare (const char *sa, const char *sb)
1636 /* FIXME: add option to warn about failed conversions. */
1637 /* FIXME: maybe add option to try expensive FP conversion
1638 only if A and B can't be compared more cheaply/accurately. */
1642 double a = strtod (sa, &ea);
1643 double b = strtod (sb, &eb);
1645 /* Put conversion errors at the start of the collating sequence. */
1647 return sb == eb ? 0 : -1;
1651 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1652 conversion errors but before numbers; sort them by internal
1653 bit-pattern, for lack of a more portable alternative. */
1659 : memcmp ((char *) &a, (char *) &b, sizeof a));
1662 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1663 Return 0 if the name in S is not recognized. */
1666 getmonth (char const *month, size_t len)
1669 size_t hi = MONTHS_PER_YEAR;
1670 char const *monthlim = month + len;
1674 if (month == monthlim)
1676 if (!blanks[to_uchar (*month)])
1683 size_t ix = (lo + hi) / 2;
1684 char const *m = month;
1685 char const *n = monthtab[ix].name;
1690 return monthtab[ix].val;
1691 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1696 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1708 /* A source of random data. */
1709 static struct randread_source *randread_source;
1711 /* Return the Ith randomly-generated state. The caller must invoke
1712 random_state (H) for all H less than I before invoking random_state
1715 static struct md5_ctx
1716 random_state (size_t i)
1718 /* An array of states resulting from the random data, and counts of
1719 its used and allocated members. */
1720 static struct md5_ctx *state;
1722 static size_t allocated;
1724 struct md5_ctx *s = &state[i];
1728 unsigned char buf[MD5_DIGEST_SIZE];
1734 state = X2NREALLOC (state, &allocated);
1738 randread (randread_source, buf, sizeof buf);
1740 md5_process_bytes (buf, sizeof buf, s);
1746 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1747 with length LENGTHB. Return negative if less, zero if equal,
1748 positive if greater. */
1751 cmp_hashes (char const *texta, size_t lena,
1752 char const *textb, size_t lenb)
1754 /* Try random hashes until a pair of hashes disagree. But if the
1755 first pair of random hashes agree, check whether the keys are
1756 identical and if so report no difference. */
1761 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1762 struct md5_ctx s[2];
1763 s[0] = s[1] = random_state (i);
1764 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1765 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1766 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1769 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1776 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1777 using one or more random hash functions. */
1780 compare_random (char *restrict texta, size_t lena,
1781 char *restrict textb, size_t lenb)
1785 if (! hard_LC_COLLATE)
1786 diff = cmp_hashes (texta, lena, textb, lenb);
1789 /* Transform the text into the basis of comparison, so that byte
1790 strings that would otherwise considered to be equal are
1791 considered equal here even if their bytes differ. */
1794 char stackbuf[4000];
1795 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1796 bool a_fits = tlena <= sizeof stackbuf;
1797 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1798 (a_fits ? sizeof stackbuf - tlena : 0),
1801 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1805 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1806 faster by avoiding the need for an extra buffer copy. */
1807 buf = xmalloc (tlena + tlenb + 1);
1808 xmemxfrm (buf, tlena + 1, texta, lena);
1809 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1812 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1814 if (buf != stackbuf)
1821 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1822 using strverscmp. */
1825 compare_version (char *restrict texta, size_t lena,
1826 char *restrict textb, size_t lenb)
1830 /* It is necessary to save the character after the end of the field.
1831 "strverscmp" works with NUL terminated strings. Our blocks of
1832 text are not necessarily terminated with a NUL byte. */
1833 char sv_a = texta[lena];
1834 char sv_b = textb[lenb];
1839 diff = strverscmp (texta, textb);
1847 /* Compare two lines A and B trying every key in sequence until there
1848 are no more keys or a difference is found. */
1851 keycompare (const struct line *a, const struct line *b)
1853 struct keyfield const *key = keylist;
1855 /* For the first iteration only, the key positions have been
1856 precomputed for us. */
1857 char *texta = a->keybeg;
1858 char *textb = b->keybeg;
1859 char *lima = a->keylim;
1860 char *limb = b->keylim;
1866 char const *translate = key->translate;
1867 bool const *ignore = key->ignore;
1869 /* Find the lengths. */
1870 size_t lena = lima <= texta ? 0 : lima - texta;
1871 size_t lenb = limb <= textb ? 0 : limb - textb;
1873 /* Actually compare the fields. */
1876 diff = compare_random (texta, lena, textb, lenb);
1877 else if (key->numeric | key->general_numeric)
1879 char savea = *lima, saveb = *limb;
1881 *lima = *limb = '\0';
1882 diff = ((key->numeric ? numcompare : general_numcompare)
1884 *lima = savea, *limb = saveb;
1886 else if (key->version)
1887 diff = compare_version (texta, lena, textb, lenb);
1888 else if (key->month)
1889 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1890 /* Sorting like this may become slow, so in a simple locale the user
1891 can select a faster sort that is similar to ascii sort. */
1892 else if (hard_LC_COLLATE)
1894 if (ignore || translate)
1897 size_t size = lena + 1 + lenb + 1;
1898 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1899 char *copy_b = copy_a + lena + 1;
1900 size_t new_len_a, new_len_b, i;
1902 /* Ignore and/or translate chars before comparing. */
1903 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1907 copy_a[new_len_a] = (translate
1908 ? translate[to_uchar (texta[i])]
1910 if (!ignore || !ignore[to_uchar (texta[i])])
1915 copy_b[new_len_b] = (translate
1916 ? translate[to_uchar (textb[i])]
1918 if (!ignore || !ignore[to_uchar (textb[i])])
1923 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1925 if (sizeof buf < size)
1929 diff = - NONZERO (lenb);
1933 diff = xmemcoll (texta, lena, textb, lenb);
1937 #define CMP_WITH_IGNORE(A, B) \
1942 while (texta < lima && ignore[to_uchar (*texta)]) \
1944 while (textb < limb && ignore[to_uchar (*textb)]) \
1946 if (! (texta < lima && textb < limb)) \
1948 diff = to_uchar (A) - to_uchar (B); \
1955 diff = (texta < lima) - (textb < limb); \
1960 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1961 translate[to_uchar (*textb)]);
1963 CMP_WITH_IGNORE (*texta, *textb);
1966 diff = - NONZERO (lenb);
1973 while (texta < lima && textb < limb)
1975 diff = (to_uchar (translate[to_uchar (*texta++)])
1976 - to_uchar (translate[to_uchar (*textb++)]));
1983 diff = memcmp (texta, textb, MIN (lena, lenb));
1987 diff = lena < lenb ? -1 : lena != lenb;
1997 /* Find the beginning and limit of the next field. */
1998 if (key->eword != SIZE_MAX)
1999 lima = limfield (a, key), limb = limfield (b, key);
2001 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2003 if (key->sword != SIZE_MAX)
2004 texta = begfield (a, key), textb = begfield (b, key);
2007 texta = a->text, textb = b->text;
2008 if (key->skipsblanks)
2010 while (texta < lima && blanks[to_uchar (*texta)])
2012 while (textb < limb && blanks[to_uchar (*textb)])
2023 return key->reverse ? -diff : diff;
2026 /* Compare two lines A and B, returning negative, zero, or positive
2027 depending on whether A compares less than, equal to, or greater than B. */
2030 compare (const struct line *a, const struct line *b)
2035 /* First try to compare on the specified keys (if any).
2036 The only two cases with no key at all are unadorned sort,
2037 and unadorned sort -r. */
2040 diff = keycompare (a, b);
2041 if (diff | unique | stable)
2045 /* If the keys all compare equal (or no keys were specified)
2046 fall through to the default comparison. */
2047 alen = a->length - 1, blen = b->length - 1;
2050 diff = - NONZERO (blen);
2053 else if (hard_LC_COLLATE)
2054 diff = xmemcoll (a->text, alen, b->text, blen);
2055 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2056 diff = alen < blen ? -1 : alen != blen;
2058 return reverse ? -diff : diff;
2061 /* Check that the lines read from FILE_NAME come in order. Return
2062 true if they are in order. If CHECKONLY == 'c', also print a
2063 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2064 they are not in order. */
2067 check (char const *file_name, char checkonly)
2069 FILE *fp = xfopen (file_name, "r");
2070 struct buffer buf; /* Input buffer. */
2071 struct line temp; /* Copy of previous line. */
2073 uintmax_t line_number = 0;
2074 struct keyfield const *key = keylist;
2075 bool nonunique = ! unique;
2076 bool ordered = true;
2078 initbuf (&buf, sizeof (struct line),
2079 MAX (merge_buffer_size, sort_size));
2082 while (fillbuf (&buf, fp, file_name))
2084 struct line const *line = buffer_linelim (&buf);
2085 struct line const *linebase = line - buf.nlines;
2087 /* Make sure the line saved from the old buffer contents is
2088 less than or equal to the first line of the new buffer. */
2089 if (alloc && nonunique <= compare (&temp, line - 1))
2093 if (checkonly == 'c')
2095 struct line const *disorder_line = line - 1;
2096 uintmax_t disorder_line_number =
2097 buffer_linelim (&buf) - disorder_line + line_number;
2098 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2099 fprintf (stderr, _("%s: %s:%s: disorder: "),
2100 program_name, file_name,
2101 umaxtostr (disorder_line_number, hr_buf));
2102 write_bytes (disorder_line->text, disorder_line->length,
2103 stderr, _("standard error"));
2111 /* Compare each line in the buffer with its successor. */
2112 while (linebase < --line)
2113 if (nonunique <= compare (line, line - 1))
2114 goto found_disorder;
2116 line_number += buf.nlines;
2118 /* Save the last line of the buffer. */
2119 if (alloc < line->length)
2126 alloc = line->length;
2130 while (alloc < line->length);
2132 temp.text = xrealloc (temp.text, alloc);
2134 memcpy (temp.text, line->text, line->length);
2135 temp.length = line->length;
2138 temp.keybeg = temp.text + (line->keybeg - line->text);
2139 temp.keylim = temp.text + (line->keylim - line->text);
2143 xfclose (fp, file_name);
2149 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2150 files (all of which are at the start of the FILES array), and
2151 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2152 Close input and output files before returning.
2153 OUTPUT_FILE gives the name of the output file. If it is NULL,
2154 the output file is standard output. If OFP is NULL, the output
2155 file has not been opened yet (or written to, if standard output). */
2158 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2159 FILE *ofp, char const *output_file)
2161 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2162 /* Input streams for each file. */
2163 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2164 /* Input buffers for each file. */
2165 struct line saved; /* Saved line storage for unique check. */
2166 struct line const *savedline = NULL;
2167 /* &saved if there is a saved line. */
2168 size_t savealloc = 0; /* Size allocated for the saved line. */
2169 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2170 /* Current line in each line table. */
2171 struct line const **base = xnmalloc (nmerge, sizeof *base);
2172 /* Base of each line table. */
2173 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2174 /* Table representing a permutation of fps,
2175 such that cur[ord[0]] is the smallest line
2176 and will be next output. */
2180 struct keyfield const *key = keylist;
2183 /* Read initial lines from each input file. */
2184 for (i = 0; i < nfiles; )
2186 fps[i] = (files[i].pid
2187 ? open_temp (files[i].name, files[i].pid)
2188 : xfopen (files[i].name, "r"));
2189 initbuf (&buffer[i], sizeof (struct line),
2190 MAX (merge_buffer_size, sort_size / nfiles));
2191 if (fillbuf (&buffer[i], fps[i], files[i].name))
2193 struct line const *linelim = buffer_linelim (&buffer[i]);
2194 cur[i] = linelim - 1;
2195 base[i] = linelim - buffer[i].nlines;
2200 /* fps[i] is empty; eliminate it from future consideration. */
2201 xfclose (fps[i], files[i].name);
2205 zaptemp (files[i].name);
2207 free (buffer[i].buf);
2209 for (j = i; j < nfiles; ++j)
2210 files[j] = files[j + 1];
2215 ofp = xfopen (output_file, "w");
2217 /* Set up the ord table according to comparisons among input lines.
2218 Since this only reorders two items if one is strictly greater than
2219 the other, it is stable. */
2220 for (i = 0; i < nfiles; ++i)
2222 for (i = 1; i < nfiles; ++i)
2223 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2224 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2226 /* Repeatedly output the smallest line until no input remains. */
2229 struct line const *smallest = cur[ord[0]];
2231 /* If uniquified output is turned on, output only the first of
2232 an identical series of lines. */
2235 if (savedline && compare (savedline, smallest))
2238 write_bytes (saved.text, saved.length, ofp, output_file);
2243 if (savealloc < smallest->length)
2248 savealloc = smallest->length;
2251 while ((savealloc *= 2) < smallest->length);
2253 saved.text = xrealloc (saved.text, savealloc);
2255 saved.length = smallest->length;
2256 memcpy (saved.text, smallest->text, saved.length);
2260 saved.text + (smallest->keybeg - smallest->text);
2262 saved.text + (smallest->keylim - smallest->text);
2267 write_bytes (smallest->text, smallest->length, ofp, output_file);
2269 /* Check if we need to read more lines into core. */
2270 if (base[ord[0]] < smallest)
2271 cur[ord[0]] = smallest - 1;
2274 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2276 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2277 cur[ord[0]] = linelim - 1;
2278 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2282 /* We reached EOF on fps[ord[0]]. */
2283 for (i = 1; i < nfiles; ++i)
2284 if (ord[i] > ord[0])
2287 xfclose (fps[ord[0]], files[ord[0]].name);
2288 if (ord[0] < ntemps)
2291 zaptemp (files[ord[0]].name);
2293 free (buffer[ord[0]].buf);
2294 for (i = ord[0]; i < nfiles; ++i)
2296 fps[i] = fps[i + 1];
2297 files[i] = files[i + 1];
2298 buffer[i] = buffer[i + 1];
2299 cur[i] = cur[i + 1];
2300 base[i] = base[i + 1];
2302 for (i = 0; i < nfiles; ++i)
2303 ord[i] = ord[i + 1];
2308 /* The new line just read in may be larger than other lines
2309 already in main memory; push it back in the queue until we
2310 encounter a line larger than it. Optimize for the common
2311 case where the new line is smallest. */
2316 size_t ord0 = ord[0];
2317 size_t count_of_smaller_lines;
2321 int cmp = compare (cur[ord0], cur[ord[probe]]);
2322 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2326 probe = (lo + hi) / 2;
2329 count_of_smaller_lines = lo - 1;
2330 for (j = 0; j < count_of_smaller_lines; j++)
2331 ord[j] = ord[j + 1];
2332 ord[count_of_smaller_lines] = ord0;
2335 /* Free up some resources every once in a while. */
2336 if (MAX_PROCS_BEFORE_REAP < nprocs)
2340 if (unique && savedline)
2342 write_bytes (saved.text, saved.length, ofp, output_file);
2346 xfclose (ofp, output_file);
2354 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2355 and HI (with NHI members). T, LO, and HI point just past their
2356 respective arrays, and the arrays are in reverse order. NLO and
2357 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2360 mergelines (struct line *t,
2361 struct line const *lo, size_t nlo,
2362 struct line const *hi, size_t nhi)
2365 if (compare (lo - 1, hi - 1) <= 0)
2370 /* HI - NHI equalled T - (NLO + NHI) when this function
2371 began. Therefore HI must equal T now, and there is no
2372 need to copy from HI to T. */
2390 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2391 NLINES must be at least 2.
2392 The input and output arrays are in reverse order, and LINES and
2393 TEMP point just past the end of their respective arrays.
2395 Use a recursive divide-and-conquer algorithm, in the style
2396 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2397 the optimization suggested by exercise 5.2.4-10; this requires room
2398 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2399 writes that this memory optimization was originally published by
2400 D. A. Bell, Comp J. 1 (1958), 75. */
2403 sortlines (struct line *lines, size_t nlines, struct line *temp)
2407 if (0 < compare (&lines[-1], &lines[-2]))
2409 struct line tmp = lines[-1];
2410 lines[-1] = lines[-2];
2416 size_t nlo = nlines / 2;
2417 size_t nhi = nlines - nlo;
2418 struct line *lo = lines;
2419 struct line *hi = lines - nlo;
2420 struct line *sorted_lo = temp;
2422 sortlines (hi, nhi, temp);
2424 sortlines_temp (lo, nlo, sorted_lo);
2426 sorted_lo[-1] = lo[-1];
2428 mergelines (lines, sorted_lo, nlo, hi, nhi);
2432 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2433 rather than sorting in place. */
2436 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2440 /* Declare `swap' as int, not bool, to work around a bug
2441 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2442 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2443 int swap = (0 < compare (&lines[-1], &lines[-2]));
2444 temp[-1] = lines[-1 - swap];
2445 temp[-2] = lines[-2 + swap];
2449 size_t nlo = nlines / 2;
2450 size_t nhi = nlines - nlo;
2451 struct line *lo = lines;
2452 struct line *hi = lines - nlo;
2453 struct line *sorted_hi = temp - nlo;
2455 sortlines_temp (hi, nhi, sorted_hi);
2457 sortlines (lo, nlo, temp);
2459 mergelines (temp, lo, nlo, sorted_hi, nhi);
2463 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2464 the same as OUTFILE. If found, merge the found instances (and perhaps
2465 some other files) into a temporary file so that it can in turn be
2466 merged into OUTFILE without destroying OUTFILE before it is completely
2467 read. Return the new value of NFILES, which differs from the old if
2468 some merging occurred.
2470 This test ensures that an otherwise-erroneous use like
2471 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2472 It's not clear that POSIX requires this nicety.
2473 Detect common error cases, but don't try to catch obscure cases like
2474 "cat ... FILE ... | sort -m -o FILE"
2475 where traditional "sort" doesn't copy the input and where
2476 people should know that they're getting into trouble anyway.
2477 Catching these obscure cases would slow down performance in
2481 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2482 size_t nfiles, char const *outfile)
2485 bool got_outstat = false;
2486 struct stat outstat;
2488 for (i = ntemps; i < nfiles; i++)
2490 bool is_stdin = STREQ (files[i].name, "-");
2494 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2501 ? stat (outfile, &outstat)
2502 : fstat (STDOUT_FILENO, &outstat))
2509 ? fstat (STDIN_FILENO, &instat)
2510 : stat (files[i].name, &instat))
2512 && SAME_INODE (instat, outstat));
2519 char *temp = create_temp (&tftp, &pid);
2520 mergefps (&files[i],0, nfiles - i, tftp, temp);
2521 files[i].name = temp;
2530 /* Merge the input FILES. NTEMPS is the number of files at the
2531 start of FILES that are temporary; it is zero at the top level.
2532 NFILES is the total number of files. Put the output in
2533 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2536 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2537 char const *output_file)
2539 while (nmerge < nfiles)
2541 /* Number of input files processed so far. */
2544 /* Number of output files generated so far. */
2547 /* nfiles % NMERGE; this counts input files that are left over
2548 after all full-sized merges have been done. */
2551 /* Number of easily-available slots at the next loop iteration. */
2554 /* Do as many NMERGE-size merges as possible. */
2555 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2559 char *temp = create_temp (&tfp, &pid);
2560 size_t nt = MIN (ntemps, nmerge);
2562 mergefps (&files[in], nt, nmerge, tfp, temp);
2563 files[out].name = temp;
2564 files[out].pid = pid;
2567 remainder = nfiles - in;
2568 cheap_slots = nmerge - out % nmerge;
2570 if (cheap_slots < remainder)
2572 /* So many files remain that they can't all be put into the last
2573 NMERGE-sized output window. Do one more merge. Merge as few
2574 files as possible, to avoid needless I/O. */
2575 size_t nshortmerge = remainder - cheap_slots + 1;
2578 char *temp = create_temp (&tfp, &pid);
2579 size_t nt = MIN (ntemps, nshortmerge);
2581 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2582 files[out].name = temp;
2583 files[out++].pid = pid;
2587 /* Put the remaining input files into the last NMERGE-sized output
2588 window, so they will be merged in the next pass. */
2589 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2594 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2595 mergefps (files, ntemps, nfiles, NULL, output_file);
2598 /* Sort NFILES FILES onto OUTPUT_FILE. */
2601 sort (char * const *files, size_t nfiles, char const *output_file)
2605 bool output_file_created = false;
2611 char const *temp_output;
2612 char const *file = *files;
2613 FILE *fp = xfopen (file, "r");
2615 size_t bytes_per_line = (2 * sizeof (struct line)
2616 - sizeof (struct line) / 2);
2619 initbuf (&buf, bytes_per_line,
2620 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2625 while (fillbuf (&buf, fp, file))
2628 struct line *linebase;
2630 if (buf.eof && nfiles
2631 && (bytes_per_line + 1
2632 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2634 /* End of file, but there is more input and buffer room.
2635 Concatenate the next input file; this is faster in
2637 buf.left = buf.used;
2641 line = buffer_linelim (&buf);
2642 linebase = line - buf.nlines;
2644 sortlines (line, buf.nlines, linebase);
2645 if (buf.eof && !nfiles && !ntemps && !buf.left)
2648 tfp = xfopen (output_file, "w");
2649 temp_output = output_file;
2650 output_file_created = true;
2655 temp_output = create_temp (&tfp, NULL);
2661 write_bytes (line->text, line->length, tfp, temp_output);
2663 while (linebase < line && compare (line, line - 1) == 0)
2666 while (linebase < line);
2668 xfclose (tfp, temp_output);
2670 /* Free up some resources every once in a while. */
2671 if (MAX_PROCS_BEFORE_REAP < nprocs)
2674 if (output_file_created)
2683 if (! output_file_created)
2686 struct tempnode *node = temphead;
2687 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2688 for (i = 0; node; i++)
2690 tempfiles[i].name = node->name;
2691 tempfiles[i].pid = node->pid;
2694 merge (tempfiles, ntemps, ntemps, output_file);
2699 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2702 insertkey (struct keyfield *key_arg)
2704 struct keyfield **p;
2705 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2707 for (p = &keylist; *p; p = &(*p)->next)
2713 /* Report a bad field specification SPEC, with extra info MSGID. */
2715 static void badfieldspec (char const *, char const *)
2718 badfieldspec (char const *spec, char const *msgid)
2720 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2721 _(msgid), quote (spec));
2725 /* Report incompatible options. */
2727 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2729 incompatible_options (char const *opts)
2731 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2735 /* Check compatibility of ordering options. */
2738 check_ordering_compatibility (void)
2740 struct keyfield const *key;
2742 for (key = keylist; key; key = key->next)
2743 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2744 + key->version + !!key->ignore))
2745 || (key->random && key->translate))
2747 /* The following is too big, but guaranteed to be "big enough". */
2748 char opts[sizeof short_options];
2750 if (key->ignore == nondictionary)
2754 if (key->general_numeric)
2756 if (key->ignore == nonprinting)
2767 incompatible_options (opts);
2771 /* Parse the leading integer in STRING and store the resulting value
2772 (which must fit into size_t) into *VAL. Return the address of the
2773 suffix after the integer. If the value is too large, silently
2774 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2775 failure; otherwise, report MSGID and exit on failure. */
2778 parse_field_count (char const *string, size_t *val, char const *msgid)
2783 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2786 case LONGINT_INVALID_SUFFIX_CHAR:
2791 case LONGINT_OVERFLOW:
2792 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2796 case LONGINT_INVALID:
2798 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2799 _(msgid), quote (string));
2806 /* Handle interrupts and hangups. */
2809 sighandler (int sig)
2812 signal (sig, SIG_IGN);
2816 signal (sig, SIG_DFL);
2820 /* Set the ordering options for KEY specified in S.
2821 Return the address of the first character in S that
2822 is not a valid ordering option.
2823 BLANKTYPE is the kind of blanks that 'b' should skip. */
2826 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2833 if (blanktype == bl_start || blanktype == bl_both)
2834 key->skipsblanks = true;
2835 if (blanktype == bl_end || blanktype == bl_both)
2836 key->skipeblanks = true;
2839 key->ignore = nondictionary;
2842 key->translate = fold_toupper;
2845 key->general_numeric = true;
2848 /* Option order should not matter, so don't let -i override
2849 -d. -d implies -i, but -i does not imply -d. */
2851 key->ignore = nonprinting;
2857 key->numeric = true;
2863 key->reverse = true;
2866 key->version = true;
2876 static struct keyfield *
2877 key_init (struct keyfield *key)
2879 memset (key, 0, sizeof *key);
2880 key->eword = SIZE_MAX;
2885 main (int argc, char **argv)
2887 struct keyfield *key;
2888 struct keyfield key_buf;
2889 struct keyfield gkey;
2893 bool mergeonly = false;
2894 char *random_source = NULL;
2895 bool need_random = false;
2897 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2898 bool obsolete_usage = (posix2_version () < 200112);
2900 char *files_from = NULL;
2902 char const *outfile = NULL;
2904 initialize_main (&argc, &argv);
2905 set_program_name (argv[0]);
2906 setlocale (LC_ALL, "");
2907 bindtextdomain (PACKAGE, LOCALEDIR);
2908 textdomain (PACKAGE);
2910 initialize_exit_failure (SORT_FAILURE);
2912 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2913 #if HAVE_NL_LANGINFO
2914 hard_LC_TIME = hard_locale (LC_TIME);
2917 /* Get locale's representation of the decimal point. */
2919 struct lconv const *locale = localeconv ();
2921 /* If the locale doesn't define a decimal point, or if the decimal
2922 point is multibyte, use the C locale's decimal point. FIXME:
2923 add support for multibyte decimal points. */
2924 decimal_point = to_uchar (locale->decimal_point[0]);
2925 if (! decimal_point || locale->decimal_point[1])
2926 decimal_point = '.';
2928 /* FIXME: add support for multibyte thousands separators. */
2929 thousands_sep = to_uchar (*locale->thousands_sep);
2930 if (! thousands_sep || locale->thousands_sep[1])
2934 have_read_stdin = false;
2939 static int const sig[] =
2941 /* The usual suspects. */
2942 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2959 enum { nsigs = sizeof sig / sizeof sig[0] };
2962 struct sigaction act;
2964 sigemptyset (&caught_signals);
2965 for (i = 0; i < nsigs; i++)
2967 sigaction (sig[i], NULL, &act);
2968 if (act.sa_handler != SIG_IGN)
2969 sigaddset (&caught_signals, sig[i]);
2972 act.sa_handler = sighandler;
2973 act.sa_mask = caught_signals;
2976 for (i = 0; i < nsigs; i++)
2977 if (sigismember (&caught_signals, sig[i]))
2978 sigaction (sig[i], &act, NULL);
2980 for (i = 0; i < nsigs; i++)
2981 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2983 signal (sig[i], sighandler);
2984 siginterrupt (sig[i], 1);
2989 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2990 atexit (exit_cleanup);
2992 gkey.sword = gkey.eword = SIZE_MAX;
2994 gkey.translate = NULL;
2995 gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
2996 gkey.month = gkey.reverse = false;
2997 gkey.skipsblanks = gkey.skipeblanks = false;
2999 files = xnmalloc (argc, sizeof *files);
3003 /* Parse an operand as a file after "--" was seen; or if
3004 pedantic and a file was seen, unless the POSIX version
3005 predates 1003.1-2001 and -c was not seen and the operand is
3006 "-o FILE" or "-oFILE". */
3010 || (posixly_correct && nfiles != 0
3011 && ! (obsolete_usage
3014 && argv[optind][0] == '-' && argv[optind][1] == 'o'
3015 && (argv[optind][2] || optind + 1 != argc)))
3016 || ((c = getopt_long (argc, argv, short_options,
3022 files[nfiles++] = argv[optind++];
3028 if (optarg[0] == '+')
3030 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3031 && ISDIGIT (argv[optind][1]));
3032 obsolete_usage |= minus_pos_usage & ~posixly_correct;
3035 /* Treat +POS1 [-POS2] as a key if possible; but silently
3036 treat an operand as a file if it is not a valid +POS1. */
3037 key = key_init (&key_buf);
3038 s = parse_field_count (optarg + 1, &key->sword, NULL);
3040 s = parse_field_count (s + 1, &key->schar, NULL);
3041 if (! (key->sword | key->schar))
3042 key->sword = SIZE_MAX;
3043 if (! s || *set_ordering (s, key, bl_start))
3047 if (minus_pos_usage)
3049 char const *optarg1 = argv[optind++];
3050 s = parse_field_count (optarg1 + 1, &key->eword,
3051 N_("invalid number after `-'"));
3053 s = parse_field_count (s + 1, &key->echar,
3054 N_("invalid number after `.'"));
3055 if (*set_ordering (s, key, bl_end))
3056 badfieldspec (optarg1,
3057 N_("stray character in field spec"));
3064 files[nfiles++] = optarg;
3068 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3084 set_ordering (str, &gkey, bl_both);
3090 ? XARGMATCH ("--check", optarg, check_args, check_types)
3095 if (checkonly && checkonly != c)
3096 incompatible_options ("cC");
3100 case COMPRESS_PROGRAM_OPTION:
3101 if (compress_program && !STREQ (compress_program, optarg))
3102 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3103 compress_program = optarg;
3106 case FILES0_FROM_OPTION:
3107 files_from = optarg;
3111 key = key_init (&key_buf);
3114 s = parse_field_count (optarg, &key->sword,
3115 N_("invalid number at field start"));
3118 /* Provoke with `sort -k0' */
3119 badfieldspec (optarg, N_("field number is zero"));
3123 s = parse_field_count (s + 1, &key->schar,
3124 N_("invalid number after `.'"));
3127 /* Provoke with `sort -k1.0' */
3128 badfieldspec (optarg, N_("character offset is zero"));
3131 if (! (key->sword | key->schar))
3132 key->sword = SIZE_MAX;
3133 s = set_ordering (s, key, bl_start);
3136 key->eword = SIZE_MAX;
3142 s = parse_field_count (s + 1, &key->eword,
3143 N_("invalid number after `,'"));
3146 /* Provoke with `sort -k1,0' */
3147 badfieldspec (optarg, N_("field number is zero"));
3150 s = parse_field_count (s + 1, &key->echar,
3151 N_("invalid number after `.'"));
3154 /* `-k 2,3' is equivalent to `+1 -3'. */
3157 s = set_ordering (s, key, bl_end);
3160 badfieldspec (optarg, N_("stray character in field spec"));
3169 specify_nmerge (oi, c, optarg);
3173 if (outfile && !STREQ (outfile, optarg))
3174 error (SORT_FAILURE, 0, _("multiple output files specified"));
3178 case RANDOM_SOURCE_OPTION:
3179 if (random_source && !STREQ (random_source, optarg))
3180 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3181 random_source = optarg;
3189 specify_sort_size (oi, c, optarg);
3194 char newtab = optarg[0];
3196 error (SORT_FAILURE, 0, _("empty tab"));
3199 if (STREQ (optarg, "\\0"))
3203 /* Provoke with `sort -txx'. Complain about
3204 "multi-character tab" instead of "multibyte tab", so
3205 that the diagnostic's wording does not need to be
3206 changed once multibyte characters are supported. */
3207 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3211 if (tab != TAB_DEFAULT && tab != newtab)
3212 error (SORT_FAILURE, 0, _("incompatible tabs"));
3218 add_temp_dir (optarg);
3226 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3227 through Solaris 7. It is also accepted by many non-Solaris
3228 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3229 -y is marked as obsolete starting with Solaris 8 (1999), but is
3230 still accepted as of Solaris 10 prerelease (2004).
3232 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3233 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3234 and which in general ignores the argument after "-y" if it
3235 consists entirely of digits (it can even be empty). */
3236 if (optarg == argv[optind - 1])
3239 for (p = optarg; ISDIGIT (*p); p++)
3241 optind -= (*p != '\0');
3249 case_GETOPT_HELP_CHAR;
3251 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3254 usage (SORT_FAILURE);
3262 /* When using --files0-from=F, you may not specify any files
3263 on the command-line. */
3266 error (0, 0, _("extra operand %s"), quote (files[0]));
3267 fprintf (stderr, "%s\n",
3268 _("file operands cannot be combined with --files0-from"));
3269 usage (SORT_FAILURE);
3272 if (STREQ (files_from, "-"))
3276 stream = fopen (files_from, "r");
3278 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3279 quote (files_from));
3282 readtokens0_init (&tok);
3284 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3285 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3286 quote (files_from));
3294 for (i = 0; i < nfiles; i++)
3296 if (STREQ (files[i], "-"))
3297 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3298 "no file name of %s allowed"),
3300 else if (files[i][0] == '\0')
3302 /* Using the standard `filename:line-number:' prefix here is
3303 not totally appropriate, since NUL is the separator, not NL,
3304 but it might be better than nothing. */
3305 unsigned long int file_number = i + 1;
3306 error (SORT_FAILURE, 0,
3307 _("%s:%lu: invalid zero-length file name"),
3308 quotearg_colon (files_from), file_number);
3313 error (SORT_FAILURE, 0, _("no input from %s"),
3314 quote (files_from));
3317 /* Inheritance of global options to individual keys. */
3318 for (key = keylist; key; key = key->next)
3322 || (key->skipsblanks
3328 | key->general_numeric
3331 key->ignore = gkey.ignore;
3332 key->translate = gkey.translate;
3333 key->skipsblanks = gkey.skipsblanks;
3334 key->skipeblanks = gkey.skipeblanks;
3335 key->month = gkey.month;
3336 key->numeric = gkey.numeric;
3337 key->general_numeric = gkey.general_numeric;
3338 key->random = gkey.random;
3339 key->reverse = gkey.reverse;
3340 key->version = gkey.version;
3343 need_random |= key->random;
3346 if (!keylist && (gkey.ignore
3348 || (gkey.skipsblanks
3352 | gkey.general_numeric
3357 need_random |= gkey.random;
3360 check_ordering_compatibility ();
3362 reverse = gkey.reverse;
3366 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3367 if (! randread_source)
3368 die (_("open failed"), random_source);
3371 if (temp_dir_count == 0)
3373 char const *tmp_dir = getenv ("TMPDIR");
3374 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3379 static char *minus = "-";
3385 /* Need to re-check that we meet the minimum requirement for memory
3386 usage with the final value for NMERGE. */
3388 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3393 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3394 quote (files[1]), checkonly);
3398 static char opts[] = {0, 'o', 0};
3399 opts[0] = checkonly;
3400 incompatible_options (opts);
3403 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3404 input is not properly sorted. */
3405 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3410 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3413 for (i = 0; i < nfiles; ++i)
3414 sortfiles[i].name = files[i];
3416 merge (sortfiles, 0, nfiles, outfile);
3417 IF_LINT (free (sortfiles));
3420 sort (files, nfiles, outfile);
3422 if (have_read_stdin && fclose (stdin) == EOF)
3423 die (_("close failed"), "-");
3425 exit (EXIT_SUCCESS);