1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
79 #if !defined OPEN_MAX && defined NR_OPEN
80 # define OPEN_MAX NR_OPEN
86 #define UCHAR_LIM (UCHAR_MAX + 1)
88 #ifndef DEFAULT_TMPDIR
89 # define DEFAULT_TMPDIR "/tmp"
95 /* POSIX says to exit with status 1 if invoked with -c and the
96 input is not properly sorted. */
97 SORT_OUT_OF_ORDER = 1,
99 /* POSIX says any other irregular exit must exit with a status
100 code greater than 1. */
106 /* The number of times we should try to fork a compression process
107 (we retry if the fork call fails). We don't _need_ to compress
108 temp files, this is just to reduce disk access, so this number
110 MAX_FORK_TRIES_COMPRESS = 2,
112 /* The number of times we should try to fork a decompression process.
113 If we can't fork a decompression process, we can't sort, so this
114 number should be big. */
115 MAX_FORK_TRIES_DECOMPRESS = 8
118 /* The representation of the decimal point in the current locale. */
119 static int decimal_point;
121 /* Thousands separator; if -1, then there isn't one. */
122 static int thousands_sep;
124 /* Nonzero if the corresponding locales are hard. */
125 static bool hard_LC_COLLATE;
127 static bool hard_LC_TIME;
130 #define NONZERO(x) ((x) != 0)
132 /* The kind of blanks for '-b' to skip in various options. */
133 enum blanktype { bl_start, bl_end, bl_both };
135 /* The character marking end of line. Default to \n. */
136 static char eolchar = '\n';
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 size_t length; /* Length including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
150 char *buf; /* Dynamically allocated buffer,
151 partitioned into 3 regions:
154 - an array of lines, in reverse order. */
155 size_t used; /* Number of bytes used for input data. */
156 size_t nlines; /* Number of lines in the line array. */
157 size_t alloc; /* Number of bytes allocated. */
158 size_t left; /* Number of bytes left from previous reads. */
159 size_t line_bytes; /* Number of bytes to reserve for each line. */
160 bool eof; /* An EOF has been read. */
165 size_t sword; /* Zero-origin 'word' to start at. */
166 size_t schar; /* Additional characters to skip. */
167 size_t eword; /* Zero-origin first word after field. */
168 size_t echar; /* Additional characters in field. */
169 bool const *ignore; /* Boolean array of characters to ignore. */
170 char const *translate; /* Translation applied to characters. */
171 bool skipsblanks; /* Skip leading blanks when finding start. */
172 bool skipeblanks; /* Skip leading blanks when finding end. */
173 bool numeric; /* Flag for numeric comparison. Handle
174 strings of digits with optional decimal
175 point, but no exponential notation. */
176 bool random; /* Sort by random hash of key. */
177 bool general_numeric; /* Flag for general, numeric comparison.
178 Handle numbers in exponential notation. */
179 bool month; /* Flag for comparison by month name. */
180 bool reverse; /* Reverse the sense of comparison. */
181 struct keyfield *next; /* Next keyfield to try. */
190 /* FIXME: None of these tables work with multibyte character sets.
191 Also, there are many other bugs when handling multibyte characters.
192 One way to fix this is to rewrite `sort' to use wide characters
193 internally, but doing this with good performance is a bit
196 /* Table of blanks. */
197 static bool blanks[UCHAR_LIM];
199 /* Table of non-printing characters. */
200 static bool nonprinting[UCHAR_LIM];
202 /* Table of non-dictionary characters (not letters, digits, or blanks). */
203 static bool nondictionary[UCHAR_LIM];
205 /* Translation table folding lower case to upper. */
206 static char fold_toupper[UCHAR_LIM];
208 #define MONTHS_PER_YEAR 12
210 /* Table mapping month names to integers.
211 Alphabetic order allows binary search. */
212 static struct month monthtab[] =
228 /* During the merge phase, the number of files to merge at once. */
229 #define NMERGE_DEFAULT 16
231 /* Minimum size for a merge or check buffer. */
232 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
234 /* Minimum sort size; the code might not work with smaller sizes. */
235 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
237 /* The number of bytes needed for a merge or check buffer, which can
238 function relatively efficiently even if it holds only one line. If
239 a longer line is seen, this value is increased. */
240 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
242 /* The approximate maximum number of bytes of main memory to use, as
243 specified by the user. Zero if the user has not specified a size. */
244 static size_t sort_size;
246 /* The guessed size for non-regular files. */
247 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
249 /* Array of directory names in which any temporary files are to be created. */
250 static char const **temp_dirs;
252 /* Number of temporary directory names used. */
253 static size_t temp_dir_count;
255 /* Number of allocated slots in temp_dirs. */
256 static size_t temp_dir_alloc;
258 /* Flag to reverse the order of all comparisons. */
261 /* Flag for stable sort. This turns off the last ditch bytewise
262 comparison of lines, and instead leaves lines in the same order
263 they were read if all keys compare equal. */
266 /* If TAB has this value, blanks separate fields. */
267 enum { TAB_DEFAULT = CHAR_MAX + 1 };
269 /* Tab character separating fields. If TAB_DEFAULT, then fields are
270 separated by the empty string between a non-blank character and a blank
272 static int tab = TAB_DEFAULT;
274 /* Flag to remove consecutive duplicate lines from the output.
275 Only the last of a sequence of equal lines will be output. */
278 /* Nonzero if any of the input files are the standard input. */
279 static bool have_read_stdin;
281 /* List of key field comparisons to be tried. */
282 static struct keyfield *keylist;
284 /* Program used to (de)compress temp files. Must accept -d. */
285 static char const *compress_program;
287 /* Maximum number of files to merge in one go. If more than this
288 number are present, temp files will be used. */
289 static unsigned int nmerge = NMERGE_DEFAULT;
291 static void sortlines_temp (struct line *, size_t, struct line *);
293 /* Report MESSAGE for FILE, then clean up and exit.
294 If FILE is null, it represents standard output. */
296 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
298 die (char const *message, char const *file)
300 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
307 if (status != EXIT_SUCCESS)
308 fprintf (stderr, _("Try `%s --help' for more information.\n"),
313 Usage: %s [OPTION]... [FILE]...\n\
314 or: %s [OPTION]... --files0-from=F\n\
316 program_name, program_name);
318 Write sorted concatenation of all FILE(s) to standard output.\n\
322 Mandatory arguments to long options are mandatory for short options too.\n\
329 -b, --ignore-leading-blanks ignore leading blanks\n\
330 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
331 -f, --ignore-case fold lower case to upper case characters\n\
334 -g, --general-numeric-sort compare according to general numerical value\n\
335 -i, --ignore-nonprinting consider only printable characters\n\
336 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
337 -n, --numeric-sort compare according to string numerical value\n\
338 -R, --random-sort sort by random hash of keys\n\
339 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
340 --sort=WORD sort according to WORD:\n\
341 general-numeric -g, month -M, numeric -n,\n\
343 -r, --reverse reverse the result of comparisons\n\
351 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
352 for more use temp files\n\
355 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
356 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
357 --compress-program=PROG compress temporaries with PROG;\n\
358 decompress them with PROG -d\n\
359 --files0-from=F read input from the files specified by\n\
360 NUL-terminated names in file F\n\
363 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
364 -m, --merge merge already sorted files; do not sort\n\
367 -o, --output=FILE write result to FILE instead of standard output\n\
368 -s, --stable stabilize sort by disabling last-resort comparison\n\
369 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
372 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
373 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
374 multiple options specify multiple directories\n\
375 -u, --unique with -c, check for strict ordering;\n\
376 without -c, output only the first of an equal run\n\
379 -z, --zero-terminated end lines with 0 byte, not newline\n\
381 fputs (HELP_OPTION_DESCRIPTION, stdout);
382 fputs (VERSION_OPTION_DESCRIPTION, stdout);
385 POS is F[.C][OPTS], where F is the field number and C the character position\n\
386 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
387 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
388 one or more single-letter ordering options, which override global ordering\n\
389 options for that key. If no key is given, use the entire line as the key.\n\
391 SIZE may be followed by the following multiplicative suffixes:\n\
394 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
396 With no FILE, or when FILE is -, read standard input.\n\
399 The locale specified by the environment affects sort order.\n\
400 Set LC_ALL=C to get the traditional sort order that uses\n\
401 native byte values.\n\
403 emit_bug_reporting_address ();
409 /* For long options that have no equivalent short option, use a
410 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
413 CHECK_OPTION = CHAR_MAX + 1,
414 COMPRESS_PROGRAM_OPTION,
417 RANDOM_SOURCE_OPTION,
421 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
423 static struct option const long_options[] =
425 {"ignore-leading-blanks", no_argument, NULL, 'b'},
426 {"check", optional_argument, NULL, CHECK_OPTION},
427 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
428 {"dictionary-order", no_argument, NULL, 'd'},
429 {"ignore-case", no_argument, NULL, 'f'},
430 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
431 {"general-numeric-sort", no_argument, NULL, 'g'},
432 {"ignore-nonprinting", no_argument, NULL, 'i'},
433 {"key", required_argument, NULL, 'k'},
434 {"merge", no_argument, NULL, 'm'},
435 {"month-sort", no_argument, NULL, 'M'},
436 {"numeric-sort", no_argument, NULL, 'n'},
437 {"random-sort", no_argument, NULL, 'R'},
438 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
439 {"sort", required_argument, NULL, SORT_OPTION},
440 {"output", required_argument, NULL, 'o'},
441 {"reverse", no_argument, NULL, 'r'},
442 {"stable", no_argument, NULL, 's'},
443 {"batch-size", required_argument, NULL, NMERGE_OPTION},
444 {"buffer-size", required_argument, NULL, 'S'},
445 {"field-separator", required_argument, NULL, 't'},
446 {"temporary-directory", required_argument, NULL, 'T'},
447 {"unique", no_argument, NULL, 'u'},
448 {"zero-terminated", no_argument, NULL, 'z'},
449 {GETOPT_HELP_OPTION_DECL},
450 {GETOPT_VERSION_OPTION_DECL},
454 static char const *const check_args[] =
456 "quiet", "silent", "diagnose-first", NULL
458 static char const check_types[] =
462 ARGMATCH_VERIFY (check_args, check_types);
464 static char const *const sort_args[] =
466 "general-numeric", "month", "numeric", "random", NULL
468 static char const sort_types[] =
472 ARGMATCH_VERIFY (sort_args, sort_types);
474 /* The set of signals that are caught. */
475 static sigset_t caught_signals;
477 /* Critical section status. */
484 /* Enter a critical section. */
485 static struct cs_status
488 struct cs_status status;
489 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
493 /* Leave a critical section. */
495 cs_leave (struct cs_status status)
499 /* Ignore failure when restoring the signal mask. */
500 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
504 /* The list of temporary files. */
507 struct tempnode *volatile next;
508 pid_t pid; /* If compressed, the pid of compressor, else zero */
509 char name[1]; /* Actual size is 1 + file name length. */
511 static struct tempnode *volatile temphead;
512 static struct tempnode *volatile *temptail = &temphead;
517 pid_t pid; /* If compressed, the pid of compressor, else zero */
520 /* A table where we store compression process states. We clean up all
521 processes in a timely manner so as not to exhaust system resources,
522 so we store the info on whether the process is still running, or has
524 static Hash_table *proctab;
526 enum { INIT_PROCTAB_SIZE = 47 };
528 enum procstate { ALIVE, ZOMBIE };
530 /* A proctab entry. The COUNT field is there in case we fork a new
531 compression process that has the same PID as an old zombie process
532 that is still in the table (because the process to decompress the
533 temp file it was associated with hasn't started yet). */
537 enum procstate state;
542 proctab_hasher (const void *entry, size_t tabsize)
544 const struct procnode *node = entry;
545 return node->pid % tabsize;
549 proctab_comparator (const void *e1, const void *e2)
551 const struct procnode *n1 = e1, *n2 = e2;
552 return n1->pid == n2->pid;
555 /* The total number of forked processes (compressors and decompressors)
556 that have not been reaped yet. */
557 static size_t nprocs;
559 /* The number of child processes we'll allow before we try to reap some. */
560 enum { MAX_PROCS_BEFORE_REAP = 2 };
562 /* If 0 < PID, wait for the child process with that PID to exit.
563 If PID is -1, clean up a random child process which has finished and
564 return the process ID of that child. If PID is -1 and no processes
565 have quit yet, return 0 without waiting. */
571 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
574 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
578 if (! WIFEXITED (status) || WEXITSTATUS (status))
579 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
587 /* Add the PID of a running compression process to proctab, or update
588 the entry COUNT and STATE fields if it's already there. This also
589 creates the table for us the first time it's called. */
592 register_proc (pid_t pid)
594 struct procnode test, *node;
598 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
607 node = hash_lookup (proctab, &test);
615 node = xmalloc (sizeof *node);
619 hash_insert (proctab, node);
623 /* This is called when we reap a random process. We don't know
624 whether we have reaped a compression process or a decompression
625 process until we look in the table. If there's an ALIVE entry for
626 it, then we have reaped a compression process, so change the state
627 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
630 update_proc (pid_t pid)
632 struct procnode test, *node;
635 node = hash_lookup (proctab, &test);
637 node->state = ZOMBIE;
640 /* This is for when we need to wait for a compression process to exit.
641 If it has a ZOMBIE entry in the table then it's already dead and has
642 been reaped. Note that if there's an ALIVE entry for it, it still may
643 already have died and been reaped if a second process was created with
644 the same PID. This is probably exceedingly rare, but to be on the safe
645 side we will have to wait for any compression process with this PID. */
648 wait_proc (pid_t pid)
650 struct procnode test, *node;
653 node = hash_lookup (proctab, &test);
654 if (node->state == ALIVE)
657 node->state = ZOMBIE;
660 hash_delete (proctab, node);
665 /* Keep reaping finished children as long as there are more to reap.
666 This doesn't block waiting for any of them, it only reaps those
667 that are already dead. */
674 while (0 < nprocs && (pid = reap (-1)))
678 /* Clean up any remaining temporary files. */
683 struct tempnode const *node;
685 for (node = temphead; node; node = node->next)
690 /* Cleanup actions to take when exiting. */
697 /* Clean up any remaining temporary files in a critical section so
698 that a signal handler does not try to clean them too. */
699 struct cs_status cs = cs_enter ();
707 /* Create a new temporary file, returning its newly allocated tempnode.
708 Store into *PFD the file descriptor open for writing. */
710 static struct tempnode *
711 create_temp_file (int *pfd)
713 static char const slashbase[] = "/sortXXXXXX";
714 static size_t temp_dir_index;
717 char const *temp_dir = temp_dirs[temp_dir_index];
718 size_t len = strlen (temp_dir);
719 struct tempnode *node =
720 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
721 char *file = node->name;
724 memcpy (file, temp_dir, len);
725 memcpy (file + len, slashbase, sizeof slashbase);
728 if (++temp_dir_index == temp_dir_count)
731 /* Create the temporary file in a critical section, to avoid races. */
737 temptail = &node->next;
744 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
751 /* Return a stream for FILE, opened with mode HOW. A null FILE means
752 standard output; HOW should be "w". When opening for input, "-"
753 means standard input. To avoid confusion, do not return file
754 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
755 opening an ordinary FILE. */
758 xfopen (const char *file, const char *how)
764 else if (STREQ (file, "-") && *how == 'r')
766 have_read_stdin = true;
771 fp = fopen (file, how);
773 die (_("open failed"), file);
779 /* Close FP, whose name is FILE, and report any errors. */
782 xfclose (FILE *fp, char const *file)
787 /* Allow reading stdin from tty more than once. */
793 /* Don't close stdout just yet. close_stdout does that. */
794 if (fflush (fp) != 0)
795 die (_("fflush failed"), file);
799 if (fclose (fp) != 0)
800 die (_("close failed"), file);
806 dup2_or_die (int oldfd, int newfd)
808 if (dup2 (oldfd, newfd) < 0)
809 error (SORT_FAILURE, errno, _("dup2 failed"));
812 /* Fork a child process for piping to and do common cleanup. The
813 TRIES parameter tells us how many times to try to fork before
814 giving up. Return the PID of the child or -1 if fork failed. */
817 pipe_fork (int pipefds[2], size_t tries)
819 #if HAVE_WORKING_FORK
820 struct tempnode *saved_temphead;
822 unsigned int wait_retry = 1;
823 pid_t pid IF_LINT (= -1);
826 if (pipe (pipefds) < 0)
831 /* This is so the child process won't delete our temp files
832 if it receives a signal before exec-ing. */
834 saved_temphead = temphead;
840 temphead = saved_temphead;
845 if (0 <= pid || errno != EAGAIN)
862 close (STDIN_FILENO);
863 close (STDOUT_FILENO);
870 #else /* ! HAVE_WORKING_FORK */
875 /* Create a temporary file and start a compression program to filter output
876 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
877 set it to the PID of the newly-created process. */
880 create_temp (FILE **pfp, pid_t *ppid)
883 struct tempnode *node = create_temp_file (&tempfd);
884 char *name = node->name;
886 if (compress_program)
890 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
897 register_proc (node->pid);
899 else if (node->pid == 0)
902 dup2_or_die (tempfd, STDOUT_FILENO);
904 dup2_or_die (pipefds[0], STDIN_FILENO);
907 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
908 error (SORT_FAILURE, errno, _("couldn't execute %s"),
915 *pfp = fdopen (tempfd, "w");
917 die (_("couldn't create temporary file"), name);
925 /* Open a compressed temp file and start a decompression process through
926 which to filter the input. PID must be the valid processes ID of the
927 process used to compress the file. */
930 open_temp (const char *name, pid_t pid)
932 int tempfd, pipefds[2];
938 tempfd = open (name, O_RDONLY);
940 die (_("couldn't open temporary file"), name);
942 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
948 else if (child_pid == 0)
951 dup2_or_die (tempfd, STDIN_FILENO);
953 dup2_or_die (pipefds[1], STDOUT_FILENO);
956 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
957 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
961 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
964 fp = fdopen (pipefds[0], "r");
966 die (_("couldn't create temporary file"), name);
972 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
974 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
975 die (_("write failed"), output_file);
978 /* Append DIR to the array of temporary directory names. */
980 add_temp_dir (char const *dir)
982 if (temp_dir_count == temp_dir_alloc)
983 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
985 temp_dirs[temp_dir_count++] = dir;
988 /* Remove NAME from the list of temporary files. */
991 zaptemp (const char *name)
993 struct tempnode *volatile *pnode;
994 struct tempnode *node;
995 struct tempnode *next;
997 int unlink_errno = 0;
1000 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1003 /* Unlink the temporary file in a critical section to avoid races. */
1006 unlink_status = unlink (name);
1007 unlink_errno = errno;
1011 if (unlink_status != 0)
1012 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1018 #if HAVE_NL_LANGINFO
1021 struct_month_cmp (const void *m1, const void *m2)
1023 struct month const *month1 = m1;
1024 struct month const *month2 = m2;
1025 return strcmp (month1->name, month2->name);
1030 /* Initialize the character class tables. */
1037 for (i = 0; i < UCHAR_LIM; ++i)
1039 blanks[i] = !! isblank (i);
1040 nonprinting[i] = ! isprint (i);
1041 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1042 fold_toupper[i] = toupper (i);
1045 #if HAVE_NL_LANGINFO
1046 /* If we're not in the "C" locale, read different names for months. */
1049 for (i = 0; i < MONTHS_PER_YEAR; i++)
1056 s = (char *) nl_langinfo (ABMON_1 + i);
1058 monthtab[i].name = name = xmalloc (s_len + 1);
1059 monthtab[i].val = i + 1;
1061 for (j = 0; j < s_len; j++)
1062 name[j] = fold_toupper[to_uchar (s[j])];
1065 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1066 sizeof *monthtab, struct_month_cmp);
1071 /* Specify how many inputs may be merged at once.
1072 This may be set on the command-line with the
1073 --batch-size option. */
1075 specify_nmerge (int oi, char c, char const *s)
1078 struct rlimit rlimit;
1079 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1081 /* Try to find out how many file descriptors we'll be able
1082 to open. We need at least nmerge + 3 (STDIN_FILENO,
1083 STDOUT_FILENO and STDERR_FILENO). */
1084 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1089 if (e == LONGINT_OK)
1093 e = LONGINT_OVERFLOW;
1098 error (0, 0, _("invalid --%s argument %s"),
1099 long_options[oi].name, quote(s));
1100 error (SORT_FAILURE, 0,
1101 _("minimum --%s argument is %s"),
1102 long_options[oi].name, quote("2"));
1104 else if (max_nmerge < nmerge)
1106 e = LONGINT_OVERFLOW;
1113 if (e == LONGINT_OVERFLOW)
1115 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1116 error (0, 0, _("--%s argument %s too large"),
1117 long_options[oi].name, quote(s));
1118 error (SORT_FAILURE, 0,
1119 _("maximum --%s argument with current rlimit is %s"),
1120 long_options[oi].name,
1121 uinttostr (max_nmerge, max_nmerge_buf));
1124 xstrtol_fatal (e, oi, c, long_options, s);
1127 /* Specify the amount of main memory to use when sorting. */
1129 specify_sort_size (int oi, char c, char const *s)
1133 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1135 /* The default unit is KiB. */
1136 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1138 if (n <= UINTMAX_MAX / 1024)
1141 e = LONGINT_OVERFLOW;
1144 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1145 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1154 double mem = physmem_total () * n / 100;
1156 /* Use "<", not "<=", to avoid problems with rounding. */
1157 if (mem < UINTMAX_MAX)
1163 e = LONGINT_OVERFLOW;
1168 if (e == LONGINT_OK)
1170 /* If multiple sort sizes are specified, take the maximum, so
1171 that option order does not matter. */
1178 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1182 e = LONGINT_OVERFLOW;
1185 xstrtol_fatal (e, oi, c, long_options, s);
1188 /* Return the default sort size. */
1190 default_sort_size (void)
1192 /* Let MEM be available memory or 1/8 of total memory, whichever
1194 double avail = physmem_available ();
1195 double total = physmem_total ();
1196 double mem = MAX (avail, total / 8);
1197 struct rlimit rlimit;
1199 /* Let SIZE be MEM, but no more than the maximum object size or
1200 system resource limits. Avoid the MIN macro here, as it is not
1201 quite right when only one argument is floating point. Don't
1202 bother to check for values like RLIM_INFINITY since in practice
1203 they are not much less than SIZE_MAX. */
1204 size_t size = SIZE_MAX;
1207 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1208 size = rlimit.rlim_cur;
1210 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1211 size = rlimit.rlim_cur;
1214 /* Leave a large safety margin for the above limits, as failure can
1215 occur when they are exceeded. */
1219 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1220 Exceeding RSS is not fatal, but can be quite slow. */
1221 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1222 size = rlimit.rlim_cur / 16 * 15;
1225 /* Use no less than the minimum. */
1226 return MAX (size, MIN_SORT_SIZE);
1229 /* Return the sort buffer size to use with the input files identified
1230 by FPS and FILES, which are alternate names of the same files.
1231 NFILES gives the number of input files; NFPS may be less. Assume
1232 that each input line requires LINE_BYTES extra bytes' worth of line
1233 information. Do not exceed the size bound specified by the user
1234 (or a default size bound, if the user does not specify one). */
1237 sort_buffer_size (FILE *const *fps, size_t nfps,
1238 char *const *files, size_t nfiles,
1241 /* A bound on the input size. If zero, the bound hasn't been
1243 static size_t size_bound;
1245 /* In the worst case, each input byte is a newline. */
1246 size_t worst_case_per_input_byte = line_bytes + 1;
1248 /* Keep enough room for one extra input line and an extra byte.
1249 This extra room might be needed when preparing to read EOF. */
1250 size_t size = worst_case_per_input_byte + 1;
1254 for (i = 0; i < nfiles; i++)
1260 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1261 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1262 : stat (files[i], &st))
1264 die (_("stat failed"), files[i]);
1266 if (S_ISREG (st.st_mode))
1267 file_size = st.st_size;
1270 /* The file has unknown size. If the user specified a sort
1271 buffer size, use that; otherwise, guess the size. */
1274 file_size = INPUT_FILE_SIZE_GUESS;
1279 size_bound = sort_size;
1281 size_bound = default_sort_size ();
1284 /* Add the amount of memory needed to represent the worst case
1285 where the input consists entirely of newlines followed by a
1286 single non-newline. Check for overflow. */
1287 worst_case = file_size * worst_case_per_input_byte + 1;
1288 if (file_size != worst_case / worst_case_per_input_byte
1289 || size_bound - size <= worst_case)
1297 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1298 must be at least sizeof (struct line). Allocate ALLOC bytes
1302 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1304 /* Ensure that the line array is properly aligned. If the desired
1305 size cannot be allocated, repeatedly halve it until allocation
1306 succeeds. The smaller allocation may hurt overall performance,
1307 but that's better than failing. */
1310 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1311 buf->buf = malloc (alloc);
1315 if (alloc <= line_bytes + 1)
1319 buf->line_bytes = line_bytes;
1321 buf->used = buf->left = buf->nlines = 0;
1325 /* Return one past the limit of the line array. */
1327 static inline struct line *
1328 buffer_linelim (struct buffer const *buf)
1330 return (struct line *) (buf->buf + buf->alloc);
1333 /* Return a pointer to the first character of the field specified
1337 begfield (const struct line *line, const struct keyfield *key)
1339 char *ptr = line->text, *lim = ptr + line->length - 1;
1340 size_t sword = key->sword;
1341 size_t schar = key->schar;
1342 size_t remaining_bytes;
1344 /* The leading field separator itself is included in a field when -t
1347 if (tab != TAB_DEFAULT)
1348 while (ptr < lim && sword--)
1350 while (ptr < lim && *ptr != tab)
1356 while (ptr < lim && sword--)
1358 while (ptr < lim && blanks[to_uchar (*ptr)])
1360 while (ptr < lim && !blanks[to_uchar (*ptr)])
1364 if (key->skipsblanks)
1365 while (ptr < lim && blanks[to_uchar (*ptr)])
1368 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1369 remaining_bytes = lim - ptr;
1370 if (schar < remaining_bytes)
1378 /* Return the limit of (a pointer to the first character after) the field
1379 in LINE specified by KEY. */
1382 limfield (const struct line *line, const struct keyfield *key)
1384 char *ptr = line->text, *lim = ptr + line->length - 1;
1385 size_t eword = key->eword, echar = key->echar;
1386 size_t remaining_bytes;
1388 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1389 whichever comes first. If there are more than EWORD fields, leave
1390 PTR pointing at the beginning of the field having zero-based index,
1391 EWORD. If a delimiter character was specified (via -t), then that
1392 `beginning' is the first character following the delimiting TAB.
1393 Otherwise, leave PTR pointing at the first `blank' character after
1394 the preceding field. */
1395 if (tab != TAB_DEFAULT)
1396 while (ptr < lim && eword--)
1398 while (ptr < lim && *ptr != tab)
1400 if (ptr < lim && (eword | echar))
1404 while (ptr < lim && eword--)
1406 while (ptr < lim && blanks[to_uchar (*ptr)])
1408 while (ptr < lim && !blanks[to_uchar (*ptr)])
1412 #ifdef POSIX_UNSPECIFIED
1413 /* The following block of code makes GNU sort incompatible with
1414 standard Unix sort, so it's ifdef'd out for now.
1415 The POSIX spec isn't clear on how to interpret this.
1416 FIXME: request clarification.
1418 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1419 Date: Thu, 30 May 96 12:20:41 -0400
1420 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1422 [...]I believe I've found another bug in `sort'.
1427 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1430 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1434 Unix sort produced the answer I expected: sort on the single character
1435 in column 7. GNU sort produced different results, because it disagrees
1436 on the interpretation of the key-end spec "M.N". Unix sort reads this
1437 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1438 "skip M-1 fields, then either N-1 characters or the rest of the current
1439 field, whichever comes first". This extra clause applies only to
1440 key-ends, not key-starts.
1443 /* Make LIM point to the end of (one byte past) the current field. */
1444 if (tab != TAB_DEFAULT)
1447 newlim = memchr (ptr, tab, lim - ptr);
1455 while (newlim < lim && blanks[to_uchar (*newlim)])
1457 while (newlim < lim && !blanks[to_uchar (*newlim)])
1463 /* If we're ignoring leading blanks when computing the End
1464 of the field, don't start counting bytes until after skipping
1465 past any leading blanks. */
1466 if (key->skipeblanks)
1467 while (ptr < lim && blanks[to_uchar (*ptr)])
1470 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1471 remaining_bytes = lim - ptr;
1472 if (echar < remaining_bytes)
1480 /* Fill BUF reading from FP, moving buf->left bytes from the end
1481 of buf->buf to the beginning first. If EOF is reached and the
1482 file wasn't terminated by a newline, supply one. Set up BUF's line
1483 table too. FILE is the name of the file corresponding to FP.
1484 Return true if some input was read. */
1487 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1489 struct keyfield const *key = keylist;
1491 size_t line_bytes = buf->line_bytes;
1492 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1497 if (buf->used != buf->left)
1499 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1500 buf->used = buf->left;
1506 char *ptr = buf->buf + buf->used;
1507 struct line *linelim = buffer_linelim (buf);
1508 struct line *line = linelim - buf->nlines;
1509 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1510 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1512 while (line_bytes + 1 < avail)
1514 /* Read as many bytes as possible, but do not read so many
1515 bytes that there might not be enough room for the
1516 corresponding line array. The worst case is when the
1517 rest of the input file consists entirely of newlines,
1518 except that the last byte is not a newline. */
1519 size_t readsize = (avail - 1) / (line_bytes + 1);
1520 size_t bytes_read = fread (ptr, 1, readsize, fp);
1521 char *ptrlim = ptr + bytes_read;
1523 avail -= bytes_read;
1525 if (bytes_read != readsize)
1528 die (_("read failed"), file);
1532 if (buf->buf == ptrlim)
1534 if (ptrlim[-1] != eol)
1539 /* Find and record each line in the just-read input. */
1540 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1544 line->text = line_start;
1545 line->length = ptr - line_start;
1546 mergesize = MAX (mergesize, line->length);
1547 avail -= line_bytes;
1551 /* Precompute the position of the first key for
1553 line->keylim = (key->eword == SIZE_MAX
1555 : limfield (line, key));
1557 if (key->sword != SIZE_MAX)
1558 line->keybeg = begfield (line, key);
1561 if (key->skipsblanks)
1562 while (blanks[to_uchar (*line_start)])
1564 line->keybeg = line_start;
1576 buf->used = ptr - buf->buf;
1577 buf->nlines = buffer_linelim (buf) - line;
1578 if (buf->nlines != 0)
1580 buf->left = ptr - line_start;
1581 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1586 /* The current input line is too long to fit in the buffer.
1587 Double the buffer size and try again, keeping it properly
1589 size_t line_alloc = buf->alloc / sizeof (struct line);
1590 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1591 buf->alloc = line_alloc * sizeof (struct line);
1596 /* Compare strings A and B as numbers without explicitly converting them to
1597 machine numbers. Comparatively slow for short strings, but asymptotically
1601 numcompare (const char *a, const char *b)
1603 while (blanks[to_uchar (*a)])
1605 while (blanks[to_uchar (*b)])
1608 return strnumcmp (a, b, decimal_point, thousands_sep);
1612 general_numcompare (const char *sa, const char *sb)
1614 /* FIXME: add option to warn about failed conversions. */
1615 /* FIXME: maybe add option to try expensive FP conversion
1616 only if A and B can't be compared more cheaply/accurately. */
1620 double a = strtod (sa, &ea);
1621 double b = strtod (sb, &eb);
1623 /* Put conversion errors at the start of the collating sequence. */
1625 return sb == eb ? 0 : -1;
1629 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1630 conversion errors but before numbers; sort them by internal
1631 bit-pattern, for lack of a more portable alternative. */
1637 : memcmp ((char *) &a, (char *) &b, sizeof a));
1640 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1641 Return 0 if the name in S is not recognized. */
1644 getmonth (char const *month, size_t len)
1647 size_t hi = MONTHS_PER_YEAR;
1648 char const *monthlim = month + len;
1652 if (month == monthlim)
1654 if (!blanks[to_uchar (*month)])
1661 size_t ix = (lo + hi) / 2;
1662 char const *m = month;
1663 char const *n = monthtab[ix].name;
1668 return monthtab[ix].val;
1669 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1674 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1686 /* A source of random data. */
1687 static struct randread_source *randread_source;
1689 /* Return the Ith randomly-generated state. The caller must invoke
1690 random_state (H) for all H less than I before invoking random_state
1693 static struct md5_ctx
1694 random_state (size_t i)
1696 /* An array of states resulting from the random data, and counts of
1697 its used and allocated members. */
1698 static struct md5_ctx *state;
1700 static size_t allocated;
1702 struct md5_ctx *s = &state[i];
1706 unsigned char buf[MD5_DIGEST_SIZE];
1712 state = X2NREALLOC (state, &allocated);
1716 randread (randread_source, buf, sizeof buf);
1718 md5_process_bytes (buf, sizeof buf, s);
1724 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1725 with length LENGTHB. Return negative if less, zero if equal,
1726 positive if greater. */
1729 cmp_hashes (char const *texta, size_t lena,
1730 char const *textb, size_t lenb)
1732 /* Try random hashes until a pair of hashes disagree. But if the
1733 first pair of random hashes agree, check whether the keys are
1734 identical and if so report no difference. */
1739 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1740 struct md5_ctx s[2];
1741 s[0] = s[1] = random_state (i);
1742 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1743 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1744 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1747 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1754 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1755 using one or more random hash functions. */
1758 compare_random (char *restrict texta, size_t lena,
1759 char *restrict textb, size_t lenb)
1763 if (! hard_LC_COLLATE)
1764 diff = cmp_hashes (texta, lena, textb, lenb);
1767 /* Transform the text into the basis of comparison, so that byte
1768 strings that would otherwise considered to be equal are
1769 considered equal here even if their bytes differ. */
1772 char stackbuf[4000];
1773 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1774 bool a_fits = tlena <= sizeof stackbuf;
1775 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1776 (a_fits ? sizeof stackbuf - tlena : 0),
1779 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1783 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1784 faster by avoiding the need for an extra buffer copy. */
1785 buf = xmalloc (tlena + tlenb + 1);
1786 xmemxfrm (buf, tlena + 1, texta, lena);
1787 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1790 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1792 if (buf != stackbuf)
1799 /* Compare two lines A and B trying every key in sequence until there
1800 are no more keys or a difference is found. */
1803 keycompare (const struct line *a, const struct line *b)
1805 struct keyfield const *key = keylist;
1807 /* For the first iteration only, the key positions have been
1808 precomputed for us. */
1809 char *texta = a->keybeg;
1810 char *textb = b->keybeg;
1811 char *lima = a->keylim;
1812 char *limb = b->keylim;
1818 char const *translate = key->translate;
1819 bool const *ignore = key->ignore;
1821 /* Find the lengths. */
1822 size_t lena = lima <= texta ? 0 : lima - texta;
1823 size_t lenb = limb <= textb ? 0 : limb - textb;
1825 /* Actually compare the fields. */
1828 diff = compare_random (texta, lena, textb, lenb);
1829 else if (key->numeric | key->general_numeric)
1831 char savea = *lima, saveb = *limb;
1833 *lima = *limb = '\0';
1834 diff = ((key->numeric ? numcompare : general_numcompare)
1836 *lima = savea, *limb = saveb;
1838 else if (key->month)
1839 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1840 /* Sorting like this may become slow, so in a simple locale the user
1841 can select a faster sort that is similar to ascii sort. */
1842 else if (hard_LC_COLLATE)
1844 if (ignore || translate)
1847 size_t size = lena + 1 + lenb + 1;
1848 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1849 char *copy_b = copy_a + lena + 1;
1850 size_t new_len_a, new_len_b, i;
1852 /* Ignore and/or translate chars before comparing. */
1853 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1857 copy_a[new_len_a] = (translate
1858 ? translate[to_uchar (texta[i])]
1860 if (!ignore || !ignore[to_uchar (texta[i])])
1865 copy_b[new_len_b] = (translate
1866 ? translate[to_uchar (textb[i])]
1868 if (!ignore || !ignore[to_uchar (textb[i])])
1873 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1875 if (sizeof buf < size)
1879 diff = - NONZERO (lenb);
1883 diff = xmemcoll (texta, lena, textb, lenb);
1887 #define CMP_WITH_IGNORE(A, B) \
1892 while (texta < lima && ignore[to_uchar (*texta)]) \
1894 while (textb < limb && ignore[to_uchar (*textb)]) \
1896 if (! (texta < lima && textb < limb)) \
1898 diff = to_uchar (A) - to_uchar (B); \
1905 diff = (texta < lima) - (textb < limb); \
1910 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1911 translate[to_uchar (*textb)]);
1913 CMP_WITH_IGNORE (*texta, *textb);
1916 diff = - NONZERO (lenb);
1923 while (texta < lima && textb < limb)
1925 diff = (to_uchar (translate[to_uchar (*texta++)])
1926 - to_uchar (translate[to_uchar (*textb++)]));
1933 diff = memcmp (texta, textb, MIN (lena, lenb));
1937 diff = lena < lenb ? -1 : lena != lenb;
1947 /* Find the beginning and limit of the next field. */
1948 if (key->eword != SIZE_MAX)
1949 lima = limfield (a, key), limb = limfield (b, key);
1951 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1953 if (key->sword != SIZE_MAX)
1954 texta = begfield (a, key), textb = begfield (b, key);
1957 texta = a->text, textb = b->text;
1958 if (key->skipsblanks)
1960 while (texta < lima && blanks[to_uchar (*texta)])
1962 while (textb < limb && blanks[to_uchar (*textb)])
1973 return key->reverse ? -diff : diff;
1976 /* Compare two lines A and B, returning negative, zero, or positive
1977 depending on whether A compares less than, equal to, or greater than B. */
1980 compare (const struct line *a, const struct line *b)
1985 /* First try to compare on the specified keys (if any).
1986 The only two cases with no key at all are unadorned sort,
1987 and unadorned sort -r. */
1990 diff = keycompare (a, b);
1991 if (diff | unique | stable)
1995 /* If the keys all compare equal (or no keys were specified)
1996 fall through to the default comparison. */
1997 alen = a->length - 1, blen = b->length - 1;
2000 diff = - NONZERO (blen);
2003 else if (hard_LC_COLLATE)
2004 diff = xmemcoll (a->text, alen, b->text, blen);
2005 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2006 diff = alen < blen ? -1 : alen != blen;
2008 return reverse ? -diff : diff;
2011 /* Check that the lines read from FILE_NAME come in order. Return
2012 true if they are in order. If CHECKONLY == 'c', also print a
2013 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2014 they are not in order. */
2017 check (char const *file_name, char checkonly)
2019 FILE *fp = xfopen (file_name, "r");
2020 struct buffer buf; /* Input buffer. */
2021 struct line temp; /* Copy of previous line. */
2023 uintmax_t line_number = 0;
2024 struct keyfield const *key = keylist;
2025 bool nonunique = ! unique;
2026 bool ordered = true;
2028 initbuf (&buf, sizeof (struct line),
2029 MAX (merge_buffer_size, sort_size));
2032 while (fillbuf (&buf, fp, file_name))
2034 struct line const *line = buffer_linelim (&buf);
2035 struct line const *linebase = line - buf.nlines;
2037 /* Make sure the line saved from the old buffer contents is
2038 less than or equal to the first line of the new buffer. */
2039 if (alloc && nonunique <= compare (&temp, line - 1))
2043 if (checkonly == 'c')
2045 struct line const *disorder_line = line - 1;
2046 uintmax_t disorder_line_number =
2047 buffer_linelim (&buf) - disorder_line + line_number;
2048 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2049 fprintf (stderr, _("%s: %s:%s: disorder: "),
2050 program_name, file_name,
2051 umaxtostr (disorder_line_number, hr_buf));
2052 write_bytes (disorder_line->text, disorder_line->length,
2053 stderr, _("standard error"));
2061 /* Compare each line in the buffer with its successor. */
2062 while (linebase < --line)
2063 if (nonunique <= compare (line, line - 1))
2064 goto found_disorder;
2066 line_number += buf.nlines;
2068 /* Save the last line of the buffer. */
2069 if (alloc < line->length)
2076 alloc = line->length;
2080 while (alloc < line->length);
2082 temp.text = xrealloc (temp.text, alloc);
2084 memcpy (temp.text, line->text, line->length);
2085 temp.length = line->length;
2088 temp.keybeg = temp.text + (line->keybeg - line->text);
2089 temp.keylim = temp.text + (line->keylim - line->text);
2093 xfclose (fp, file_name);
2099 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2100 files (all of which are at the start of the FILES array), and
2101 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2102 Close input and output files before returning.
2103 OUTPUT_FILE gives the name of the output file. If it is NULL,
2104 the output file is standard output. If OFP is NULL, the output
2105 file has not been opened yet (or written to, if standard output). */
2108 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2109 FILE *ofp, char const *output_file)
2111 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2112 /* Input streams for each file. */
2113 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2114 /* Input buffers for each file. */
2115 struct line saved; /* Saved line storage for unique check. */
2116 struct line const *savedline = NULL;
2117 /* &saved if there is a saved line. */
2118 size_t savealloc = 0; /* Size allocated for the saved line. */
2119 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2120 /* Current line in each line table. */
2121 struct line const **base = xnmalloc (nmerge, sizeof *base);
2122 /* Base of each line table. */
2123 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2124 /* Table representing a permutation of fps,
2125 such that cur[ord[0]] is the smallest line
2126 and will be next output. */
2130 struct keyfield const *key = keylist;
2133 /* Read initial lines from each input file. */
2134 for (i = 0; i < nfiles; )
2136 fps[i] = (files[i].pid
2137 ? open_temp (files[i].name, files[i].pid)
2138 : xfopen (files[i].name, "r"));
2139 initbuf (&buffer[i], sizeof (struct line),
2140 MAX (merge_buffer_size, sort_size / nfiles));
2141 if (fillbuf (&buffer[i], fps[i], files[i].name))
2143 struct line const *linelim = buffer_linelim (&buffer[i]);
2144 cur[i] = linelim - 1;
2145 base[i] = linelim - buffer[i].nlines;
2150 /* fps[i] is empty; eliminate it from future consideration. */
2151 xfclose (fps[i], files[i].name);
2155 zaptemp (files[i].name);
2157 free (buffer[i].buf);
2159 for (j = i; j < nfiles; ++j)
2160 files[j] = files[j + 1];
2165 ofp = xfopen (output_file, "w");
2167 /* Set up the ord table according to comparisons among input lines.
2168 Since this only reorders two items if one is strictly greater than
2169 the other, it is stable. */
2170 for (i = 0; i < nfiles; ++i)
2172 for (i = 1; i < nfiles; ++i)
2173 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2174 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2176 /* Repeatedly output the smallest line until no input remains. */
2179 struct line const *smallest = cur[ord[0]];
2181 /* If uniquified output is turned on, output only the first of
2182 an identical series of lines. */
2185 if (savedline && compare (savedline, smallest))
2188 write_bytes (saved.text, saved.length, ofp, output_file);
2193 if (savealloc < smallest->length)
2198 savealloc = smallest->length;
2201 while ((savealloc *= 2) < smallest->length);
2203 saved.text = xrealloc (saved.text, savealloc);
2205 saved.length = smallest->length;
2206 memcpy (saved.text, smallest->text, saved.length);
2210 saved.text + (smallest->keybeg - smallest->text);
2212 saved.text + (smallest->keylim - smallest->text);
2217 write_bytes (smallest->text, smallest->length, ofp, output_file);
2219 /* Check if we need to read more lines into core. */
2220 if (base[ord[0]] < smallest)
2221 cur[ord[0]] = smallest - 1;
2224 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2226 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2227 cur[ord[0]] = linelim - 1;
2228 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2232 /* We reached EOF on fps[ord[0]]. */
2233 for (i = 1; i < nfiles; ++i)
2234 if (ord[i] > ord[0])
2237 xfclose (fps[ord[0]], files[ord[0]].name);
2238 if (ord[0] < ntemps)
2241 zaptemp (files[ord[0]].name);
2243 free (buffer[ord[0]].buf);
2244 for (i = ord[0]; i < nfiles; ++i)
2246 fps[i] = fps[i + 1];
2247 files[i] = files[i + 1];
2248 buffer[i] = buffer[i + 1];
2249 cur[i] = cur[i + 1];
2250 base[i] = base[i + 1];
2252 for (i = 0; i < nfiles; ++i)
2253 ord[i] = ord[i + 1];
2258 /* The new line just read in may be larger than other lines
2259 already in main memory; push it back in the queue until we
2260 encounter a line larger than it. Optimize for the common
2261 case where the new line is smallest. */
2266 size_t ord0 = ord[0];
2267 size_t count_of_smaller_lines;
2271 int cmp = compare (cur[ord0], cur[ord[probe]]);
2272 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2276 probe = (lo + hi) / 2;
2279 count_of_smaller_lines = lo - 1;
2280 for (j = 0; j < count_of_smaller_lines; j++)
2281 ord[j] = ord[j + 1];
2282 ord[count_of_smaller_lines] = ord0;
2285 /* Free up some resources every once in a while. */
2286 if (MAX_PROCS_BEFORE_REAP < nprocs)
2290 if (unique && savedline)
2292 write_bytes (saved.text, saved.length, ofp, output_file);
2296 xfclose (ofp, output_file);
2304 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2305 and HI (with NHI members). T, LO, and HI point just past their
2306 respective arrays, and the arrays are in reverse order. NLO and
2307 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2310 mergelines (struct line *t,
2311 struct line const *lo, size_t nlo,
2312 struct line const *hi, size_t nhi)
2315 if (compare (lo - 1, hi - 1) <= 0)
2320 /* HI - NHI equalled T - (NLO + NHI) when this function
2321 began. Therefore HI must equal T now, and there is no
2322 need to copy from HI to T. */
2340 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2341 NLINES must be at least 2.
2342 The input and output arrays are in reverse order, and LINES and
2343 TEMP point just past the end of their respective arrays.
2345 Use a recursive divide-and-conquer algorithm, in the style
2346 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2347 the optimization suggested by exercise 5.2.4-10; this requires room
2348 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2349 writes that this memory optimization was originally published by
2350 D. A. Bell, Comp J. 1 (1958), 75. */
2353 sortlines (struct line *lines, size_t nlines, struct line *temp)
2357 if (0 < compare (&lines[-1], &lines[-2]))
2359 struct line tmp = lines[-1];
2360 lines[-1] = lines[-2];
2366 size_t nlo = nlines / 2;
2367 size_t nhi = nlines - nlo;
2368 struct line *lo = lines;
2369 struct line *hi = lines - nlo;
2370 struct line *sorted_lo = temp;
2372 sortlines (hi, nhi, temp);
2374 sortlines_temp (lo, nlo, sorted_lo);
2376 sorted_lo[-1] = lo[-1];
2378 mergelines (lines, sorted_lo, nlo, hi, nhi);
2382 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2383 rather than sorting in place. */
2386 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2390 /* Declare `swap' as int, not bool, to work around a bug
2391 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2392 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2393 int swap = (0 < compare (&lines[-1], &lines[-2]));
2394 temp[-1] = lines[-1 - swap];
2395 temp[-2] = lines[-2 + swap];
2399 size_t nlo = nlines / 2;
2400 size_t nhi = nlines - nlo;
2401 struct line *lo = lines;
2402 struct line *hi = lines - nlo;
2403 struct line *sorted_hi = temp - nlo;
2405 sortlines_temp (hi, nhi, sorted_hi);
2407 sortlines (lo, nlo, temp);
2409 mergelines (temp, lo, nlo, sorted_hi, nhi);
2413 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2414 the same as OUTFILE. If found, merge the found instances (and perhaps
2415 some other files) into a temporary file so that it can in turn be
2416 merged into OUTFILE without destroying OUTFILE before it is completely
2417 read. Return the new value of NFILES, which differs from the old if
2418 some merging occurred.
2420 This test ensures that an otherwise-erroneous use like
2421 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2422 It's not clear that POSIX requires this nicety.
2423 Detect common error cases, but don't try to catch obscure cases like
2424 "cat ... FILE ... | sort -m -o FILE"
2425 where traditional "sort" doesn't copy the input and where
2426 people should know that they're getting into trouble anyway.
2427 Catching these obscure cases would slow down performance in
2431 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2432 size_t nfiles, char const *outfile)
2435 bool got_outstat = false;
2436 struct stat outstat;
2438 for (i = ntemps; i < nfiles; i++)
2440 bool is_stdin = STREQ (files[i].name, "-");
2444 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2451 ? stat (outfile, &outstat)
2452 : fstat (STDOUT_FILENO, &outstat))
2459 ? fstat (STDIN_FILENO, &instat)
2460 : stat (files[i].name, &instat))
2462 && SAME_INODE (instat, outstat));
2469 char *temp = create_temp (&tftp, &pid);
2470 mergefps (&files[i],0, nfiles - i, tftp, temp);
2471 files[i].name = temp;
2480 /* Merge the input FILES. NTEMPS is the number of files at the
2481 start of FILES that are temporary; it is zero at the top level.
2482 NFILES is the total number of files. Put the output in
2483 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2486 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2487 char const *output_file)
2489 while (nmerge < nfiles)
2491 /* Number of input files processed so far. */
2494 /* Number of output files generated so far. */
2497 /* nfiles % NMERGE; this counts input files that are left over
2498 after all full-sized merges have been done. */
2501 /* Number of easily-available slots at the next loop iteration. */
2504 /* Do as many NMERGE-size merges as possible. */
2505 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2509 char *temp = create_temp (&tfp, &pid);
2510 size_t nt = MIN (ntemps, nmerge);
2512 mergefps (&files[in], nt, nmerge, tfp, temp);
2513 files[out].name = temp;
2514 files[out].pid = pid;
2517 remainder = nfiles - in;
2518 cheap_slots = nmerge - out % nmerge;
2520 if (cheap_slots < remainder)
2522 /* So many files remain that they can't all be put into the last
2523 NMERGE-sized output window. Do one more merge. Merge as few
2524 files as possible, to avoid needless I/O. */
2525 size_t nshortmerge = remainder - cheap_slots + 1;
2528 char *temp = create_temp (&tfp, &pid);
2529 size_t nt = MIN (ntemps, nshortmerge);
2531 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2532 files[out].name = temp;
2533 files[out++].pid = pid;
2537 /* Put the remaining input files into the last NMERGE-sized output
2538 window, so they will be merged in the next pass. */
2539 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2544 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2545 mergefps (files, ntemps, nfiles, NULL, output_file);
2548 /* Sort NFILES FILES onto OUTPUT_FILE. */
2551 sort (char * const *files, size_t nfiles, char const *output_file)
2555 bool output_file_created = false;
2561 char const *temp_output;
2562 char const *file = *files;
2563 FILE *fp = xfopen (file, "r");
2565 size_t bytes_per_line = (2 * sizeof (struct line)
2566 - sizeof (struct line) / 2);
2569 initbuf (&buf, bytes_per_line,
2570 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2575 while (fillbuf (&buf, fp, file))
2578 struct line *linebase;
2580 if (buf.eof && nfiles
2581 && (bytes_per_line + 1
2582 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2584 /* End of file, but there is more input and buffer room.
2585 Concatenate the next input file; this is faster in
2587 buf.left = buf.used;
2591 line = buffer_linelim (&buf);
2592 linebase = line - buf.nlines;
2594 sortlines (line, buf.nlines, linebase);
2595 if (buf.eof && !nfiles && !ntemps && !buf.left)
2598 tfp = xfopen (output_file, "w");
2599 temp_output = output_file;
2600 output_file_created = true;
2605 temp_output = create_temp (&tfp, NULL);
2611 write_bytes (line->text, line->length, tfp, temp_output);
2613 while (linebase < line && compare (line, line - 1) == 0)
2616 while (linebase < line);
2618 xfclose (tfp, temp_output);
2620 /* Free up some resources every once in a while. */
2621 if (MAX_PROCS_BEFORE_REAP < nprocs)
2624 if (output_file_created)
2633 if (! output_file_created)
2636 struct tempnode *node = temphead;
2637 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2638 for (i = 0; node; i++)
2640 tempfiles[i].name = node->name;
2641 tempfiles[i].pid = node->pid;
2644 merge (tempfiles, ntemps, ntemps, output_file);
2649 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2652 insertkey (struct keyfield *key_arg)
2654 struct keyfield **p;
2655 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2657 for (p = &keylist; *p; p = &(*p)->next)
2663 /* Report a bad field specification SPEC, with extra info MSGID. */
2665 static void badfieldspec (char const *, char const *)
2668 badfieldspec (char const *spec, char const *msgid)
2670 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2671 _(msgid), quote (spec));
2675 /* Report incompatible options. */
2677 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2679 incompatible_options (char const *opts)
2681 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2685 /* Check compatibility of ordering options. */
2688 check_ordering_compatibility (void)
2690 struct keyfield const *key;
2692 for (key = keylist; key; key = key->next)
2693 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2695 || (key->random && key->translate))
2699 if (key->ignore == nondictionary)
2703 if (key->general_numeric)
2705 if (key->ignore == nonprinting)
2714 incompatible_options (opts);
2718 /* Parse the leading integer in STRING and store the resulting value
2719 (which must fit into size_t) into *VAL. Return the address of the
2720 suffix after the integer. If the value is too large, silently
2721 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2722 failure; otherwise, report MSGID and exit on failure. */
2725 parse_field_count (char const *string, size_t *val, char const *msgid)
2730 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2733 case LONGINT_INVALID_SUFFIX_CHAR:
2738 case LONGINT_OVERFLOW:
2739 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2743 case LONGINT_INVALID:
2745 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2746 _(msgid), quote (string));
2753 /* Handle interrupts and hangups. */
2756 sighandler (int sig)
2759 signal (sig, SIG_IGN);
2763 signal (sig, SIG_DFL);
2767 /* Set the ordering options for KEY specified in S.
2768 Return the address of the first character in S that
2769 is not a valid ordering option.
2770 BLANKTYPE is the kind of blanks that 'b' should skip. */
2773 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2780 if (blanktype == bl_start || blanktype == bl_both)
2781 key->skipsblanks = true;
2782 if (blanktype == bl_end || blanktype == bl_both)
2783 key->skipeblanks = true;
2786 key->ignore = nondictionary;
2789 key->translate = fold_toupper;
2792 key->general_numeric = true;
2795 /* Option order should not matter, so don't let -i override
2796 -d. -d implies -i, but -i does not imply -d. */
2798 key->ignore = nonprinting;
2804 key->numeric = true;
2810 key->reverse = true;
2820 static struct keyfield *
2821 key_init (struct keyfield *key)
2823 memset (key, 0, sizeof *key);
2824 key->eword = SIZE_MAX;
2829 main (int argc, char **argv)
2831 struct keyfield *key;
2832 struct keyfield key_buf;
2833 struct keyfield gkey;
2837 bool mergeonly = false;
2838 char *random_source = NULL;
2839 bool need_random = false;
2841 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2842 bool obsolete_usage = (posix2_version () < 200112);
2844 char *files_from = NULL;
2846 char const *outfile = NULL;
2848 initialize_main (&argc, &argv);
2849 set_program_name (argv[0]);
2850 setlocale (LC_ALL, "");
2851 bindtextdomain (PACKAGE, LOCALEDIR);
2852 textdomain (PACKAGE);
2854 initialize_exit_failure (SORT_FAILURE);
2856 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2857 #if HAVE_NL_LANGINFO
2858 hard_LC_TIME = hard_locale (LC_TIME);
2861 /* Get locale's representation of the decimal point. */
2863 struct lconv const *locale = localeconv ();
2865 /* If the locale doesn't define a decimal point, or if the decimal
2866 point is multibyte, use the C locale's decimal point. FIXME:
2867 add support for multibyte decimal points. */
2868 decimal_point = to_uchar (locale->decimal_point[0]);
2869 if (! decimal_point || locale->decimal_point[1])
2870 decimal_point = '.';
2872 /* FIXME: add support for multibyte thousands separators. */
2873 thousands_sep = to_uchar (*locale->thousands_sep);
2874 if (! thousands_sep || locale->thousands_sep[1])
2878 have_read_stdin = false;
2883 static int const sig[] =
2885 /* The usual suspects. */
2886 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2903 enum { nsigs = sizeof sig / sizeof sig[0] };
2906 struct sigaction act;
2908 sigemptyset (&caught_signals);
2909 for (i = 0; i < nsigs; i++)
2911 sigaction (sig[i], NULL, &act);
2912 if (act.sa_handler != SIG_IGN)
2913 sigaddset (&caught_signals, sig[i]);
2916 act.sa_handler = sighandler;
2917 act.sa_mask = caught_signals;
2920 for (i = 0; i < nsigs; i++)
2921 if (sigismember (&caught_signals, sig[i]))
2922 sigaction (sig[i], &act, NULL);
2924 for (i = 0; i < nsigs; i++)
2925 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2927 signal (sig[i], sighandler);
2928 siginterrupt (sig[i], 1);
2933 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2934 atexit (exit_cleanup);
2936 gkey.sword = gkey.eword = SIZE_MAX;
2938 gkey.translate = NULL;
2939 gkey.numeric = gkey.general_numeric = gkey.random = false;
2940 gkey.month = gkey.reverse = false;
2941 gkey.skipsblanks = gkey.skipeblanks = false;
2943 files = xnmalloc (argc, sizeof *files);
2947 /* Parse an operand as a file after "--" was seen; or if
2948 pedantic and a file was seen, unless the POSIX version
2949 predates 1003.1-2001 and -c was not seen and the operand is
2950 "-o FILE" or "-oFILE". */
2954 || (posixly_correct && nfiles != 0
2955 && ! (obsolete_usage
2958 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2959 && (argv[optind][2] || optind + 1 != argc)))
2960 || ((c = getopt_long (argc, argv, short_options,
2966 files[nfiles++] = argv[optind++];
2972 if (optarg[0] == '+')
2974 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2975 && ISDIGIT (argv[optind][1]));
2976 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2979 /* Treat +POS1 [-POS2] as a key if possible; but silently
2980 treat an operand as a file if it is not a valid +POS1. */
2981 key = key_init (&key_buf);
2982 s = parse_field_count (optarg + 1, &key->sword, NULL);
2984 s = parse_field_count (s + 1, &key->schar, NULL);
2985 if (! (key->sword | key->schar))
2986 key->sword = SIZE_MAX;
2987 if (! s || *set_ordering (s, key, bl_start))
2991 if (minus_pos_usage)
2993 char const *optarg1 = argv[optind++];
2994 s = parse_field_count (optarg1 + 1, &key->eword,
2995 N_("invalid number after `-'"));
2997 s = parse_field_count (s + 1, &key->echar,
2998 N_("invalid number after `.'"));
2999 if (*set_ordering (s, key, bl_end))
3000 badfieldspec (optarg1,
3001 N_("stray character in field spec"));
3008 files[nfiles++] = optarg;
3012 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3027 set_ordering (str, &gkey, bl_both);
3033 ? XARGMATCH ("--check", optarg, check_args, check_types)
3038 if (checkonly && checkonly != c)
3039 incompatible_options ("cC");
3043 case COMPRESS_PROGRAM_OPTION:
3044 if (compress_program && !STREQ (compress_program, optarg))
3045 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3046 compress_program = optarg;
3049 case FILES0_FROM_OPTION:
3050 files_from = optarg;
3054 key = key_init (&key_buf);
3057 s = parse_field_count (optarg, &key->sword,
3058 N_("invalid number at field start"));
3061 /* Provoke with `sort -k0' */
3062 badfieldspec (optarg, N_("field number is zero"));
3066 s = parse_field_count (s + 1, &key->schar,
3067 N_("invalid number after `.'"));
3070 /* Provoke with `sort -k1.0' */
3071 badfieldspec (optarg, N_("character offset is zero"));
3074 if (! (key->sword | key->schar))
3075 key->sword = SIZE_MAX;
3076 s = set_ordering (s, key, bl_start);
3079 key->eword = SIZE_MAX;
3085 s = parse_field_count (s + 1, &key->eword,
3086 N_("invalid number after `,'"));
3089 /* Provoke with `sort -k1,0' */
3090 badfieldspec (optarg, N_("field number is zero"));
3093 s = parse_field_count (s + 1, &key->echar,
3094 N_("invalid number after `.'"));
3097 /* `-k 2,3' is equivalent to `+1 -3'. */
3100 s = set_ordering (s, key, bl_end);
3103 badfieldspec (optarg, N_("stray character in field spec"));
3112 specify_nmerge (oi, c, optarg);
3116 if (outfile && !STREQ (outfile, optarg))
3117 error (SORT_FAILURE, 0, _("multiple output files specified"));
3121 case RANDOM_SOURCE_OPTION:
3122 if (random_source && !STREQ (random_source, optarg))
3123 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3124 random_source = optarg;
3132 specify_sort_size (oi, c, optarg);
3137 char newtab = optarg[0];
3139 error (SORT_FAILURE, 0, _("empty tab"));
3142 if (STREQ (optarg, "\\0"))
3146 /* Provoke with `sort -txx'. Complain about
3147 "multi-character tab" instead of "multibyte tab", so
3148 that the diagnostic's wording does not need to be
3149 changed once multibyte characters are supported. */
3150 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3154 if (tab != TAB_DEFAULT && tab != newtab)
3155 error (SORT_FAILURE, 0, _("incompatible tabs"));
3161 add_temp_dir (optarg);
3169 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3170 through Solaris 7. It is also accepted by many non-Solaris
3171 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3172 -y is marked as obsolete starting with Solaris 8 (1999), but is
3173 still accepted as of Solaris 10 prerelease (2004).
3175 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3176 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3177 and which in general ignores the argument after "-y" if it
3178 consists entirely of digits (it can even be empty). */
3179 if (optarg == argv[optind - 1])
3182 for (p = optarg; ISDIGIT (*p); p++)
3184 optind -= (*p != '\0');
3192 case_GETOPT_HELP_CHAR;
3194 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3197 usage (SORT_FAILURE);
3205 /* When using --files0-from=F, you may not specify any files
3206 on the command-line. */
3209 error (0, 0, _("extra operand %s"), quote (files[0]));
3210 fprintf (stderr, "%s\n",
3211 _("file operands cannot be combined with --files0-from"));
3212 usage (SORT_FAILURE);
3215 if (STREQ (files_from, "-"))
3219 stream = fopen (files_from, "r");
3221 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3222 quote (files_from));
3225 readtokens0_init (&tok);
3227 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3228 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3229 quote (files_from));
3237 for (i = 0; i < nfiles; i++)
3239 if (STREQ (files[i], "-"))
3240 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3241 "no file name of %s allowed"),
3243 else if (files[i][0] == '\0')
3245 /* Using the standard `filename:line-number:' prefix here is
3246 not totally appropriate, since NUL is the separator, not NL,
3247 but it might be better than nothing. */
3248 unsigned long int file_number = i + 1;
3249 error (SORT_FAILURE, 0,
3250 _("%s:%lu: invalid zero-length file name"),
3251 quotearg_colon (files_from), file_number);
3256 error (SORT_FAILURE, 0, _("no input from %s"),
3257 quote (files_from));
3260 /* Inheritance of global options to individual keys. */
3261 for (key = keylist; key; key = key->next)
3263 if (! (key->ignore || key->translate
3264 || (key->skipsblanks | key->reverse
3265 | key->skipeblanks | key->month | key->numeric
3266 | key->general_numeric
3269 key->ignore = gkey.ignore;
3270 key->translate = gkey.translate;
3271 key->skipsblanks = gkey.skipsblanks;
3272 key->skipeblanks = gkey.skipeblanks;
3273 key->month = gkey.month;
3274 key->numeric = gkey.numeric;
3275 key->general_numeric = gkey.general_numeric;
3276 key->random = gkey.random;
3277 key->reverse = gkey.reverse;
3280 need_random |= key->random;
3283 if (!keylist && (gkey.ignore || gkey.translate
3284 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3285 | gkey.numeric | gkey.general_numeric
3289 need_random |= gkey.random;
3292 check_ordering_compatibility ();
3294 reverse = gkey.reverse;
3298 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3299 if (! randread_source)
3300 die (_("open failed"), random_source);
3303 if (temp_dir_count == 0)
3305 char const *tmp_dir = getenv ("TMPDIR");
3306 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3311 static char *minus = "-";
3317 /* Need to re-check that we meet the minimum requirement for memory
3318 usage with the final value for NMERGE. */
3320 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3325 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3326 quote (files[1]), checkonly);
3330 static char opts[] = {0, 'o', 0};
3331 opts[0] = checkonly;
3332 incompatible_options (opts);
3335 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3336 input is not properly sorted. */
3337 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3342 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3345 for (i = 0; i < nfiles; ++i)
3346 sortfiles[i].name = files[i];
3348 merge (sortfiles, 0, nfiles, outfile);
3349 IF_LINT (free (sortfiles));
3352 sort (files, nfiles, outfile);
3354 if (have_read_stdin && fclose (stdin) == EOF)
3355 die (_("close failed"), "-");
3357 exit (EXIT_SUCCESS);