1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
79 #if !defined OPEN_MAX && defined NR_OPEN
80 # define OPEN_MAX NR_OPEN
86 #define UCHAR_LIM (UCHAR_MAX + 1)
88 #ifndef DEFAULT_TMPDIR
89 # define DEFAULT_TMPDIR "/tmp"
95 /* POSIX says to exit with status 1 if invoked with -c and the
96 input is not properly sorted. */
97 SORT_OUT_OF_ORDER = 1,
99 /* POSIX says any other irregular exit must exit with a status
100 code greater than 1. */
106 /* The number of times we should try to fork a compression process
107 (we retry if the fork call fails). We don't _need_ to compress
108 temp files, this is just to reduce disk access, so this number
110 MAX_FORK_TRIES_COMPRESS = 2,
112 /* The number of times we should try to fork a decompression process.
113 If we can't fork a decompression process, we can't sort, so this
114 number should be big. */
115 MAX_FORK_TRIES_DECOMPRESS = 8
118 /* The representation of the decimal point in the current locale. */
119 static int decimal_point;
121 /* Thousands separator; if -1, then there isn't one. */
122 static int thousands_sep;
124 /* Nonzero if the corresponding locales are hard. */
125 static bool hard_LC_COLLATE;
127 static bool hard_LC_TIME;
130 #define NONZERO(x) ((x) != 0)
132 /* The kind of blanks for '-b' to skip in various options. */
133 enum blanktype { bl_start, bl_end, bl_both };
135 /* The character marking end of line. Default to \n. */
136 static char eolchar = '\n';
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 size_t length; /* Length including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
150 char *buf; /* Dynamically allocated buffer,
151 partitioned into 3 regions:
154 - an array of lines, in reverse order. */
155 size_t used; /* Number of bytes used for input data. */
156 size_t nlines; /* Number of lines in the line array. */
157 size_t alloc; /* Number of bytes allocated. */
158 size_t left; /* Number of bytes left from previous reads. */
159 size_t line_bytes; /* Number of bytes to reserve for each line. */
160 bool eof; /* An EOF has been read. */
165 size_t sword; /* Zero-origin 'word' to start at. */
166 size_t schar; /* Additional characters to skip. */
167 size_t eword; /* Zero-origin first word after field. */
168 size_t echar; /* Additional characters in field. */
169 bool const *ignore; /* Boolean array of characters to ignore. */
170 char const *translate; /* Translation applied to characters. */
171 bool skipsblanks; /* Skip leading blanks when finding start. */
172 bool skipeblanks; /* Skip leading blanks when finding end. */
173 bool numeric; /* Flag for numeric comparison. Handle
174 strings of digits with optional decimal
175 point, but no exponential notation. */
176 bool random; /* Sort by random hash of key. */
177 bool general_numeric; /* Flag for general, numeric comparison.
178 Handle numbers in exponential notation. */
179 bool month; /* Flag for comparison by month name. */
180 bool reverse; /* Reverse the sense of comparison. */
181 struct keyfield *next; /* Next keyfield to try. */
190 /* FIXME: None of these tables work with multibyte character sets.
191 Also, there are many other bugs when handling multibyte characters.
192 One way to fix this is to rewrite `sort' to use wide characters
193 internally, but doing this with good performance is a bit
196 /* Table of blanks. */
197 static bool blanks[UCHAR_LIM];
199 /* Table of non-printing characters. */
200 static bool nonprinting[UCHAR_LIM];
202 /* Table of non-dictionary characters (not letters, digits, or blanks). */
203 static bool nondictionary[UCHAR_LIM];
205 /* Translation table folding lower case to upper. */
206 static char fold_toupper[UCHAR_LIM];
208 #define MONTHS_PER_YEAR 12
210 /* Table mapping month names to integers.
211 Alphabetic order allows binary search. */
212 static struct month monthtab[] =
228 /* During the merge phase, the number of files to merge at once. */
229 #define NMERGE_DEFAULT 16
231 /* Minimum size for a merge or check buffer. */
232 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
234 /* Minimum sort size; the code might not work with smaller sizes. */
235 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
237 /* The number of bytes needed for a merge or check buffer, which can
238 function relatively efficiently even if it holds only one line. If
239 a longer line is seen, this value is increased. */
240 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
242 /* The approximate maximum number of bytes of main memory to use, as
243 specified by the user. Zero if the user has not specified a size. */
244 static size_t sort_size;
246 /* The guessed size for non-regular files. */
247 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
249 /* Array of directory names in which any temporary files are to be created. */
250 static char const **temp_dirs;
252 /* Number of temporary directory names used. */
253 static size_t temp_dir_count;
255 /* Number of allocated slots in temp_dirs. */
256 static size_t temp_dir_alloc;
258 /* Flag to reverse the order of all comparisons. */
261 /* Flag for stable sort. This turns off the last ditch bytewise
262 comparison of lines, and instead leaves lines in the same order
263 they were read if all keys compare equal. */
266 /* If TAB has this value, blanks separate fields. */
267 enum { TAB_DEFAULT = CHAR_MAX + 1 };
269 /* Tab character separating fields. If TAB_DEFAULT, then fields are
270 separated by the empty string between a non-blank character and a blank
272 static int tab = TAB_DEFAULT;
274 /* Flag to remove consecutive duplicate lines from the output.
275 Only the last of a sequence of equal lines will be output. */
278 /* Nonzero if any of the input files are the standard input. */
279 static bool have_read_stdin;
281 /* List of key field comparisons to be tried. */
282 static struct keyfield *keylist;
284 /* Program used to (de)compress temp files. Must accept -d. */
285 static char const *compress_program;
287 /* Maximum number of files to merge in one go. If more than this
288 number are present, temp files will be used. */
289 static unsigned int nmerge = NMERGE_DEFAULT;
291 static void sortlines_temp (struct line *, size_t, struct line *);
293 /* Report MESSAGE for FILE, then clean up and exit.
294 If FILE is null, it represents standard output. */
296 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
298 die (char const *message, char const *file)
300 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
307 if (status != EXIT_SUCCESS)
308 fprintf (stderr, _("Try `%s --help' for more information.\n"),
313 Usage: %s [OPTION]... [FILE]...\n\
314 or: %s [OPTION]... --files0-from=F\n\
316 program_name, program_name);
318 Write sorted concatenation of all FILE(s) to standard output.\n\
322 Mandatory arguments to long options are mandatory for short options too.\n\
329 -b, --ignore-leading-blanks ignore leading blanks\n\
330 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
331 -f, --ignore-case fold lower case to upper case characters\n\
334 -g, --general-numeric-sort compare according to general numerical value\n\
335 -i, --ignore-nonprinting consider only printable characters\n\
336 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
337 -n, --numeric-sort compare according to string numerical value\n\
338 -R, --random-sort sort by random hash of keys\n\
339 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
340 --sort=WORD sort according to WORD:\n\
341 general-numeric -g, month -M, numeric -n,\n\
343 -r, --reverse reverse the result of comparisons\n\
351 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
352 for more use temp files\n\
355 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
356 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
357 --compress-program=PROG compress temporaries with PROG;\n\
358 decompress them with PROG -d\n\
359 --files0-from=F read input from the files specified by\n\
360 NUL-terminated names in file F\n\
363 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
364 -m, --merge merge already sorted files; do not sort\n\
367 -o, --output=FILE write result to FILE instead of standard output\n\
368 -s, --stable stabilize sort by disabling last-resort comparison\n\
369 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
372 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
373 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
374 multiple options specify multiple directories\n\
375 -u, --unique with -c, check for strict ordering;\n\
376 without -c, output only the first of an equal run\n\
379 -z, --zero-terminated end lines with 0 byte, not newline\n\
381 fputs (HELP_OPTION_DESCRIPTION, stdout);
382 fputs (VERSION_OPTION_DESCRIPTION, stdout);
385 POS is F[.C][OPTS], where F is the field number and C the character position\n\
386 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
387 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
388 one or more single-letter ordering options, which override global ordering\n\
389 options for that key. If no key is given, use the entire line as the key.\n\
391 SIZE may be followed by the following multiplicative suffixes:\n\
394 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
396 With no FILE, or when FILE is -, read standard input.\n\
399 The locale specified by the environment affects sort order.\n\
400 Set LC_ALL=C to get the traditional sort order that uses\n\
401 native byte values.\n\
403 emit_bug_reporting_address ();
409 /* For long options that have no equivalent short option, use a
410 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
413 CHECK_OPTION = CHAR_MAX + 1,
414 COMPRESS_PROGRAM_OPTION,
417 RANDOM_SOURCE_OPTION,
421 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
423 static struct option const long_options[] =
425 {"ignore-leading-blanks", no_argument, NULL, 'b'},
426 {"check", optional_argument, NULL, CHECK_OPTION},
427 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
428 {"dictionary-order", no_argument, NULL, 'd'},
429 {"ignore-case", no_argument, NULL, 'f'},
430 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
431 {"general-numeric-sort", no_argument, NULL, 'g'},
432 {"ignore-nonprinting", no_argument, NULL, 'i'},
433 {"key", required_argument, NULL, 'k'},
434 {"merge", no_argument, NULL, 'm'},
435 {"month-sort", no_argument, NULL, 'M'},
436 {"numeric-sort", no_argument, NULL, 'n'},
437 {"random-sort", no_argument, NULL, 'R'},
438 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
439 {"sort", required_argument, NULL, SORT_OPTION},
440 {"output", required_argument, NULL, 'o'},
441 {"reverse", no_argument, NULL, 'r'},
442 {"stable", no_argument, NULL, 's'},
443 {"batch-size", required_argument, NULL, NMERGE_OPTION},
444 {"buffer-size", required_argument, NULL, 'S'},
445 {"field-separator", required_argument, NULL, 't'},
446 {"temporary-directory", required_argument, NULL, 'T'},
447 {"unique", no_argument, NULL, 'u'},
448 {"zero-terminated", no_argument, NULL, 'z'},
449 {GETOPT_HELP_OPTION_DECL},
450 {GETOPT_VERSION_OPTION_DECL},
454 static char const *const check_args[] =
456 "quiet", "silent", "diagnose-first", NULL
458 static char const check_types[] =
462 ARGMATCH_VERIFY (check_args, check_types);
464 static char const *const sort_args[] =
466 "general-numeric", "month", "numeric", "random", NULL
468 static char const sort_types[] =
472 ARGMATCH_VERIFY (sort_args, sort_types);
474 /* The set of signals that are caught. */
475 static sigset_t caught_signals;
477 /* Critical section status. */
484 /* Enter a critical section. */
485 static struct cs_status
488 struct cs_status status;
489 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
493 /* Leave a critical section. */
495 cs_leave (struct cs_status status)
499 /* Ignore failure when restoring the signal mask. */
500 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
504 /* The list of temporary files. */
507 struct tempnode *volatile next;
508 pid_t pid; /* If compressed, the pid of compressor, else zero */
509 char name[1]; /* Actual size is 1 + file name length. */
511 static struct tempnode *volatile temphead;
512 static struct tempnode *volatile *temptail = &temphead;
517 pid_t pid; /* If compressed, the pid of compressor, else zero */
520 /* A table where we store compression process states. We clean up all
521 processes in a timely manner so as not to exhaust system resources,
522 so we store the info on whether the process is still running, or has
524 static Hash_table *proctab;
526 enum { INIT_PROCTAB_SIZE = 47 };
528 enum procstate { ALIVE, ZOMBIE };
530 /* A proctab entry. The COUNT field is there in case we fork a new
531 compression process that has the same PID as an old zombie process
532 that is still in the table (because the process to decompress the
533 temp file it was associated with hasn't started yet). */
537 enum procstate state;
542 proctab_hasher (const void *entry, size_t tabsize)
544 const struct procnode *node = entry;
545 return node->pid % tabsize;
549 proctab_comparator (const void *e1, const void *e2)
551 const struct procnode *n1 = e1, *n2 = e2;
552 return n1->pid == n2->pid;
555 /* The total number of forked processes (compressors and decompressors)
556 that have not been reaped yet. */
557 static size_t nprocs;
559 /* The number of child processes we'll allow before we try to reap some. */
560 enum { MAX_PROCS_BEFORE_REAP = 2 };
562 /* If 0 < PID, wait for the child process with that PID to exit.
563 If PID is -1, clean up a random child process which has finished and
564 return the process ID of that child. If PID is -1 and no processes
565 have quit yet, return 0 without waiting. */
571 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
574 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
578 if (! WIFEXITED (status) || WEXITSTATUS (status))
579 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
587 /* Add the PID of a running compression process to proctab, or update
588 the entry COUNT and STATE fields if it's already there. This also
589 creates the table for us the first time it's called. */
592 register_proc (pid_t pid)
594 struct procnode test, *node;
598 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
607 node = hash_lookup (proctab, &test);
615 node = xmalloc (sizeof *node);
619 hash_insert (proctab, node);
623 /* This is called when we reap a random process. We don't know
624 whether we have reaped a compression process or a decompression
625 process until we look in the table. If there's an ALIVE entry for
626 it, then we have reaped a compression process, so change the state
627 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
630 update_proc (pid_t pid)
632 struct procnode test, *node;
635 node = hash_lookup (proctab, &test);
637 node->state = ZOMBIE;
640 /* This is for when we need to wait for a compression process to exit.
641 If it has a ZOMBIE entry in the table then it's already dead and has
642 been reaped. Note that if there's an ALIVE entry for it, it still may
643 already have died and been reaped if a second process was created with
644 the same PID. This is probably exceedingly rare, but to be on the safe
645 side we will have to wait for any compression process with this PID. */
648 wait_proc (pid_t pid)
650 struct procnode test, *node;
653 node = hash_lookup (proctab, &test);
654 if (node->state == ALIVE)
657 node->state = ZOMBIE;
660 hash_delete (proctab, node);
665 /* Keep reaping finished children as long as there are more to reap.
666 This doesn't block waiting for any of them, it only reaps those
667 that are already dead. */
674 while (0 < nprocs && (pid = reap (-1)))
678 /* Clean up any remaining temporary files. */
683 struct tempnode const *node;
685 for (node = temphead; node; node = node->next)
690 /* Cleanup actions to take when exiting. */
697 /* Clean up any remaining temporary files in a critical section so
698 that a signal handler does not try to clean them too. */
699 struct cs_status cs = cs_enter ();
707 /* Create a new temporary file, returning its newly allocated tempnode.
708 Store into *PFD the file descriptor open for writing. */
710 static struct tempnode *
711 create_temp_file (int *pfd)
713 static char const slashbase[] = "/sortXXXXXX";
714 static size_t temp_dir_index;
717 char const *temp_dir = temp_dirs[temp_dir_index];
718 size_t len = strlen (temp_dir);
719 struct tempnode *node =
720 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
721 char *file = node->name;
724 memcpy (file, temp_dir, len);
725 memcpy (file + len, slashbase, sizeof slashbase);
728 if (++temp_dir_index == temp_dir_count)
731 /* Create the temporary file in a critical section, to avoid races. */
737 temptail = &node->next;
744 die (_("cannot create temporary file"), file);
750 /* Return a stream for FILE, opened with mode HOW. A null FILE means
751 standard output; HOW should be "w". When opening for input, "-"
752 means standard input. To avoid confusion, do not return file
753 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
754 opening an ordinary FILE. */
757 xfopen (const char *file, const char *how)
763 else if (STREQ (file, "-") && *how == 'r')
765 have_read_stdin = true;
770 fp = fopen (file, how);
772 die (_("open failed"), file);
778 /* Close FP, whose name is FILE, and report any errors. */
781 xfclose (FILE *fp, char const *file)
786 /* Allow reading stdin from tty more than once. */
792 /* Don't close stdout just yet. close_stdout does that. */
793 if (fflush (fp) != 0)
794 die (_("fflush failed"), file);
798 if (fclose (fp) != 0)
799 die (_("close failed"), file);
805 dup2_or_die (int oldfd, int newfd)
807 if (dup2 (oldfd, newfd) < 0)
808 error (SORT_FAILURE, errno, _("dup2 failed"));
811 /* Fork a child process for piping to and do common cleanup. The
812 TRIES parameter tells us how many times to try to fork before
813 giving up. Return the PID of the child or -1 if fork failed. */
816 pipe_fork (int pipefds[2], size_t tries)
818 #if HAVE_WORKING_FORK
819 struct tempnode *saved_temphead;
821 unsigned int wait_retry = 1;
822 pid_t pid IF_LINT (= -1);
825 if (pipe (pipefds) < 0)
830 /* This is so the child process won't delete our temp files
831 if it receives a signal before exec-ing. */
833 saved_temphead = temphead;
839 temphead = saved_temphead;
844 if (0 <= pid || errno != EAGAIN)
861 close (STDIN_FILENO);
862 close (STDOUT_FILENO);
869 #else /* ! HAVE_WORKING_FORK */
874 /* Create a temporary file and start a compression program to filter output
875 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
876 set it to the PID of the newly-created process. */
879 create_temp (FILE **pfp, pid_t *ppid)
882 struct tempnode *node = create_temp_file (&tempfd);
883 char *name = node->name;
885 if (compress_program)
889 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
896 register_proc (node->pid);
898 else if (node->pid == 0)
901 dup2_or_die (tempfd, STDOUT_FILENO);
903 dup2_or_die (pipefds[0], STDIN_FILENO);
906 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
907 error (SORT_FAILURE, errno, _("couldn't execute %s"),
914 *pfp = fdopen (tempfd, "w");
916 die (_("couldn't create temporary file"), name);
924 /* Open a compressed temp file and start a decompression process through
925 which to filter the input. PID must be the valid processes ID of the
926 process used to compress the file. */
929 open_temp (const char *name, pid_t pid)
931 int tempfd, pipefds[2];
937 tempfd = open (name, O_RDONLY);
939 die (_("couldn't open temporary file"), name);
941 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
947 else if (child_pid == 0)
950 dup2_or_die (tempfd, STDIN_FILENO);
952 dup2_or_die (pipefds[1], STDOUT_FILENO);
955 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
956 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
960 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
963 fp = fdopen (pipefds[0], "r");
965 die (_("couldn't create temporary file"), name);
971 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
973 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
974 die (_("write failed"), output_file);
977 /* Append DIR to the array of temporary directory names. */
979 add_temp_dir (char const *dir)
981 if (temp_dir_count == temp_dir_alloc)
982 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
984 temp_dirs[temp_dir_count++] = dir;
987 /* Remove NAME from the list of temporary files. */
990 zaptemp (const char *name)
992 struct tempnode *volatile *pnode;
993 struct tempnode *node;
994 struct tempnode *next;
996 int unlink_errno = 0;
999 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1002 /* Unlink the temporary file in a critical section to avoid races. */
1005 unlink_status = unlink (name);
1006 unlink_errno = errno;
1010 if (unlink_status != 0)
1011 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1017 #if HAVE_NL_LANGINFO
1020 struct_month_cmp (const void *m1, const void *m2)
1022 struct month const *month1 = m1;
1023 struct month const *month2 = m2;
1024 return strcmp (month1->name, month2->name);
1029 /* Initialize the character class tables. */
1036 for (i = 0; i < UCHAR_LIM; ++i)
1038 blanks[i] = !! isblank (i);
1039 nonprinting[i] = ! isprint (i);
1040 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1041 fold_toupper[i] = toupper (i);
1044 #if HAVE_NL_LANGINFO
1045 /* If we're not in the "C" locale, read different names for months. */
1048 for (i = 0; i < MONTHS_PER_YEAR; i++)
1055 s = (char *) nl_langinfo (ABMON_1 + i);
1057 monthtab[i].name = name = xmalloc (s_len + 1);
1058 monthtab[i].val = i + 1;
1060 for (j = 0; j < s_len; j++)
1061 name[j] = fold_toupper[to_uchar (s[j])];
1064 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1065 sizeof *monthtab, struct_month_cmp);
1070 /* Specify how many inputs may be merged at once.
1071 This may be set on the command-line with the
1072 --batch-size option. */
1074 specify_nmerge (int oi, char c, char const *s)
1077 struct rlimit rlimit;
1078 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1080 /* Try to find out how many file descriptors we'll be able
1081 to open. We need at least nmerge + 3 (STDIN_FILENO,
1082 STDOUT_FILENO and STDERR_FILENO). */
1083 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1088 if (e == LONGINT_OK)
1092 e = LONGINT_OVERFLOW;
1097 error (0, 0, _("invalid --%s argument %s"),
1098 long_options[oi].name, quote(s));
1099 error (SORT_FAILURE, 0,
1100 _("minimum --%s argument is %s"),
1101 long_options[oi].name, quote("2"));
1103 else if (max_nmerge < nmerge)
1105 e = LONGINT_OVERFLOW;
1112 if (e == LONGINT_OVERFLOW)
1114 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1115 error (0, 0, _("--%s argument %s too large"),
1116 long_options[oi].name, quote(s));
1117 error (SORT_FAILURE, 0,
1118 _("maximum --%s argument with current rlimit is %s"),
1119 long_options[oi].name,
1120 uinttostr (max_nmerge, max_nmerge_buf));
1123 xstrtol_fatal (e, oi, c, long_options, s);
1126 /* Specify the amount of main memory to use when sorting. */
1128 specify_sort_size (int oi, char c, char const *s)
1132 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1134 /* The default unit is KiB. */
1135 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1137 if (n <= UINTMAX_MAX / 1024)
1140 e = LONGINT_OVERFLOW;
1143 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1144 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1153 double mem = physmem_total () * n / 100;
1155 /* Use "<", not "<=", to avoid problems with rounding. */
1156 if (mem < UINTMAX_MAX)
1162 e = LONGINT_OVERFLOW;
1167 if (e == LONGINT_OK)
1169 /* If multiple sort sizes are specified, take the maximum, so
1170 that option order does not matter. */
1177 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1181 e = LONGINT_OVERFLOW;
1184 xstrtol_fatal (e, oi, c, long_options, s);
1187 /* Return the default sort size. */
1189 default_sort_size (void)
1191 /* Let MEM be available memory or 1/8 of total memory, whichever
1193 double avail = physmem_available ();
1194 double total = physmem_total ();
1195 double mem = MAX (avail, total / 8);
1196 struct rlimit rlimit;
1198 /* Let SIZE be MEM, but no more than the maximum object size or
1199 system resource limits. Avoid the MIN macro here, as it is not
1200 quite right when only one argument is floating point. Don't
1201 bother to check for values like RLIM_INFINITY since in practice
1202 they are not much less than SIZE_MAX. */
1203 size_t size = SIZE_MAX;
1206 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1207 size = rlimit.rlim_cur;
1209 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1210 size = rlimit.rlim_cur;
1213 /* Leave a large safety margin for the above limits, as failure can
1214 occur when they are exceeded. */
1218 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1219 Exceeding RSS is not fatal, but can be quite slow. */
1220 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1221 size = rlimit.rlim_cur / 16 * 15;
1224 /* Use no less than the minimum. */
1225 return MAX (size, MIN_SORT_SIZE);
1228 /* Return the sort buffer size to use with the input files identified
1229 by FPS and FILES, which are alternate names of the same files.
1230 NFILES gives the number of input files; NFPS may be less. Assume
1231 that each input line requires LINE_BYTES extra bytes' worth of line
1232 information. Do not exceed the size bound specified by the user
1233 (or a default size bound, if the user does not specify one). */
1236 sort_buffer_size (FILE *const *fps, size_t nfps,
1237 char *const *files, size_t nfiles,
1240 /* A bound on the input size. If zero, the bound hasn't been
1242 static size_t size_bound;
1244 /* In the worst case, each input byte is a newline. */
1245 size_t worst_case_per_input_byte = line_bytes + 1;
1247 /* Keep enough room for one extra input line and an extra byte.
1248 This extra room might be needed when preparing to read EOF. */
1249 size_t size = worst_case_per_input_byte + 1;
1253 for (i = 0; i < nfiles; i++)
1259 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1260 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1261 : stat (files[i], &st))
1263 die (_("stat failed"), files[i]);
1265 if (S_ISREG (st.st_mode))
1266 file_size = st.st_size;
1269 /* The file has unknown size. If the user specified a sort
1270 buffer size, use that; otherwise, guess the size. */
1273 file_size = INPUT_FILE_SIZE_GUESS;
1278 size_bound = sort_size;
1280 size_bound = default_sort_size ();
1283 /* Add the amount of memory needed to represent the worst case
1284 where the input consists entirely of newlines followed by a
1285 single non-newline. Check for overflow. */
1286 worst_case = file_size * worst_case_per_input_byte + 1;
1287 if (file_size != worst_case / worst_case_per_input_byte
1288 || size_bound - size <= worst_case)
1296 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1297 must be at least sizeof (struct line). Allocate ALLOC bytes
1301 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1303 /* Ensure that the line array is properly aligned. If the desired
1304 size cannot be allocated, repeatedly halve it until allocation
1305 succeeds. The smaller allocation may hurt overall performance,
1306 but that's better than failing. */
1309 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1310 buf->buf = malloc (alloc);
1314 if (alloc <= line_bytes + 1)
1318 buf->line_bytes = line_bytes;
1320 buf->used = buf->left = buf->nlines = 0;
1324 /* Return one past the limit of the line array. */
1326 static inline struct line *
1327 buffer_linelim (struct buffer const *buf)
1329 return (struct line *) (buf->buf + buf->alloc);
1332 /* Return a pointer to the first character of the field specified
1336 begfield (const struct line *line, const struct keyfield *key)
1338 char *ptr = line->text, *lim = ptr + line->length - 1;
1339 size_t sword = key->sword;
1340 size_t schar = key->schar;
1341 size_t remaining_bytes;
1343 /* The leading field separator itself is included in a field when -t
1346 if (tab != TAB_DEFAULT)
1347 while (ptr < lim && sword--)
1349 while (ptr < lim && *ptr != tab)
1355 while (ptr < lim && sword--)
1357 while (ptr < lim && blanks[to_uchar (*ptr)])
1359 while (ptr < lim && !blanks[to_uchar (*ptr)])
1363 if (key->skipsblanks)
1364 while (ptr < lim && blanks[to_uchar (*ptr)])
1367 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1368 remaining_bytes = lim - ptr;
1369 if (schar < remaining_bytes)
1377 /* Return the limit of (a pointer to the first character after) the field
1378 in LINE specified by KEY. */
1381 limfield (const struct line *line, const struct keyfield *key)
1383 char *ptr = line->text, *lim = ptr + line->length - 1;
1384 size_t eword = key->eword, echar = key->echar;
1385 size_t remaining_bytes;
1387 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1388 whichever comes first. If there are more than EWORD fields, leave
1389 PTR pointing at the beginning of the field having zero-based index,
1390 EWORD. If a delimiter character was specified (via -t), then that
1391 `beginning' is the first character following the delimiting TAB.
1392 Otherwise, leave PTR pointing at the first `blank' character after
1393 the preceding field. */
1394 if (tab != TAB_DEFAULT)
1395 while (ptr < lim && eword--)
1397 while (ptr < lim && *ptr != tab)
1399 if (ptr < lim && (eword | echar))
1403 while (ptr < lim && eword--)
1405 while (ptr < lim && blanks[to_uchar (*ptr)])
1407 while (ptr < lim && !blanks[to_uchar (*ptr)])
1411 #ifdef POSIX_UNSPECIFIED
1412 /* The following block of code makes GNU sort incompatible with
1413 standard Unix sort, so it's ifdef'd out for now.
1414 The POSIX spec isn't clear on how to interpret this.
1415 FIXME: request clarification.
1417 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1418 Date: Thu, 30 May 96 12:20:41 -0400
1419 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1421 [...]I believe I've found another bug in `sort'.
1426 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1429 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1433 Unix sort produced the answer I expected: sort on the single character
1434 in column 7. GNU sort produced different results, because it disagrees
1435 on the interpretation of the key-end spec "M.N". Unix sort reads this
1436 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1437 "skip M-1 fields, then either N-1 characters or the rest of the current
1438 field, whichever comes first". This extra clause applies only to
1439 key-ends, not key-starts.
1442 /* Make LIM point to the end of (one byte past) the current field. */
1443 if (tab != TAB_DEFAULT)
1446 newlim = memchr (ptr, tab, lim - ptr);
1454 while (newlim < lim && blanks[to_uchar (*newlim)])
1456 while (newlim < lim && !blanks[to_uchar (*newlim)])
1462 /* If we're ignoring leading blanks when computing the End
1463 of the field, don't start counting bytes until after skipping
1464 past any leading blanks. */
1465 if (key->skipeblanks)
1466 while (ptr < lim && blanks[to_uchar (*ptr)])
1469 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1470 remaining_bytes = lim - ptr;
1471 if (echar < remaining_bytes)
1479 /* Fill BUF reading from FP, moving buf->left bytes from the end
1480 of buf->buf to the beginning first. If EOF is reached and the
1481 file wasn't terminated by a newline, supply one. Set up BUF's line
1482 table too. FILE is the name of the file corresponding to FP.
1483 Return true if some input was read. */
1486 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1488 struct keyfield const *key = keylist;
1490 size_t line_bytes = buf->line_bytes;
1491 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1496 if (buf->used != buf->left)
1498 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1499 buf->used = buf->left;
1505 char *ptr = buf->buf + buf->used;
1506 struct line *linelim = buffer_linelim (buf);
1507 struct line *line = linelim - buf->nlines;
1508 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1509 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1511 while (line_bytes + 1 < avail)
1513 /* Read as many bytes as possible, but do not read so many
1514 bytes that there might not be enough room for the
1515 corresponding line array. The worst case is when the
1516 rest of the input file consists entirely of newlines,
1517 except that the last byte is not a newline. */
1518 size_t readsize = (avail - 1) / (line_bytes + 1);
1519 size_t bytes_read = fread (ptr, 1, readsize, fp);
1520 char *ptrlim = ptr + bytes_read;
1522 avail -= bytes_read;
1524 if (bytes_read != readsize)
1527 die (_("read failed"), file);
1531 if (buf->buf == ptrlim)
1533 if (ptrlim[-1] != eol)
1538 /* Find and record each line in the just-read input. */
1539 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1543 line->text = line_start;
1544 line->length = ptr - line_start;
1545 mergesize = MAX (mergesize, line->length);
1546 avail -= line_bytes;
1550 /* Precompute the position of the first key for
1552 line->keylim = (key->eword == SIZE_MAX
1554 : limfield (line, key));
1556 if (key->sword != SIZE_MAX)
1557 line->keybeg = begfield (line, key);
1560 if (key->skipsblanks)
1561 while (blanks[to_uchar (*line_start)])
1563 line->keybeg = line_start;
1575 buf->used = ptr - buf->buf;
1576 buf->nlines = buffer_linelim (buf) - line;
1577 if (buf->nlines != 0)
1579 buf->left = ptr - line_start;
1580 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1585 /* The current input line is too long to fit in the buffer.
1586 Double the buffer size and try again, keeping it properly
1588 size_t line_alloc = buf->alloc / sizeof (struct line);
1589 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1590 buf->alloc = line_alloc * sizeof (struct line);
1595 /* Compare strings A and B as numbers without explicitly converting them to
1596 machine numbers. Comparatively slow for short strings, but asymptotically
1600 numcompare (const char *a, const char *b)
1602 while (blanks[to_uchar (*a)])
1604 while (blanks[to_uchar (*b)])
1607 return strnumcmp (a, b, decimal_point, thousands_sep);
1611 general_numcompare (const char *sa, const char *sb)
1613 /* FIXME: add option to warn about failed conversions. */
1614 /* FIXME: maybe add option to try expensive FP conversion
1615 only if A and B can't be compared more cheaply/accurately. */
1619 double a = strtod (sa, &ea);
1620 double b = strtod (sb, &eb);
1622 /* Put conversion errors at the start of the collating sequence. */
1624 return sb == eb ? 0 : -1;
1628 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1629 conversion errors but before numbers; sort them by internal
1630 bit-pattern, for lack of a more portable alternative. */
1636 : memcmp ((char *) &a, (char *) &b, sizeof a));
1639 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1640 Return 0 if the name in S is not recognized. */
1643 getmonth (char const *month, size_t len)
1646 size_t hi = MONTHS_PER_YEAR;
1647 char const *monthlim = month + len;
1651 if (month == monthlim)
1653 if (!blanks[to_uchar (*month)])
1660 size_t ix = (lo + hi) / 2;
1661 char const *m = month;
1662 char const *n = monthtab[ix].name;
1667 return monthtab[ix].val;
1668 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1673 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1685 /* A source of random data. */
1686 static struct randread_source *randread_source;
1688 /* Return the Ith randomly-generated state. The caller must invoke
1689 random_state (H) for all H less than I before invoking random_state
1692 static struct md5_ctx
1693 random_state (size_t i)
1695 /* An array of states resulting from the random data, and counts of
1696 its used and allocated members. */
1697 static struct md5_ctx *state;
1699 static size_t allocated;
1701 struct md5_ctx *s = &state[i];
1705 unsigned char buf[MD5_DIGEST_SIZE];
1711 state = X2NREALLOC (state, &allocated);
1715 randread (randread_source, buf, sizeof buf);
1717 md5_process_bytes (buf, sizeof buf, s);
1723 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1724 with length LENGTHB. Return negative if less, zero if equal,
1725 positive if greater. */
1728 cmp_hashes (char const *texta, size_t lena,
1729 char const *textb, size_t lenb)
1731 /* Try random hashes until a pair of hashes disagree. But if the
1732 first pair of random hashes agree, check whether the keys are
1733 identical and if so report no difference. */
1738 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1739 struct md5_ctx s[2];
1740 s[0] = s[1] = random_state (i);
1741 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1742 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1743 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1746 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1753 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1754 using one or more random hash functions. */
1757 compare_random (char *restrict texta, size_t lena,
1758 char *restrict textb, size_t lenb)
1762 if (! hard_LC_COLLATE)
1763 diff = cmp_hashes (texta, lena, textb, lenb);
1766 /* Transform the text into the basis of comparison, so that byte
1767 strings that would otherwise considered to be equal are
1768 considered equal here even if their bytes differ. */
1771 char stackbuf[4000];
1772 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1773 bool a_fits = tlena <= sizeof stackbuf;
1774 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1775 (a_fits ? sizeof stackbuf - tlena : 0),
1778 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1782 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1783 faster by avoiding the need for an extra buffer copy. */
1784 buf = xmalloc (tlena + tlenb + 1);
1785 xmemxfrm (buf, tlena + 1, texta, lena);
1786 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1789 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1791 if (buf != stackbuf)
1798 /* Compare two lines A and B trying every key in sequence until there
1799 are no more keys or a difference is found. */
1802 keycompare (const struct line *a, const struct line *b)
1804 struct keyfield const *key = keylist;
1806 /* For the first iteration only, the key positions have been
1807 precomputed for us. */
1808 char *texta = a->keybeg;
1809 char *textb = b->keybeg;
1810 char *lima = a->keylim;
1811 char *limb = b->keylim;
1817 char const *translate = key->translate;
1818 bool const *ignore = key->ignore;
1820 /* Find the lengths. */
1821 size_t lena = lima <= texta ? 0 : lima - texta;
1822 size_t lenb = limb <= textb ? 0 : limb - textb;
1824 /* Actually compare the fields. */
1827 diff = compare_random (texta, lena, textb, lenb);
1828 else if (key->numeric | key->general_numeric)
1830 char savea = *lima, saveb = *limb;
1832 *lima = *limb = '\0';
1833 diff = ((key->numeric ? numcompare : general_numcompare)
1835 *lima = savea, *limb = saveb;
1837 else if (key->month)
1838 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1839 /* Sorting like this may become slow, so in a simple locale the user
1840 can select a faster sort that is similar to ascii sort. */
1841 else if (hard_LC_COLLATE)
1843 if (ignore || translate)
1846 size_t size = lena + 1 + lenb + 1;
1847 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1848 char *copy_b = copy_a + lena + 1;
1849 size_t new_len_a, new_len_b, i;
1851 /* Ignore and/or translate chars before comparing. */
1852 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1856 copy_a[new_len_a] = (translate
1857 ? translate[to_uchar (texta[i])]
1859 if (!ignore || !ignore[to_uchar (texta[i])])
1864 copy_b[new_len_b] = (translate
1865 ? translate[to_uchar (textb[i])]
1867 if (!ignore || !ignore[to_uchar (textb[i])])
1872 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1874 if (sizeof buf < size)
1878 diff = - NONZERO (lenb);
1882 diff = xmemcoll (texta, lena, textb, lenb);
1886 #define CMP_WITH_IGNORE(A, B) \
1891 while (texta < lima && ignore[to_uchar (*texta)]) \
1893 while (textb < limb && ignore[to_uchar (*textb)]) \
1895 if (! (texta < lima && textb < limb)) \
1897 diff = to_uchar (A) - to_uchar (B); \
1904 diff = (texta < lima) - (textb < limb); \
1909 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1910 translate[to_uchar (*textb)]);
1912 CMP_WITH_IGNORE (*texta, *textb);
1915 diff = - NONZERO (lenb);
1922 while (texta < lima && textb < limb)
1924 diff = (to_uchar (translate[to_uchar (*texta++)])
1925 - to_uchar (translate[to_uchar (*textb++)]));
1932 diff = memcmp (texta, textb, MIN (lena, lenb));
1936 diff = lena < lenb ? -1 : lena != lenb;
1946 /* Find the beginning and limit of the next field. */
1947 if (key->eword != SIZE_MAX)
1948 lima = limfield (a, key), limb = limfield (b, key);
1950 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1952 if (key->sword != SIZE_MAX)
1953 texta = begfield (a, key), textb = begfield (b, key);
1956 texta = a->text, textb = b->text;
1957 if (key->skipsblanks)
1959 while (texta < lima && blanks[to_uchar (*texta)])
1961 while (textb < limb && blanks[to_uchar (*textb)])
1972 return key->reverse ? -diff : diff;
1975 /* Compare two lines A and B, returning negative, zero, or positive
1976 depending on whether A compares less than, equal to, or greater than B. */
1979 compare (const struct line *a, const struct line *b)
1984 /* First try to compare on the specified keys (if any).
1985 The only two cases with no key at all are unadorned sort,
1986 and unadorned sort -r. */
1989 diff = keycompare (a, b);
1990 if (diff | unique | stable)
1994 /* If the keys all compare equal (or no keys were specified)
1995 fall through to the default comparison. */
1996 alen = a->length - 1, blen = b->length - 1;
1999 diff = - NONZERO (blen);
2002 else if (hard_LC_COLLATE)
2003 diff = xmemcoll (a->text, alen, b->text, blen);
2004 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2005 diff = alen < blen ? -1 : alen != blen;
2007 return reverse ? -diff : diff;
2010 /* Check that the lines read from FILE_NAME come in order. Return
2011 true if they are in order. If CHECKONLY == 'c', also print a
2012 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2013 they are not in order. */
2016 check (char const *file_name, char checkonly)
2018 FILE *fp = xfopen (file_name, "r");
2019 struct buffer buf; /* Input buffer. */
2020 struct line temp; /* Copy of previous line. */
2022 uintmax_t line_number = 0;
2023 struct keyfield const *key = keylist;
2024 bool nonunique = ! unique;
2025 bool ordered = true;
2027 initbuf (&buf, sizeof (struct line),
2028 MAX (merge_buffer_size, sort_size));
2031 while (fillbuf (&buf, fp, file_name))
2033 struct line const *line = buffer_linelim (&buf);
2034 struct line const *linebase = line - buf.nlines;
2036 /* Make sure the line saved from the old buffer contents is
2037 less than or equal to the first line of the new buffer. */
2038 if (alloc && nonunique <= compare (&temp, line - 1))
2042 if (checkonly == 'c')
2044 struct line const *disorder_line = line - 1;
2045 uintmax_t disorder_line_number =
2046 buffer_linelim (&buf) - disorder_line + line_number;
2047 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2048 fprintf (stderr, _("%s: %s:%s: disorder: "),
2049 program_name, file_name,
2050 umaxtostr (disorder_line_number, hr_buf));
2051 write_bytes (disorder_line->text, disorder_line->length,
2052 stderr, _("standard error"));
2060 /* Compare each line in the buffer with its successor. */
2061 while (linebase < --line)
2062 if (nonunique <= compare (line, line - 1))
2063 goto found_disorder;
2065 line_number += buf.nlines;
2067 /* Save the last line of the buffer. */
2068 if (alloc < line->length)
2075 alloc = line->length;
2079 while (alloc < line->length);
2081 temp.text = xrealloc (temp.text, alloc);
2083 memcpy (temp.text, line->text, line->length);
2084 temp.length = line->length;
2087 temp.keybeg = temp.text + (line->keybeg - line->text);
2088 temp.keylim = temp.text + (line->keylim - line->text);
2092 xfclose (fp, file_name);
2098 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2099 files (all of which are at the start of the FILES array), and
2100 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2101 Close input and output files before returning.
2102 OUTPUT_FILE gives the name of the output file. If it is NULL,
2103 the output file is standard output. If OFP is NULL, the output
2104 file has not been opened yet (or written to, if standard output). */
2107 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2108 FILE *ofp, char const *output_file)
2110 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2111 /* Input streams for each file. */
2112 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2113 /* Input buffers for each file. */
2114 struct line saved; /* Saved line storage for unique check. */
2115 struct line const *savedline = NULL;
2116 /* &saved if there is a saved line. */
2117 size_t savealloc = 0; /* Size allocated for the saved line. */
2118 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2119 /* Current line in each line table. */
2120 struct line const **base = xnmalloc (nmerge, sizeof *base);
2121 /* Base of each line table. */
2122 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2123 /* Table representing a permutation of fps,
2124 such that cur[ord[0]] is the smallest line
2125 and will be next output. */
2129 struct keyfield const *key = keylist;
2132 /* Read initial lines from each input file. */
2133 for (i = 0; i < nfiles; )
2135 fps[i] = (files[i].pid
2136 ? open_temp (files[i].name, files[i].pid)
2137 : xfopen (files[i].name, "r"));
2138 initbuf (&buffer[i], sizeof (struct line),
2139 MAX (merge_buffer_size, sort_size / nfiles));
2140 if (fillbuf (&buffer[i], fps[i], files[i].name))
2142 struct line const *linelim = buffer_linelim (&buffer[i]);
2143 cur[i] = linelim - 1;
2144 base[i] = linelim - buffer[i].nlines;
2149 /* fps[i] is empty; eliminate it from future consideration. */
2150 xfclose (fps[i], files[i].name);
2154 zaptemp (files[i].name);
2156 free (buffer[i].buf);
2158 for (j = i; j < nfiles; ++j)
2159 files[j] = files[j + 1];
2164 ofp = xfopen (output_file, "w");
2166 /* Set up the ord table according to comparisons among input lines.
2167 Since this only reorders two items if one is strictly greater than
2168 the other, it is stable. */
2169 for (i = 0; i < nfiles; ++i)
2171 for (i = 1; i < nfiles; ++i)
2172 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2173 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2175 /* Repeatedly output the smallest line until no input remains. */
2178 struct line const *smallest = cur[ord[0]];
2180 /* If uniquified output is turned on, output only the first of
2181 an identical series of lines. */
2184 if (savedline && compare (savedline, smallest))
2187 write_bytes (saved.text, saved.length, ofp, output_file);
2192 if (savealloc < smallest->length)
2197 savealloc = smallest->length;
2200 while ((savealloc *= 2) < smallest->length);
2202 saved.text = xrealloc (saved.text, savealloc);
2204 saved.length = smallest->length;
2205 memcpy (saved.text, smallest->text, saved.length);
2209 saved.text + (smallest->keybeg - smallest->text);
2211 saved.text + (smallest->keylim - smallest->text);
2216 write_bytes (smallest->text, smallest->length, ofp, output_file);
2218 /* Check if we need to read more lines into core. */
2219 if (base[ord[0]] < smallest)
2220 cur[ord[0]] = smallest - 1;
2223 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2225 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2226 cur[ord[0]] = linelim - 1;
2227 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2231 /* We reached EOF on fps[ord[0]]. */
2232 for (i = 1; i < nfiles; ++i)
2233 if (ord[i] > ord[0])
2236 xfclose (fps[ord[0]], files[ord[0]].name);
2237 if (ord[0] < ntemps)
2240 zaptemp (files[ord[0]].name);
2242 free (buffer[ord[0]].buf);
2243 for (i = ord[0]; i < nfiles; ++i)
2245 fps[i] = fps[i + 1];
2246 files[i] = files[i + 1];
2247 buffer[i] = buffer[i + 1];
2248 cur[i] = cur[i + 1];
2249 base[i] = base[i + 1];
2251 for (i = 0; i < nfiles; ++i)
2252 ord[i] = ord[i + 1];
2257 /* The new line just read in may be larger than other lines
2258 already in main memory; push it back in the queue until we
2259 encounter a line larger than it. Optimize for the common
2260 case where the new line is smallest. */
2265 size_t ord0 = ord[0];
2266 size_t count_of_smaller_lines;
2270 int cmp = compare (cur[ord0], cur[ord[probe]]);
2271 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2275 probe = (lo + hi) / 2;
2278 count_of_smaller_lines = lo - 1;
2279 for (j = 0; j < count_of_smaller_lines; j++)
2280 ord[j] = ord[j + 1];
2281 ord[count_of_smaller_lines] = ord0;
2284 /* Free up some resources every once in a while. */
2285 if (MAX_PROCS_BEFORE_REAP < nprocs)
2289 if (unique && savedline)
2291 write_bytes (saved.text, saved.length, ofp, output_file);
2295 xfclose (ofp, output_file);
2303 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2304 and HI (with NHI members). T, LO, and HI point just past their
2305 respective arrays, and the arrays are in reverse order. NLO and
2306 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2309 mergelines (struct line *t,
2310 struct line const *lo, size_t nlo,
2311 struct line const *hi, size_t nhi)
2314 if (compare (lo - 1, hi - 1) <= 0)
2319 /* HI - NHI equalled T - (NLO + NHI) when this function
2320 began. Therefore HI must equal T now, and there is no
2321 need to copy from HI to T. */
2339 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2340 NLINES must be at least 2.
2341 The input and output arrays are in reverse order, and LINES and
2342 TEMP point just past the end of their respective arrays.
2344 Use a recursive divide-and-conquer algorithm, in the style
2345 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2346 the optimization suggested by exercise 5.2.4-10; this requires room
2347 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2348 writes that this memory optimization was originally published by
2349 D. A. Bell, Comp J. 1 (1958), 75. */
2352 sortlines (struct line *lines, size_t nlines, struct line *temp)
2356 if (0 < compare (&lines[-1], &lines[-2]))
2358 struct line tmp = lines[-1];
2359 lines[-1] = lines[-2];
2365 size_t nlo = nlines / 2;
2366 size_t nhi = nlines - nlo;
2367 struct line *lo = lines;
2368 struct line *hi = lines - nlo;
2369 struct line *sorted_lo = temp;
2371 sortlines (hi, nhi, temp);
2373 sortlines_temp (lo, nlo, sorted_lo);
2375 sorted_lo[-1] = lo[-1];
2377 mergelines (lines, sorted_lo, nlo, hi, nhi);
2381 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2382 rather than sorting in place. */
2385 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2389 /* Declare `swap' as int, not bool, to work around a bug
2390 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2391 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2392 int swap = (0 < compare (&lines[-1], &lines[-2]));
2393 temp[-1] = lines[-1 - swap];
2394 temp[-2] = lines[-2 + swap];
2398 size_t nlo = nlines / 2;
2399 size_t nhi = nlines - nlo;
2400 struct line *lo = lines;
2401 struct line *hi = lines - nlo;
2402 struct line *sorted_hi = temp - nlo;
2404 sortlines_temp (hi, nhi, sorted_hi);
2406 sortlines (lo, nlo, temp);
2408 mergelines (temp, lo, nlo, sorted_hi, nhi);
2412 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2413 the same as OUTFILE. If found, merge the found instances (and perhaps
2414 some other files) into a temporary file so that it can in turn be
2415 merged into OUTFILE without destroying OUTFILE before it is completely
2416 read. Return the new value of NFILES, which differs from the old if
2417 some merging occurred.
2419 This test ensures that an otherwise-erroneous use like
2420 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2421 It's not clear that POSIX requires this nicety.
2422 Detect common error cases, but don't try to catch obscure cases like
2423 "cat ... FILE ... | sort -m -o FILE"
2424 where traditional "sort" doesn't copy the input and where
2425 people should know that they're getting into trouble anyway.
2426 Catching these obscure cases would slow down performance in
2430 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2431 size_t nfiles, char const *outfile)
2434 bool got_outstat = false;
2435 struct stat outstat;
2437 for (i = ntemps; i < nfiles; i++)
2439 bool is_stdin = STREQ (files[i].name, "-");
2443 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2450 ? stat (outfile, &outstat)
2451 : fstat (STDOUT_FILENO, &outstat))
2458 ? fstat (STDIN_FILENO, &instat)
2459 : stat (files[i].name, &instat))
2461 && SAME_INODE (instat, outstat));
2468 char *temp = create_temp (&tftp, &pid);
2469 mergefps (&files[i],0, nfiles - i, tftp, temp);
2470 files[i].name = temp;
2479 /* Merge the input FILES. NTEMPS is the number of files at the
2480 start of FILES that are temporary; it is zero at the top level.
2481 NFILES is the total number of files. Put the output in
2482 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2485 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2486 char const *output_file)
2488 while (nmerge < nfiles)
2490 /* Number of input files processed so far. */
2493 /* Number of output files generated so far. */
2496 /* nfiles % NMERGE; this counts input files that are left over
2497 after all full-sized merges have been done. */
2500 /* Number of easily-available slots at the next loop iteration. */
2503 /* Do as many NMERGE-size merges as possible. */
2504 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2508 char *temp = create_temp (&tfp, &pid);
2509 size_t nt = MIN (ntemps, nmerge);
2511 mergefps (&files[in], nt, nmerge, tfp, temp);
2512 files[out].name = temp;
2513 files[out].pid = pid;
2516 remainder = nfiles - in;
2517 cheap_slots = nmerge - out % nmerge;
2519 if (cheap_slots < remainder)
2521 /* So many files remain that they can't all be put into the last
2522 NMERGE-sized output window. Do one more merge. Merge as few
2523 files as possible, to avoid needless I/O. */
2524 size_t nshortmerge = remainder - cheap_slots + 1;
2527 char *temp = create_temp (&tfp, &pid);
2528 size_t nt = MIN (ntemps, nshortmerge);
2530 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2531 files[out].name = temp;
2532 files[out++].pid = pid;
2536 /* Put the remaining input files into the last NMERGE-sized output
2537 window, so they will be merged in the next pass. */
2538 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2543 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2544 mergefps (files, ntemps, nfiles, NULL, output_file);
2547 /* Sort NFILES FILES onto OUTPUT_FILE. */
2550 sort (char * const *files, size_t nfiles, char const *output_file)
2554 bool output_file_created = false;
2560 char const *temp_output;
2561 char const *file = *files;
2562 FILE *fp = xfopen (file, "r");
2564 size_t bytes_per_line = (2 * sizeof (struct line)
2565 - sizeof (struct line) / 2);
2568 initbuf (&buf, bytes_per_line,
2569 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2574 while (fillbuf (&buf, fp, file))
2577 struct line *linebase;
2579 if (buf.eof && nfiles
2580 && (bytes_per_line + 1
2581 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2583 /* End of file, but there is more input and buffer room.
2584 Concatenate the next input file; this is faster in
2586 buf.left = buf.used;
2590 line = buffer_linelim (&buf);
2591 linebase = line - buf.nlines;
2593 sortlines (line, buf.nlines, linebase);
2594 if (buf.eof && !nfiles && !ntemps && !buf.left)
2597 tfp = xfopen (output_file, "w");
2598 temp_output = output_file;
2599 output_file_created = true;
2604 temp_output = create_temp (&tfp, NULL);
2610 write_bytes (line->text, line->length, tfp, temp_output);
2612 while (linebase < line && compare (line, line - 1) == 0)
2615 while (linebase < line);
2617 xfclose (tfp, temp_output);
2619 /* Free up some resources every once in a while. */
2620 if (MAX_PROCS_BEFORE_REAP < nprocs)
2623 if (output_file_created)
2632 if (! output_file_created)
2635 struct tempnode *node = temphead;
2636 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2637 for (i = 0; node; i++)
2639 tempfiles[i].name = node->name;
2640 tempfiles[i].pid = node->pid;
2643 merge (tempfiles, ntemps, ntemps, output_file);
2648 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2651 insertkey (struct keyfield *key_arg)
2653 struct keyfield **p;
2654 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2656 for (p = &keylist; *p; p = &(*p)->next)
2662 /* Report a bad field specification SPEC, with extra info MSGID. */
2664 static void badfieldspec (char const *, char const *)
2667 badfieldspec (char const *spec, char const *msgid)
2669 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2670 _(msgid), quote (spec));
2674 /* Report incompatible options. */
2676 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2678 incompatible_options (char const *opts)
2680 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2684 /* Check compatibility of ordering options. */
2687 check_ordering_compatibility (void)
2689 struct keyfield const *key;
2691 for (key = keylist; key; key = key->next)
2692 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2694 || (key->random && key->translate))
2698 if (key->ignore == nondictionary)
2702 if (key->general_numeric)
2704 if (key->ignore == nonprinting)
2713 incompatible_options (opts);
2717 /* Parse the leading integer in STRING and store the resulting value
2718 (which must fit into size_t) into *VAL. Return the address of the
2719 suffix after the integer. If the value is too large, silently
2720 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2721 failure; otherwise, report MSGID and exit on failure. */
2724 parse_field_count (char const *string, size_t *val, char const *msgid)
2729 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2732 case LONGINT_INVALID_SUFFIX_CHAR:
2737 case LONGINT_OVERFLOW:
2738 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2742 case LONGINT_INVALID:
2744 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2745 _(msgid), quote (string));
2752 /* Handle interrupts and hangups. */
2755 sighandler (int sig)
2758 signal (sig, SIG_IGN);
2762 signal (sig, SIG_DFL);
2766 /* Set the ordering options for KEY specified in S.
2767 Return the address of the first character in S that
2768 is not a valid ordering option.
2769 BLANKTYPE is the kind of blanks that 'b' should skip. */
2772 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2779 if (blanktype == bl_start || blanktype == bl_both)
2780 key->skipsblanks = true;
2781 if (blanktype == bl_end || blanktype == bl_both)
2782 key->skipeblanks = true;
2785 key->ignore = nondictionary;
2788 key->translate = fold_toupper;
2791 key->general_numeric = true;
2794 /* Option order should not matter, so don't let -i override
2795 -d. -d implies -i, but -i does not imply -d. */
2797 key->ignore = nonprinting;
2803 key->numeric = true;
2809 key->reverse = true;
2819 static struct keyfield *
2820 key_init (struct keyfield *key)
2822 memset (key, 0, sizeof *key);
2823 key->eword = SIZE_MAX;
2828 main (int argc, char **argv)
2830 struct keyfield *key;
2831 struct keyfield key_buf;
2832 struct keyfield gkey;
2836 bool mergeonly = false;
2837 char *random_source = NULL;
2838 bool need_random = false;
2840 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2841 bool obsolete_usage = (posix2_version () < 200112);
2843 char *files_from = NULL;
2845 char const *outfile = NULL;
2847 initialize_main (&argc, &argv);
2848 set_program_name (argv[0]);
2849 setlocale (LC_ALL, "");
2850 bindtextdomain (PACKAGE, LOCALEDIR);
2851 textdomain (PACKAGE);
2853 initialize_exit_failure (SORT_FAILURE);
2855 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2856 #if HAVE_NL_LANGINFO
2857 hard_LC_TIME = hard_locale (LC_TIME);
2860 /* Get locale's representation of the decimal point. */
2862 struct lconv const *locale = localeconv ();
2864 /* If the locale doesn't define a decimal point, or if the decimal
2865 point is multibyte, use the C locale's decimal point. FIXME:
2866 add support for multibyte decimal points. */
2867 decimal_point = to_uchar (locale->decimal_point[0]);
2868 if (! decimal_point || locale->decimal_point[1])
2869 decimal_point = '.';
2871 /* FIXME: add support for multibyte thousands separators. */
2872 thousands_sep = to_uchar (*locale->thousands_sep);
2873 if (! thousands_sep || locale->thousands_sep[1])
2877 have_read_stdin = false;
2882 static int const sig[] =
2884 /* The usual suspects. */
2885 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2902 enum { nsigs = sizeof sig / sizeof sig[0] };
2905 struct sigaction act;
2907 sigemptyset (&caught_signals);
2908 for (i = 0; i < nsigs; i++)
2910 sigaction (sig[i], NULL, &act);
2911 if (act.sa_handler != SIG_IGN)
2912 sigaddset (&caught_signals, sig[i]);
2915 act.sa_handler = sighandler;
2916 act.sa_mask = caught_signals;
2919 for (i = 0; i < nsigs; i++)
2920 if (sigismember (&caught_signals, sig[i]))
2921 sigaction (sig[i], &act, NULL);
2923 for (i = 0; i < nsigs; i++)
2924 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2926 signal (sig[i], sighandler);
2927 siginterrupt (sig[i], 1);
2932 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2933 atexit (exit_cleanup);
2935 gkey.sword = gkey.eword = SIZE_MAX;
2937 gkey.translate = NULL;
2938 gkey.numeric = gkey.general_numeric = gkey.random = false;
2939 gkey.month = gkey.reverse = false;
2940 gkey.skipsblanks = gkey.skipeblanks = false;
2942 files = xnmalloc (argc, sizeof *files);
2946 /* Parse an operand as a file after "--" was seen; or if
2947 pedantic and a file was seen, unless the POSIX version
2948 predates 1003.1-2001 and -c was not seen and the operand is
2949 "-o FILE" or "-oFILE". */
2953 || (posixly_correct && nfiles != 0
2954 && ! (obsolete_usage
2957 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2958 && (argv[optind][2] || optind + 1 != argc)))
2959 || ((c = getopt_long (argc, argv, short_options,
2965 files[nfiles++] = argv[optind++];
2971 if (optarg[0] == '+')
2973 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2974 && ISDIGIT (argv[optind][1]));
2975 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2978 /* Treat +POS1 [-POS2] as a key if possible; but silently
2979 treat an operand as a file if it is not a valid +POS1. */
2980 key = key_init (&key_buf);
2981 s = parse_field_count (optarg + 1, &key->sword, NULL);
2983 s = parse_field_count (s + 1, &key->schar, NULL);
2984 if (! (key->sword | key->schar))
2985 key->sword = SIZE_MAX;
2986 if (! s || *set_ordering (s, key, bl_start))
2990 if (minus_pos_usage)
2992 char const *optarg1 = argv[optind++];
2993 s = parse_field_count (optarg1 + 1, &key->eword,
2994 N_("invalid number after `-'"));
2996 s = parse_field_count (s + 1, &key->echar,
2997 N_("invalid number after `.'"));
2998 if (*set_ordering (s, key, bl_end))
2999 badfieldspec (optarg1,
3000 N_("stray character in field spec"));
3007 files[nfiles++] = optarg;
3011 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3026 set_ordering (str, &gkey, bl_both);
3032 ? XARGMATCH ("--check", optarg, check_args, check_types)
3037 if (checkonly && checkonly != c)
3038 incompatible_options ("cC");
3042 case COMPRESS_PROGRAM_OPTION:
3043 if (compress_program && !STREQ (compress_program, optarg))
3044 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3045 compress_program = optarg;
3048 case FILES0_FROM_OPTION:
3049 files_from = optarg;
3053 key = key_init (&key_buf);
3056 s = parse_field_count (optarg, &key->sword,
3057 N_("invalid number at field start"));
3060 /* Provoke with `sort -k0' */
3061 badfieldspec (optarg, N_("field number is zero"));
3065 s = parse_field_count (s + 1, &key->schar,
3066 N_("invalid number after `.'"));
3069 /* Provoke with `sort -k1.0' */
3070 badfieldspec (optarg, N_("character offset is zero"));
3073 if (! (key->sword | key->schar))
3074 key->sword = SIZE_MAX;
3075 s = set_ordering (s, key, bl_start);
3078 key->eword = SIZE_MAX;
3084 s = parse_field_count (s + 1, &key->eword,
3085 N_("invalid number after `,'"));
3088 /* Provoke with `sort -k1,0' */
3089 badfieldspec (optarg, N_("field number is zero"));
3092 s = parse_field_count (s + 1, &key->echar,
3093 N_("invalid number after `.'"));
3096 /* `-k 2,3' is equivalent to `+1 -3'. */
3099 s = set_ordering (s, key, bl_end);
3102 badfieldspec (optarg, N_("stray character in field spec"));
3111 specify_nmerge (oi, c, optarg);
3115 if (outfile && !STREQ (outfile, optarg))
3116 error (SORT_FAILURE, 0, _("multiple output files specified"));
3120 case RANDOM_SOURCE_OPTION:
3121 if (random_source && !STREQ (random_source, optarg))
3122 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3123 random_source = optarg;
3131 specify_sort_size (oi, c, optarg);
3136 char newtab = optarg[0];
3138 error (SORT_FAILURE, 0, _("empty tab"));
3141 if (STREQ (optarg, "\\0"))
3145 /* Provoke with `sort -txx'. Complain about
3146 "multi-character tab" instead of "multibyte tab", so
3147 that the diagnostic's wording does not need to be
3148 changed once multibyte characters are supported. */
3149 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3153 if (tab != TAB_DEFAULT && tab != newtab)
3154 error (SORT_FAILURE, 0, _("incompatible tabs"));
3160 add_temp_dir (optarg);
3168 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3169 through Solaris 7. It is also accepted by many non-Solaris
3170 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3171 -y is marked as obsolete starting with Solaris 8 (1999), but is
3172 still accepted as of Solaris 10 prerelease (2004).
3174 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3175 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3176 and which in general ignores the argument after "-y" if it
3177 consists entirely of digits (it can even be empty). */
3178 if (optarg == argv[optind - 1])
3181 for (p = optarg; ISDIGIT (*p); p++)
3183 optind -= (*p != '\0');
3191 case_GETOPT_HELP_CHAR;
3193 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3196 usage (SORT_FAILURE);
3204 /* When using --files0-from=F, you may not specify any files
3205 on the command-line. */
3208 error (0, 0, _("extra operand %s"), quote (files[0]));
3209 fprintf (stderr, "%s\n",
3210 _("file operands cannot be combined with --files0-from"));
3211 usage (SORT_FAILURE);
3214 if (STREQ (files_from, "-"))
3218 stream = fopen (files_from, "r");
3220 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3221 quote (files_from));
3224 readtokens0_init (&tok);
3226 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3227 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3228 quote (files_from));
3236 for (i = 0; i < nfiles; i++)
3238 if (STREQ (files[i], "-"))
3239 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3240 "no file name of %s allowed"),
3242 else if (files[i][0] == '\0')
3244 /* Using the standard `filename:line-number:' prefix here is
3245 not totally appropriate, since NUL is the separator, not NL,
3246 but it might be better than nothing. */
3247 unsigned long int file_number = i + 1;
3248 error (SORT_FAILURE, 0,
3249 _("%s:%lu: invalid zero-length file name"),
3250 quotearg_colon (files_from), file_number);
3255 error (SORT_FAILURE, 0, _("no input from %s"),
3256 quote (files_from));
3259 /* Inheritance of global options to individual keys. */
3260 for (key = keylist; key; key = key->next)
3262 if (! (key->ignore || key->translate
3263 || (key->skipsblanks | key->reverse
3264 | key->skipeblanks | key->month | key->numeric
3265 | key->general_numeric
3268 key->ignore = gkey.ignore;
3269 key->translate = gkey.translate;
3270 key->skipsblanks = gkey.skipsblanks;
3271 key->skipeblanks = gkey.skipeblanks;
3272 key->month = gkey.month;
3273 key->numeric = gkey.numeric;
3274 key->general_numeric = gkey.general_numeric;
3275 key->random = gkey.random;
3276 key->reverse = gkey.reverse;
3279 need_random |= key->random;
3282 if (!keylist && (gkey.ignore || gkey.translate
3283 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3284 | gkey.numeric | gkey.general_numeric
3288 need_random |= gkey.random;
3291 check_ordering_compatibility ();
3293 reverse = gkey.reverse;
3297 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3298 if (! randread_source)
3299 die (_("open failed"), random_source);
3302 if (temp_dir_count == 0)
3304 char const *tmp_dir = getenv ("TMPDIR");
3305 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3310 static char *minus = "-";
3316 /* Need to re-check that we meet the minimum requirement for memory
3317 usage with the final value for NMERGE. */
3319 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3324 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3325 quote (files[1]), checkonly);
3329 static char opts[] = {0, 'o', 0};
3330 opts[0] = checkonly;
3331 incompatible_options (opts);
3334 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3335 input is not properly sorted. */
3336 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3341 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3344 for (i = 0; i < nfiles; ++i)
3345 sortfiles[i].name = files[i];
3347 merge (sortfiles, 0, nfiles, outfile);
3348 IF_LINT (free (sortfiles));
3351 sort (files, nfiles, outfile);
3353 if (have_read_stdin && fclose (stdin) == EOF)
3354 die (_("close failed"), "-");
3356 exit (EXIT_SUCCESS);