1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
83 #define UCHAR_LIM (UCHAR_MAX + 1)
85 #ifndef DEFAULT_TMPDIR
86 # define DEFAULT_TMPDIR "/tmp"
92 /* POSIX says to exit with status 1 if invoked with -c and the
93 input is not properly sorted. */
94 SORT_OUT_OF_ORDER = 1,
96 /* POSIX says any other irregular exit must exit with a status
97 code greater than 1. */
103 /* The number of times we should try to fork a compression process
104 (we retry if the fork call fails). We don't _need_ to compress
105 temp files, this is just to reduce disk access, so this number
107 MAX_FORK_TRIES_COMPRESS = 2,
109 /* The number of times we should try to fork a decompression process.
110 If we can't fork a decompression process, we can't sort, so this
111 number should be big. */
112 MAX_FORK_TRIES_DECOMPRESS = 8
115 /* The representation of the decimal point in the current locale. */
116 static int decimal_point;
118 /* Thousands separator; if -1, then there isn't one. */
119 static int thousands_sep;
121 /* Nonzero if the corresponding locales are hard. */
122 static bool hard_LC_COLLATE;
124 static bool hard_LC_TIME;
127 #define NONZERO(x) ((x) != 0)
129 /* The kind of blanks for '-b' to skip in various options. */
130 enum blanktype { bl_start, bl_end, bl_both };
132 /* The character marking end of line. Default to \n. */
133 static char eolchar = '\n';
135 /* Lines are held in core as counted strings. */
138 char *text; /* Text of the line. */
139 size_t length; /* Length including final newline. */
140 char *keybeg; /* Start of first key. */
141 char *keylim; /* Limit of first key. */
147 char *buf; /* Dynamically allocated buffer,
148 partitioned into 3 regions:
151 - an array of lines, in reverse order. */
152 size_t used; /* Number of bytes used for input data. */
153 size_t nlines; /* Number of lines in the line array. */
154 size_t alloc; /* Number of bytes allocated. */
155 size_t left; /* Number of bytes left from previous reads. */
156 size_t line_bytes; /* Number of bytes to reserve for each line. */
157 bool eof; /* An EOF has been read. */
162 size_t sword; /* Zero-origin 'word' to start at. */
163 size_t schar; /* Additional characters to skip. */
164 size_t eword; /* Zero-origin first word after field. */
165 size_t echar; /* Additional characters in field. */
166 bool const *ignore; /* Boolean array of characters to ignore. */
167 char const *translate; /* Translation applied to characters. */
168 bool skipsblanks; /* Skip leading blanks when finding start. */
169 bool skipeblanks; /* Skip leading blanks when finding end. */
170 bool numeric; /* Flag for numeric comparison. Handle
171 strings of digits with optional decimal
172 point, but no exponential notation. */
173 bool random; /* Sort by random hash of key. */
174 bool general_numeric; /* Flag for general, numeric comparison.
175 Handle numbers in exponential notation. */
176 bool month; /* Flag for comparison by month name. */
177 bool reverse; /* Reverse the sense of comparison. */
178 struct keyfield *next; /* Next keyfield to try. */
187 /* FIXME: None of these tables work with multibyte character sets.
188 Also, there are many other bugs when handling multibyte characters.
189 One way to fix this is to rewrite `sort' to use wide characters
190 internally, but doing this with good performance is a bit
193 /* Table of blanks. */
194 static bool blanks[UCHAR_LIM];
196 /* Table of non-printing characters. */
197 static bool nonprinting[UCHAR_LIM];
199 /* Table of non-dictionary characters (not letters, digits, or blanks). */
200 static bool nondictionary[UCHAR_LIM];
202 /* Translation table folding lower case to upper. */
203 static char fold_toupper[UCHAR_LIM];
205 #define MONTHS_PER_YEAR 12
207 /* Table mapping month names to integers.
208 Alphabetic order allows binary search. */
209 static struct month monthtab[] =
225 /* During the merge phase, the number of files to merge at once. */
226 #define NMERGE_DEFAULT 16
228 /* Minimum size for a merge or check buffer. */
229 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
231 /* Minimum sort size; the code might not work with smaller sizes. */
232 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
234 /* Maximum merge buffers we can theoretically support */
235 #define MAX_NMERGE (SIZE_MAX / MIN_MERGE_BUFFER_SIZE)
237 /* The number of bytes needed for a merge or check buffer, which can
238 function relatively efficiently even if it holds only one line. If
239 a longer line is seen, this value is increased. */
240 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
242 /* The approximate maximum number of bytes of main memory to use, as
243 specified by the user. Zero if the user has not specified a size. */
244 static size_t sort_size;
246 /* The guessed size for non-regular files. */
247 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
249 /* Array of directory names in which any temporary files are to be created. */
250 static char const **temp_dirs;
252 /* Number of temporary directory names used. */
253 static size_t temp_dir_count;
255 /* Number of allocated slots in temp_dirs. */
256 static size_t temp_dir_alloc;
258 /* Flag to reverse the order of all comparisons. */
261 /* Flag for stable sort. This turns off the last ditch bytewise
262 comparison of lines, and instead leaves lines in the same order
263 they were read if all keys compare equal. */
266 /* If TAB has this value, blanks separate fields. */
267 enum { TAB_DEFAULT = CHAR_MAX + 1 };
269 /* Tab character separating fields. If TAB_DEFAULT, then fields are
270 separated by the empty string between a non-blank character and a blank
272 static int tab = TAB_DEFAULT;
274 /* Flag to remove consecutive duplicate lines from the output.
275 Only the last of a sequence of equal lines will be output. */
278 /* Nonzero if any of the input files are the standard input. */
279 static bool have_read_stdin;
281 /* List of key field comparisons to be tried. */
282 static struct keyfield *keylist;
284 /* Program used to (de)compress temp files. Must accept -d. */
285 static char const *compress_program;
287 /* Maximum number of files to merge in one go. If more than this
288 number are present, temp files will be used. */
289 static unsigned int nmerge = NMERGE_DEFAULT;
291 static void sortlines_temp (struct line *, size_t, struct line *);
293 /* Report MESSAGE for FILE, then clean up and exit.
294 If FILE is null, it represents standard output. */
296 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
298 die (char const *message, char const *file)
300 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
307 if (status != EXIT_SUCCESS)
308 fprintf (stderr, _("Try `%s --help' for more information.\n"),
313 Usage: %s [OPTION]... [FILE]...\n\
314 or: %s [OPTION]... --files0-from=F\n\
316 program_name, program_name);
318 Write sorted concatenation of all FILE(s) to standard output.\n\
322 Mandatory arguments to long options are mandatory for short options too.\n\
329 -b, --ignore-leading-blanks ignore leading blanks\n\
330 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
331 -f, --ignore-case fold lower case to upper case characters\n\
334 -g, --general-numeric-sort compare according to general numerical value\n\
335 -i, --ignore-nonprinting consider only printable characters\n\
336 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
337 -n, --numeric-sort compare according to string numerical value\n\
338 -R, --random-sort sort by random hash of keys\n\
339 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
340 --sort=WORD sort according to WORD:\n\
341 general-numeric -g, month -M, numeric -n,\n\
343 -r, --reverse reverse the result of comparisons\n\
351 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
352 for more use temp files\n\
355 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
356 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
357 --compress-program=PROG compress temporaries with PROG;\n\
358 decompress them with PROG -d\n\
359 --files0-from=F read input from the files specified by\n\
360 NUL-terminated names in file F\n\
363 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
364 -m, --merge merge already sorted files; do not sort\n\
367 -o, --output=FILE write result to FILE instead of standard output\n\
368 -s, --stable stabilize sort by disabling last-resort comparison\n\
369 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
372 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
373 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
374 multiple options specify multiple directories\n\
375 -u, --unique with -c, check for strict ordering;\n\
376 without -c, output only the first of an equal run\n\
379 -z, --zero-terminated end lines with 0 byte, not newline\n\
381 fputs (HELP_OPTION_DESCRIPTION, stdout);
382 fputs (VERSION_OPTION_DESCRIPTION, stdout);
385 POS is F[.C][OPTS], where F is the field number and C the character position\n\
386 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
387 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
388 one or more single-letter ordering options, which override global ordering\n\
389 options for that key. If no key is given, use the entire line as the key.\n\
391 SIZE may be followed by the following multiplicative suffixes:\n\
394 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
396 With no FILE, or when FILE is -, read standard input.\n\
399 The locale specified by the environment affects sort order.\n\
400 Set LC_ALL=C to get the traditional sort order that uses\n\
401 native byte values.\n\
403 emit_bug_reporting_address ();
409 /* For long options that have no equivalent short option, use a
410 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
413 CHECK_OPTION = CHAR_MAX + 1,
414 COMPRESS_PROGRAM_OPTION,
417 RANDOM_SOURCE_OPTION,
421 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
423 static struct option const long_options[] =
425 {"ignore-leading-blanks", no_argument, NULL, 'b'},
426 {"check", optional_argument, NULL, CHECK_OPTION},
427 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
428 {"dictionary-order", no_argument, NULL, 'd'},
429 {"ignore-case", no_argument, NULL, 'f'},
430 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
431 {"general-numeric-sort", no_argument, NULL, 'g'},
432 {"ignore-nonprinting", no_argument, NULL, 'i'},
433 {"key", required_argument, NULL, 'k'},
434 {"merge", no_argument, NULL, 'm'},
435 {"month-sort", no_argument, NULL, 'M'},
436 {"numeric-sort", no_argument, NULL, 'n'},
437 {"random-sort", no_argument, NULL, 'R'},
438 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
439 {"sort", required_argument, NULL, SORT_OPTION},
440 {"output", required_argument, NULL, 'o'},
441 {"reverse", no_argument, NULL, 'r'},
442 {"stable", no_argument, NULL, 's'},
443 {"batch-size", required_argument, NULL, NMERGE_OPTION},
444 {"buffer-size", required_argument, NULL, 'S'},
445 {"field-separator", required_argument, NULL, 't'},
446 {"temporary-directory", required_argument, NULL, 'T'},
447 {"unique", no_argument, NULL, 'u'},
448 {"zero-terminated", no_argument, NULL, 'z'},
449 {GETOPT_HELP_OPTION_DECL},
450 {GETOPT_VERSION_OPTION_DECL},
454 static char const *const check_args[] =
456 "quiet", "silent", "diagnose-first", NULL
458 static char const check_types[] =
462 ARGMATCH_VERIFY (check_args, check_types);
464 static char const *const sort_args[] =
466 "general-numeric", "month", "numeric", "random", NULL
468 static char const sort_types[] =
472 ARGMATCH_VERIFY (sort_args, sort_types);
474 /* The set of signals that are caught. */
475 static sigset_t caught_signals;
477 /* Critical section status. */
484 /* Enter a critical section. */
485 static struct cs_status
488 struct cs_status status;
489 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
493 /* Leave a critical section. */
495 cs_leave (struct cs_status status)
499 /* Ignore failure when restoring the signal mask. */
500 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
504 /* The list of temporary files. */
507 struct tempnode *volatile next;
508 pid_t pid; /* If compressed, the pid of compressor, else zero */
509 char name[1]; /* Actual size is 1 + file name length. */
511 static struct tempnode *volatile temphead;
512 static struct tempnode *volatile *temptail = &temphead;
517 pid_t pid; /* If compressed, the pid of compressor, else zero */
520 /* A table where we store compression process states. We clean up all
521 processes in a timely manner so as not to exhaust system resources,
522 so we store the info on whether the process is still running, or has
524 static Hash_table *proctab;
526 enum { INIT_PROCTAB_SIZE = 47 };
528 enum procstate { ALIVE, ZOMBIE };
530 /* A proctab entry. The COUNT field is there in case we fork a new
531 compression process that has the same PID as an old zombie process
532 that is still in the table (because the process to decompress the
533 temp file it was associated with hasn't started yet). */
537 enum procstate state;
542 proctab_hasher (const void *entry, size_t tabsize)
544 const struct procnode *node = entry;
545 return node->pid % tabsize;
549 proctab_comparator (const void *e1, const void *e2)
551 const struct procnode *n1 = e1, *n2 = e2;
552 return n1->pid == n2->pid;
555 /* The total number of forked processes (compressors and decompressors)
556 that have not been reaped yet. */
557 static size_t nprocs;
559 /* The number of child processes we'll allow before we try to reap some. */
560 enum { MAX_PROCS_BEFORE_REAP = 2 };
562 /* If 0 < PID, wait for the child process with that PID to exit.
563 If PID is -1, clean up a random child process which has finished and
564 return the process ID of that child. If PID is -1 and no processes
565 have quit yet, return 0 without waiting. */
571 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
574 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
578 if (! WIFEXITED (status) || WEXITSTATUS (status))
579 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
587 /* Add the PID of a running compression process to proctab, or update
588 the entry COUNT and STATE fields if it's already there. This also
589 creates the table for us the first time it's called. */
592 register_proc (pid_t pid)
594 struct procnode test, *node;
598 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
607 node = hash_lookup (proctab, &test);
615 node = xmalloc (sizeof *node);
619 hash_insert (proctab, node);
623 /* This is called when we reap a random process. We don't know
624 whether we have reaped a compression process or a decompression
625 process until we look in the table. If there's an ALIVE entry for
626 it, then we have reaped a compression process, so change the state
627 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
630 update_proc (pid_t pid)
632 struct procnode test, *node;
635 node = hash_lookup (proctab, &test);
637 node->state = ZOMBIE;
640 /* This is for when we need to wait for a compression process to exit.
641 If it has a ZOMBIE entry in the table then it's already dead and has
642 been reaped. Note that if there's an ALIVE entry for it, it still may
643 already have died and been reaped if a second process was created with
644 the same PID. This is probably exceedingly rare, but to be on the safe
645 side we will have to wait for any compression process with this PID. */
648 wait_proc (pid_t pid)
650 struct procnode test, *node;
653 node = hash_lookup (proctab, &test);
654 if (node->state == ALIVE)
657 node->state = ZOMBIE;
660 hash_delete (proctab, node);
665 /* Keep reaping finished children as long as there are more to reap.
666 This doesn't block waiting for any of them, it only reaps those
667 that are already dead. */
674 while (0 < nprocs && (pid = reap (-1)))
678 /* Clean up any remaining temporary files. */
683 struct tempnode const *node;
685 for (node = temphead; node; node = node->next)
690 /* Cleanup actions to take when exiting. */
697 /* Clean up any remaining temporary files in a critical section so
698 that a signal handler does not try to clean them too. */
699 struct cs_status cs = cs_enter ();
707 /* Create a new temporary file, returning its newly allocated tempnode.
708 Store into *PFD the file descriptor open for writing. */
710 static struct tempnode *
711 create_temp_file (int *pfd)
713 static char const slashbase[] = "/sortXXXXXX";
714 static size_t temp_dir_index;
717 char const *temp_dir = temp_dirs[temp_dir_index];
718 size_t len = strlen (temp_dir);
719 struct tempnode *node =
720 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
721 char *file = node->name;
724 memcpy (file, temp_dir, len);
725 memcpy (file + len, slashbase, sizeof slashbase);
728 if (++temp_dir_index == temp_dir_count)
731 /* Create the temporary file in a critical section, to avoid races. */
737 temptail = &node->next;
744 die (_("cannot create temporary file"), file);
750 /* Return a stream for FILE, opened with mode HOW. A null FILE means
751 standard output; HOW should be "w". When opening for input, "-"
752 means standard input. To avoid confusion, do not return file
753 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
754 opening an ordinary FILE. */
757 xfopen (const char *file, const char *how)
763 else if (STREQ (file, "-") && *how == 'r')
765 have_read_stdin = true;
770 fp = fopen (file, how);
772 die (_("open failed"), file);
778 /* Close FP, whose name is FILE, and report any errors. */
781 xfclose (FILE *fp, char const *file)
786 /* Allow reading stdin from tty more than once. */
792 /* Don't close stdout just yet. close_stdout does that. */
793 if (fflush (fp) != 0)
794 die (_("fflush failed"), file);
798 if (fclose (fp) != 0)
799 die (_("close failed"), file);
805 dup2_or_die (int oldfd, int newfd)
807 if (dup2 (oldfd, newfd) < 0)
808 error (SORT_FAILURE, errno, _("dup2 failed"));
811 /* Fork a child process for piping to and do common cleanup. The
812 TRIES parameter tells us how many times to try to fork before
813 giving up. Return the PID of the child or -1 if fork failed. */
816 pipe_fork (int pipefds[2], size_t tries)
818 #if HAVE_WORKING_FORK
819 struct tempnode *saved_temphead;
821 unsigned int wait_retry = 1;
822 pid_t pid IF_LINT (= -1);
825 if (pipe (pipefds) < 0)
830 /* This is so the child process won't delete our temp files
831 if it receives a signal before exec-ing. */
833 saved_temphead = temphead;
839 temphead = saved_temphead;
844 if (0 <= pid || errno != EAGAIN)
861 close (STDIN_FILENO);
862 close (STDOUT_FILENO);
869 #else /* ! HAVE_WORKING_FORK */
874 /* Create a temporary file and start a compression program to filter output
875 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
876 set it to the PID of the newly-created process. */
879 create_temp (FILE **pfp, pid_t *ppid)
882 struct tempnode *node = create_temp_file (&tempfd);
883 char *name = node->name;
885 if (compress_program)
889 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
896 register_proc (node->pid);
898 else if (node->pid == 0)
901 dup2_or_die (tempfd, STDOUT_FILENO);
903 dup2_or_die (pipefds[0], STDIN_FILENO);
906 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
907 error (SORT_FAILURE, errno, _("couldn't execute %s"),
914 *pfp = fdopen (tempfd, "w");
916 die (_("couldn't create temporary file"), name);
924 /* Open a compressed temp file and start a decompression process through
925 which to filter the input. PID must be the valid processes ID of the
926 process used to compress the file. */
929 open_temp (const char *name, pid_t pid)
931 int tempfd, pipefds[2];
937 tempfd = open (name, O_RDONLY);
939 die (_("couldn't open temporary file"), name);
941 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
947 else if (child_pid == 0)
950 dup2_or_die (tempfd, STDIN_FILENO);
952 dup2_or_die (pipefds[1], STDOUT_FILENO);
955 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
956 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
960 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
963 fp = fdopen (pipefds[0], "r");
965 die (_("couldn't create temporary file"), name);
971 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
973 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
974 die (_("write failed"), output_file);
977 /* Append DIR to the array of temporary directory names. */
979 add_temp_dir (char const *dir)
981 if (temp_dir_count == temp_dir_alloc)
982 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
984 temp_dirs[temp_dir_count++] = dir;
987 /* Remove NAME from the list of temporary files. */
990 zaptemp (const char *name)
992 struct tempnode *volatile *pnode;
993 struct tempnode *node;
994 struct tempnode *next;
996 int unlink_errno = 0;
999 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1002 /* Unlink the temporary file in a critical section to avoid races. */
1005 unlink_status = unlink (name);
1006 unlink_errno = errno;
1010 if (unlink_status != 0)
1011 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1017 #if HAVE_NL_LANGINFO
1020 struct_month_cmp (const void *m1, const void *m2)
1022 struct month const *month1 = m1;
1023 struct month const *month2 = m2;
1024 return strcmp (month1->name, month2->name);
1029 /* Initialize the character class tables. */
1036 for (i = 0; i < UCHAR_LIM; ++i)
1038 blanks[i] = !! isblank (i);
1039 nonprinting[i] = ! isprint (i);
1040 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1041 fold_toupper[i] = toupper (i);
1044 #if HAVE_NL_LANGINFO
1045 /* If we're not in the "C" locale, read different names for months. */
1048 for (i = 0; i < MONTHS_PER_YEAR; i++)
1055 s = (char *) nl_langinfo (ABMON_1 + i);
1057 monthtab[i].name = name = xmalloc (s_len + 1);
1058 monthtab[i].val = i + 1;
1060 for (j = 0; j < s_len; j++)
1061 name[j] = fold_toupper[to_uchar (s[j])];
1064 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1065 sizeof *monthtab, struct_month_cmp);
1070 /* Specify how many inputs may be merged at once.
1071 This may be set on the command-line with the
1072 --batch-size option. */
1074 specify_nmerge (int oi, char c, char const *s)
1077 struct rlimit rlimit;
1078 unsigned int max_nmerge = (unsigned int) MAX_NMERGE;
1079 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1081 /* Try to find out how many file descriptors we'll be able
1082 to open. We need at least nmerge + 3 (STDIN_FILENO,
1083 STDOUT_FILENO and STDERR_FILENO). */
1084 if (getrlimit (RLIMIT_NOFILE, &rlimit) == 0)
1085 max_nmerge = MIN (max_nmerge, rlimit.rlim_cur - 3);
1087 if (e == LONGINT_OK)
1091 e = LONGINT_OVERFLOW;
1096 error (0, 0, _("invalid --%s argument %s"),
1097 long_options[oi].name, quote(s));
1098 error (SORT_FAILURE, 0,
1099 _("minimum --%s argument is %s"),
1100 long_options[oi].name, quote("2"));
1102 else if (max_nmerge < nmerge)
1104 e = LONGINT_OVERFLOW;
1111 if (e == LONGINT_OVERFLOW)
1113 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1114 uinttostr (max_nmerge, max_nmerge_buf);
1115 error (0, 0, _("--%s argument %s too large"),
1116 long_options[oi].name, quote(s));
1117 error (SORT_FAILURE, 0,
1118 _("maximum --%s argument with current rlimit is %s"),
1119 long_options[oi].name, quote (max_nmerge_buf));
1122 xstrtol_fatal (e, oi, c, long_options, s);
1125 /* Specify the amount of main memory to use when sorting. */
1127 specify_sort_size (int oi, char c, char const *s)
1131 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1133 /* The default unit is KiB. */
1134 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1136 if (n <= UINTMAX_MAX / 1024)
1139 e = LONGINT_OVERFLOW;
1142 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1143 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1152 double mem = physmem_total () * n / 100;
1154 /* Use "<", not "<=", to avoid problems with rounding. */
1155 if (mem < UINTMAX_MAX)
1161 e = LONGINT_OVERFLOW;
1166 if (e == LONGINT_OK)
1168 /* If multiple sort sizes are specified, take the maximum, so
1169 that option order does not matter. */
1176 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1180 e = LONGINT_OVERFLOW;
1183 xstrtol_fatal (e, oi, c, long_options, s);
1186 /* Return the default sort size. */
1188 default_sort_size (void)
1190 /* Let MEM be available memory or 1/8 of total memory, whichever
1192 double avail = physmem_available ();
1193 double total = physmem_total ();
1194 double mem = MAX (avail, total / 8);
1195 struct rlimit rlimit;
1197 /* Let SIZE be MEM, but no more than the maximum object size or
1198 system resource limits. Avoid the MIN macro here, as it is not
1199 quite right when only one argument is floating point. Don't
1200 bother to check for values like RLIM_INFINITY since in practice
1201 they are not much less than SIZE_MAX. */
1202 size_t size = SIZE_MAX;
1205 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1206 size = rlimit.rlim_cur;
1208 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1209 size = rlimit.rlim_cur;
1212 /* Leave a large safety margin for the above limits, as failure can
1213 occur when they are exceeded. */
1217 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1218 Exceeding RSS is not fatal, but can be quite slow. */
1219 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1220 size = rlimit.rlim_cur / 16 * 15;
1223 /* Use no less than the minimum. */
1224 return MAX (size, MIN_SORT_SIZE);
1227 /* Return the sort buffer size to use with the input files identified
1228 by FPS and FILES, which are alternate names of the same files.
1229 NFILES gives the number of input files; NFPS may be less. Assume
1230 that each input line requires LINE_BYTES extra bytes' worth of line
1231 information. Do not exceed the size bound specified by the user
1232 (or a default size bound, if the user does not specify one). */
1235 sort_buffer_size (FILE *const *fps, size_t nfps,
1236 char *const *files, size_t nfiles,
1239 /* A bound on the input size. If zero, the bound hasn't been
1241 static size_t size_bound;
1243 /* In the worst case, each input byte is a newline. */
1244 size_t worst_case_per_input_byte = line_bytes + 1;
1246 /* Keep enough room for one extra input line and an extra byte.
1247 This extra room might be needed when preparing to read EOF. */
1248 size_t size = worst_case_per_input_byte + 1;
1252 for (i = 0; i < nfiles; i++)
1258 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1259 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1260 : stat (files[i], &st))
1262 die (_("stat failed"), files[i]);
1264 if (S_ISREG (st.st_mode))
1265 file_size = st.st_size;
1268 /* The file has unknown size. If the user specified a sort
1269 buffer size, use that; otherwise, guess the size. */
1272 file_size = INPUT_FILE_SIZE_GUESS;
1277 size_bound = sort_size;
1279 size_bound = default_sort_size ();
1282 /* Add the amount of memory needed to represent the worst case
1283 where the input consists entirely of newlines followed by a
1284 single non-newline. Check for overflow. */
1285 worst_case = file_size * worst_case_per_input_byte + 1;
1286 if (file_size != worst_case / worst_case_per_input_byte
1287 || size_bound - size <= worst_case)
1295 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1296 must be at least sizeof (struct line). Allocate ALLOC bytes
1300 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1302 /* Ensure that the line array is properly aligned. If the desired
1303 size cannot be allocated, repeatedly halve it until allocation
1304 succeeds. The smaller allocation may hurt overall performance,
1305 but that's better than failing. */
1308 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1309 buf->buf = malloc (alloc);
1313 if (alloc <= line_bytes + 1)
1317 buf->line_bytes = line_bytes;
1319 buf->used = buf->left = buf->nlines = 0;
1323 /* Return one past the limit of the line array. */
1325 static inline struct line *
1326 buffer_linelim (struct buffer const *buf)
1328 return (struct line *) (buf->buf + buf->alloc);
1331 /* Return a pointer to the first character of the field specified
1335 begfield (const struct line *line, const struct keyfield *key)
1337 char *ptr = line->text, *lim = ptr + line->length - 1;
1338 size_t sword = key->sword;
1339 size_t schar = key->schar;
1340 size_t remaining_bytes;
1342 /* The leading field separator itself is included in a field when -t
1345 if (tab != TAB_DEFAULT)
1346 while (ptr < lim && sword--)
1348 while (ptr < lim && *ptr != tab)
1354 while (ptr < lim && sword--)
1356 while (ptr < lim && blanks[to_uchar (*ptr)])
1358 while (ptr < lim && !blanks[to_uchar (*ptr)])
1362 if (key->skipsblanks)
1363 while (ptr < lim && blanks[to_uchar (*ptr)])
1366 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1367 remaining_bytes = lim - ptr;
1368 if (schar < remaining_bytes)
1376 /* Return the limit of (a pointer to the first character after) the field
1377 in LINE specified by KEY. */
1380 limfield (const struct line *line, const struct keyfield *key)
1382 char *ptr = line->text, *lim = ptr + line->length - 1;
1383 size_t eword = key->eword, echar = key->echar;
1384 size_t remaining_bytes;
1386 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1387 whichever comes first. If there are more than EWORD fields, leave
1388 PTR pointing at the beginning of the field having zero-based index,
1389 EWORD. If a delimiter character was specified (via -t), then that
1390 `beginning' is the first character following the delimiting TAB.
1391 Otherwise, leave PTR pointing at the first `blank' character after
1392 the preceding field. */
1393 if (tab != TAB_DEFAULT)
1394 while (ptr < lim && eword--)
1396 while (ptr < lim && *ptr != tab)
1398 if (ptr < lim && (eword | echar))
1402 while (ptr < lim && eword--)
1404 while (ptr < lim && blanks[to_uchar (*ptr)])
1406 while (ptr < lim && !blanks[to_uchar (*ptr)])
1410 #ifdef POSIX_UNSPECIFIED
1411 /* The following block of code makes GNU sort incompatible with
1412 standard Unix sort, so it's ifdef'd out for now.
1413 The POSIX spec isn't clear on how to interpret this.
1414 FIXME: request clarification.
1416 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1417 Date: Thu, 30 May 96 12:20:41 -0400
1418 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1420 [...]I believe I've found another bug in `sort'.
1425 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1428 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1432 Unix sort produced the answer I expected: sort on the single character
1433 in column 7. GNU sort produced different results, because it disagrees
1434 on the interpretation of the key-end spec "M.N". Unix sort reads this
1435 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1436 "skip M-1 fields, then either N-1 characters or the rest of the current
1437 field, whichever comes first". This extra clause applies only to
1438 key-ends, not key-starts.
1441 /* Make LIM point to the end of (one byte past) the current field. */
1442 if (tab != TAB_DEFAULT)
1445 newlim = memchr (ptr, tab, lim - ptr);
1453 while (newlim < lim && blanks[to_uchar (*newlim)])
1455 while (newlim < lim && !blanks[to_uchar (*newlim)])
1461 /* If we're ignoring leading blanks when computing the End
1462 of the field, don't start counting bytes until after skipping
1463 past any leading blanks. */
1464 if (key->skipeblanks)
1465 while (ptr < lim && blanks[to_uchar (*ptr)])
1468 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1469 remaining_bytes = lim - ptr;
1470 if (echar < remaining_bytes)
1478 /* Fill BUF reading from FP, moving buf->left bytes from the end
1479 of buf->buf to the beginning first. If EOF is reached and the
1480 file wasn't terminated by a newline, supply one. Set up BUF's line
1481 table too. FILE is the name of the file corresponding to FP.
1482 Return true if some input was read. */
1485 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1487 struct keyfield const *key = keylist;
1489 size_t line_bytes = buf->line_bytes;
1490 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1495 if (buf->used != buf->left)
1497 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1498 buf->used = buf->left;
1504 char *ptr = buf->buf + buf->used;
1505 struct line *linelim = buffer_linelim (buf);
1506 struct line *line = linelim - buf->nlines;
1507 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1508 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1510 while (line_bytes + 1 < avail)
1512 /* Read as many bytes as possible, but do not read so many
1513 bytes that there might not be enough room for the
1514 corresponding line array. The worst case is when the
1515 rest of the input file consists entirely of newlines,
1516 except that the last byte is not a newline. */
1517 size_t readsize = (avail - 1) / (line_bytes + 1);
1518 size_t bytes_read = fread (ptr, 1, readsize, fp);
1519 char *ptrlim = ptr + bytes_read;
1521 avail -= bytes_read;
1523 if (bytes_read != readsize)
1526 die (_("read failed"), file);
1530 if (buf->buf == ptrlim)
1532 if (ptrlim[-1] != eol)
1537 /* Find and record each line in the just-read input. */
1538 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1542 line->text = line_start;
1543 line->length = ptr - line_start;
1544 mergesize = MAX (mergesize, line->length);
1545 avail -= line_bytes;
1549 /* Precompute the position of the first key for
1551 line->keylim = (key->eword == SIZE_MAX
1553 : limfield (line, key));
1555 if (key->sword != SIZE_MAX)
1556 line->keybeg = begfield (line, key);
1559 if (key->skipsblanks)
1560 while (blanks[to_uchar (*line_start)])
1562 line->keybeg = line_start;
1574 buf->used = ptr - buf->buf;
1575 buf->nlines = buffer_linelim (buf) - line;
1576 if (buf->nlines != 0)
1578 buf->left = ptr - line_start;
1579 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1584 /* The current input line is too long to fit in the buffer.
1585 Double the buffer size and try again, keeping it properly
1587 size_t line_alloc = buf->alloc / sizeof (struct line);
1588 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1589 buf->alloc = line_alloc * sizeof (struct line);
1594 /* Compare strings A and B as numbers without explicitly converting them to
1595 machine numbers. Comparatively slow for short strings, but asymptotically
1599 numcompare (const char *a, const char *b)
1601 while (blanks[to_uchar (*a)])
1603 while (blanks[to_uchar (*b)])
1606 return strnumcmp (a, b, decimal_point, thousands_sep);
1610 general_numcompare (const char *sa, const char *sb)
1612 /* FIXME: add option to warn about failed conversions. */
1613 /* FIXME: maybe add option to try expensive FP conversion
1614 only if A and B can't be compared more cheaply/accurately. */
1618 double a = strtod (sa, &ea);
1619 double b = strtod (sb, &eb);
1621 /* Put conversion errors at the start of the collating sequence. */
1623 return sb == eb ? 0 : -1;
1627 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1628 conversion errors but before numbers; sort them by internal
1629 bit-pattern, for lack of a more portable alternative. */
1635 : memcmp ((char *) &a, (char *) &b, sizeof a));
1638 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1639 Return 0 if the name in S is not recognized. */
1642 getmonth (char const *month, size_t len)
1645 size_t hi = MONTHS_PER_YEAR;
1646 char const *monthlim = month + len;
1650 if (month == monthlim)
1652 if (!blanks[to_uchar (*month)])
1659 size_t ix = (lo + hi) / 2;
1660 char const *m = month;
1661 char const *n = monthtab[ix].name;
1666 return monthtab[ix].val;
1667 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1672 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1684 /* A source of random data. */
1685 static struct randread_source *randread_source;
1687 /* Return the Ith randomly-generated state. The caller must invoke
1688 random_state (H) for all H less than I before invoking random_state
1691 static struct md5_ctx
1692 random_state (size_t i)
1694 /* An array of states resulting from the random data, and counts of
1695 its used and allocated members. */
1696 static struct md5_ctx *state;
1698 static size_t allocated;
1700 struct md5_ctx *s = &state[i];
1704 unsigned char buf[MD5_DIGEST_SIZE];
1710 state = X2NREALLOC (state, &allocated);
1714 randread (randread_source, buf, sizeof buf);
1716 md5_process_bytes (buf, sizeof buf, s);
1722 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1723 with length LENGTHB. Return negative if less, zero if equal,
1724 positive if greater. */
1727 cmp_hashes (char const *texta, size_t lena,
1728 char const *textb, size_t lenb)
1730 /* Try random hashes until a pair of hashes disagree. But if the
1731 first pair of random hashes agree, check whether the keys are
1732 identical and if so report no difference. */
1737 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1738 struct md5_ctx s[2];
1739 s[0] = s[1] = random_state (i);
1740 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1741 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1742 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1745 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1752 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1753 using one or more random hash functions. */
1756 compare_random (char *restrict texta, size_t lena,
1757 char *restrict textb, size_t lenb)
1761 if (! hard_LC_COLLATE)
1762 diff = cmp_hashes (texta, lena, textb, lenb);
1765 /* Transform the text into the basis of comparison, so that byte
1766 strings that would otherwise considered to be equal are
1767 considered equal here even if their bytes differ. */
1770 char stackbuf[4000];
1771 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1772 bool a_fits = tlena <= sizeof stackbuf;
1773 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1774 (a_fits ? sizeof stackbuf - tlena : 0),
1777 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1781 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1782 faster by avoiding the need for an extra buffer copy. */
1783 buf = xmalloc (tlena + tlenb + 1);
1784 xmemxfrm (buf, tlena + 1, texta, lena);
1785 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1788 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1790 if (buf != stackbuf)
1797 /* Compare two lines A and B trying every key in sequence until there
1798 are no more keys or a difference is found. */
1801 keycompare (const struct line *a, const struct line *b)
1803 struct keyfield const *key = keylist;
1805 /* For the first iteration only, the key positions have been
1806 precomputed for us. */
1807 char *texta = a->keybeg;
1808 char *textb = b->keybeg;
1809 char *lima = a->keylim;
1810 char *limb = b->keylim;
1816 char const *translate = key->translate;
1817 bool const *ignore = key->ignore;
1819 /* Find the lengths. */
1820 size_t lena = lima <= texta ? 0 : lima - texta;
1821 size_t lenb = limb <= textb ? 0 : limb - textb;
1823 /* Actually compare the fields. */
1826 diff = compare_random (texta, lena, textb, lenb);
1827 else if (key->numeric | key->general_numeric)
1829 char savea = *lima, saveb = *limb;
1831 *lima = *limb = '\0';
1832 diff = ((key->numeric ? numcompare : general_numcompare)
1834 *lima = savea, *limb = saveb;
1836 else if (key->month)
1837 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1838 /* Sorting like this may become slow, so in a simple locale the user
1839 can select a faster sort that is similar to ascii sort. */
1840 else if (hard_LC_COLLATE)
1842 if (ignore || translate)
1845 size_t size = lena + 1 + lenb + 1;
1846 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1847 char *copy_b = copy_a + lena + 1;
1848 size_t new_len_a, new_len_b, i;
1850 /* Ignore and/or translate chars before comparing. */
1851 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1855 copy_a[new_len_a] = (translate
1856 ? translate[to_uchar (texta[i])]
1858 if (!ignore || !ignore[to_uchar (texta[i])])
1863 copy_b[new_len_b] = (translate
1864 ? translate[to_uchar (textb[i])]
1866 if (!ignore || !ignore[to_uchar (textb[i])])
1871 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1873 if (sizeof buf < size)
1877 diff = - NONZERO (lenb);
1881 diff = xmemcoll (texta, lena, textb, lenb);
1885 #define CMP_WITH_IGNORE(A, B) \
1890 while (texta < lima && ignore[to_uchar (*texta)]) \
1892 while (textb < limb && ignore[to_uchar (*textb)]) \
1894 if (! (texta < lima && textb < limb)) \
1896 diff = to_uchar (A) - to_uchar (B); \
1903 diff = (texta < lima) - (textb < limb); \
1908 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1909 translate[to_uchar (*textb)]);
1911 CMP_WITH_IGNORE (*texta, *textb);
1914 diff = - NONZERO (lenb);
1921 while (texta < lima && textb < limb)
1923 diff = (to_uchar (translate[to_uchar (*texta++)])
1924 - to_uchar (translate[to_uchar (*textb++)]));
1931 diff = memcmp (texta, textb, MIN (lena, lenb));
1935 diff = lena < lenb ? -1 : lena != lenb;
1945 /* Find the beginning and limit of the next field. */
1946 if (key->eword != SIZE_MAX)
1947 lima = limfield (a, key), limb = limfield (b, key);
1949 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1951 if (key->sword != SIZE_MAX)
1952 texta = begfield (a, key), textb = begfield (b, key);
1955 texta = a->text, textb = b->text;
1956 if (key->skipsblanks)
1958 while (texta < lima && blanks[to_uchar (*texta)])
1960 while (textb < limb && blanks[to_uchar (*textb)])
1971 return key->reverse ? -diff : diff;
1974 /* Compare two lines A and B, returning negative, zero, or positive
1975 depending on whether A compares less than, equal to, or greater than B. */
1978 compare (const struct line *a, const struct line *b)
1983 /* First try to compare on the specified keys (if any).
1984 The only two cases with no key at all are unadorned sort,
1985 and unadorned sort -r. */
1988 diff = keycompare (a, b);
1989 if (diff | unique | stable)
1993 /* If the keys all compare equal (or no keys were specified)
1994 fall through to the default comparison. */
1995 alen = a->length - 1, blen = b->length - 1;
1998 diff = - NONZERO (blen);
2001 else if (hard_LC_COLLATE)
2002 diff = xmemcoll (a->text, alen, b->text, blen);
2003 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2004 diff = alen < blen ? -1 : alen != blen;
2006 return reverse ? -diff : diff;
2009 /* Check that the lines read from FILE_NAME come in order. Return
2010 true if they are in order. If CHECKONLY == 'c', also print a
2011 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2012 they are not in order. */
2015 check (char const *file_name, char checkonly)
2017 FILE *fp = xfopen (file_name, "r");
2018 struct buffer buf; /* Input buffer. */
2019 struct line temp; /* Copy of previous line. */
2021 uintmax_t line_number = 0;
2022 struct keyfield const *key = keylist;
2023 bool nonunique = ! unique;
2024 bool ordered = true;
2026 initbuf (&buf, sizeof (struct line),
2027 MAX (merge_buffer_size, sort_size));
2030 while (fillbuf (&buf, fp, file_name))
2032 struct line const *line = buffer_linelim (&buf);
2033 struct line const *linebase = line - buf.nlines;
2035 /* Make sure the line saved from the old buffer contents is
2036 less than or equal to the first line of the new buffer. */
2037 if (alloc && nonunique <= compare (&temp, line - 1))
2041 if (checkonly == 'c')
2043 struct line const *disorder_line = line - 1;
2044 uintmax_t disorder_line_number =
2045 buffer_linelim (&buf) - disorder_line + line_number;
2046 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2047 fprintf (stderr, _("%s: %s:%s: disorder: "),
2048 program_name, file_name,
2049 umaxtostr (disorder_line_number, hr_buf));
2050 write_bytes (disorder_line->text, disorder_line->length,
2051 stderr, _("standard error"));
2059 /* Compare each line in the buffer with its successor. */
2060 while (linebase < --line)
2061 if (nonunique <= compare (line, line - 1))
2062 goto found_disorder;
2064 line_number += buf.nlines;
2066 /* Save the last line of the buffer. */
2067 if (alloc < line->length)
2074 alloc = line->length;
2078 while (alloc < line->length);
2080 temp.text = xrealloc (temp.text, alloc);
2082 memcpy (temp.text, line->text, line->length);
2083 temp.length = line->length;
2086 temp.keybeg = temp.text + (line->keybeg - line->text);
2087 temp.keylim = temp.text + (line->keylim - line->text);
2091 xfclose (fp, file_name);
2097 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2098 files (all of which are at the start of the FILES array), and
2099 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2100 Close input and output files before returning.
2101 OUTPUT_FILE gives the name of the output file. If it is NULL,
2102 the output file is standard output. If OFP is NULL, the output
2103 file has not been opened yet (or written to, if standard output). */
2106 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2107 FILE *ofp, char const *output_file)
2109 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2110 /* Input streams for each file. */
2111 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2112 /* Input buffers for each file. */
2113 struct line saved; /* Saved line storage for unique check. */
2114 struct line const *savedline = NULL;
2115 /* &saved if there is a saved line. */
2116 size_t savealloc = 0; /* Size allocated for the saved line. */
2117 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2118 /* Current line in each line table. */
2119 struct line const **base = xnmalloc (nmerge, sizeof *base);
2120 /* Base of each line table. */
2121 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2122 /* Table representing a permutation of fps,
2123 such that cur[ord[0]] is the smallest line
2124 and will be next output. */
2128 struct keyfield const *key = keylist;
2131 /* Read initial lines from each input file. */
2132 for (i = 0; i < nfiles; )
2134 fps[i] = (files[i].pid
2135 ? open_temp (files[i].name, files[i].pid)
2136 : xfopen (files[i].name, "r"));
2137 initbuf (&buffer[i], sizeof (struct line),
2138 MAX (merge_buffer_size, sort_size / nfiles));
2139 if (fillbuf (&buffer[i], fps[i], files[i].name))
2141 struct line const *linelim = buffer_linelim (&buffer[i]);
2142 cur[i] = linelim - 1;
2143 base[i] = linelim - buffer[i].nlines;
2148 /* fps[i] is empty; eliminate it from future consideration. */
2149 xfclose (fps[i], files[i].name);
2153 zaptemp (files[i].name);
2155 free (buffer[i].buf);
2157 for (j = i; j < nfiles; ++j)
2158 files[j] = files[j + 1];
2163 ofp = xfopen (output_file, "w");
2165 /* Set up the ord table according to comparisons among input lines.
2166 Since this only reorders two items if one is strictly greater than
2167 the other, it is stable. */
2168 for (i = 0; i < nfiles; ++i)
2170 for (i = 1; i < nfiles; ++i)
2171 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2172 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2174 /* Repeatedly output the smallest line until no input remains. */
2177 struct line const *smallest = cur[ord[0]];
2179 /* If uniquified output is turned on, output only the first of
2180 an identical series of lines. */
2183 if (savedline && compare (savedline, smallest))
2186 write_bytes (saved.text, saved.length, ofp, output_file);
2191 if (savealloc < smallest->length)
2196 savealloc = smallest->length;
2199 while ((savealloc *= 2) < smallest->length);
2201 saved.text = xrealloc (saved.text, savealloc);
2203 saved.length = smallest->length;
2204 memcpy (saved.text, smallest->text, saved.length);
2208 saved.text + (smallest->keybeg - smallest->text);
2210 saved.text + (smallest->keylim - smallest->text);
2215 write_bytes (smallest->text, smallest->length, ofp, output_file);
2217 /* Check if we need to read more lines into core. */
2218 if (base[ord[0]] < smallest)
2219 cur[ord[0]] = smallest - 1;
2222 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2224 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2225 cur[ord[0]] = linelim - 1;
2226 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2230 /* We reached EOF on fps[ord[0]]. */
2231 for (i = 1; i < nfiles; ++i)
2232 if (ord[i] > ord[0])
2235 xfclose (fps[ord[0]], files[ord[0]].name);
2236 if (ord[0] < ntemps)
2239 zaptemp (files[ord[0]].name);
2241 free (buffer[ord[0]].buf);
2242 for (i = ord[0]; i < nfiles; ++i)
2244 fps[i] = fps[i + 1];
2245 files[i] = files[i + 1];
2246 buffer[i] = buffer[i + 1];
2247 cur[i] = cur[i + 1];
2248 base[i] = base[i + 1];
2250 for (i = 0; i < nfiles; ++i)
2251 ord[i] = ord[i + 1];
2256 /* The new line just read in may be larger than other lines
2257 already in main memory; push it back in the queue until we
2258 encounter a line larger than it. Optimize for the common
2259 case where the new line is smallest. */
2264 size_t ord0 = ord[0];
2265 size_t count_of_smaller_lines;
2269 int cmp = compare (cur[ord0], cur[ord[probe]]);
2270 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2274 probe = (lo + hi) / 2;
2277 count_of_smaller_lines = lo - 1;
2278 for (j = 0; j < count_of_smaller_lines; j++)
2279 ord[j] = ord[j + 1];
2280 ord[count_of_smaller_lines] = ord0;
2283 /* Free up some resources every once in a while. */
2284 if (MAX_PROCS_BEFORE_REAP < nprocs)
2288 if (unique && savedline)
2290 write_bytes (saved.text, saved.length, ofp, output_file);
2294 xfclose (ofp, output_file);
2302 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2303 and HI (with NHI members). T, LO, and HI point just past their
2304 respective arrays, and the arrays are in reverse order. NLO and
2305 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2308 mergelines (struct line *t,
2309 struct line const *lo, size_t nlo,
2310 struct line const *hi, size_t nhi)
2313 if (compare (lo - 1, hi - 1) <= 0)
2318 /* HI - NHI equalled T - (NLO + NHI) when this function
2319 began. Therefore HI must equal T now, and there is no
2320 need to copy from HI to T. */
2338 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2339 NLINES must be at least 2.
2340 The input and output arrays are in reverse order, and LINES and
2341 TEMP point just past the end of their respective arrays.
2343 Use a recursive divide-and-conquer algorithm, in the style
2344 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2345 the optimization suggested by exercise 5.2.4-10; this requires room
2346 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2347 writes that this memory optimization was originally published by
2348 D. A. Bell, Comp J. 1 (1958), 75. */
2351 sortlines (struct line *lines, size_t nlines, struct line *temp)
2355 if (0 < compare (&lines[-1], &lines[-2]))
2357 struct line tmp = lines[-1];
2358 lines[-1] = lines[-2];
2364 size_t nlo = nlines / 2;
2365 size_t nhi = nlines - nlo;
2366 struct line *lo = lines;
2367 struct line *hi = lines - nlo;
2368 struct line *sorted_lo = temp;
2370 sortlines (hi, nhi, temp);
2372 sortlines_temp (lo, nlo, sorted_lo);
2374 sorted_lo[-1] = lo[-1];
2376 mergelines (lines, sorted_lo, nlo, hi, nhi);
2380 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2381 rather than sorting in place. */
2384 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2388 /* Declare `swap' as int, not bool, to work around a bug
2389 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2390 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2391 int swap = (0 < compare (&lines[-1], &lines[-2]));
2392 temp[-1] = lines[-1 - swap];
2393 temp[-2] = lines[-2 + swap];
2397 size_t nlo = nlines / 2;
2398 size_t nhi = nlines - nlo;
2399 struct line *lo = lines;
2400 struct line *hi = lines - nlo;
2401 struct line *sorted_hi = temp - nlo;
2403 sortlines_temp (hi, nhi, sorted_hi);
2405 sortlines (lo, nlo, temp);
2407 mergelines (temp, lo, nlo, sorted_hi, nhi);
2411 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2412 the same as OUTFILE. If found, merge the found instances (and perhaps
2413 some other files) into a temporary file so that it can in turn be
2414 merged into OUTFILE without destroying OUTFILE before it is completely
2415 read. Return the new value of NFILES, which differs from the old if
2416 some merging occurred.
2418 This test ensures that an otherwise-erroneous use like
2419 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2420 It's not clear that POSIX requires this nicety.
2421 Detect common error cases, but don't try to catch obscure cases like
2422 "cat ... FILE ... | sort -m -o FILE"
2423 where traditional "sort" doesn't copy the input and where
2424 people should know that they're getting into trouble anyway.
2425 Catching these obscure cases would slow down performance in
2429 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2430 size_t nfiles, char const *outfile)
2433 bool got_outstat = false;
2434 struct stat outstat;
2436 for (i = ntemps; i < nfiles; i++)
2438 bool is_stdin = STREQ (files[i].name, "-");
2442 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2449 ? stat (outfile, &outstat)
2450 : fstat (STDOUT_FILENO, &outstat))
2457 ? fstat (STDIN_FILENO, &instat)
2458 : stat (files[i].name, &instat))
2460 && SAME_INODE (instat, outstat));
2467 char *temp = create_temp (&tftp, &pid);
2468 mergefps (&files[i],0, nfiles - i, tftp, temp);
2469 files[i].name = temp;
2478 /* Merge the input FILES. NTEMPS is the number of files at the
2479 start of FILES that are temporary; it is zero at the top level.
2480 NFILES is the total number of files. Put the output in
2481 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2484 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2485 char const *output_file)
2487 while (nmerge < nfiles)
2489 /* Number of input files processed so far. */
2492 /* Number of output files generated so far. */
2495 /* nfiles % NMERGE; this counts input files that are left over
2496 after all full-sized merges have been done. */
2499 /* Number of easily-available slots at the next loop iteration. */
2502 /* Do as many NMERGE-size merges as possible. */
2503 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2507 char *temp = create_temp (&tfp, &pid);
2508 size_t nt = MIN (ntemps, nmerge);
2510 mergefps (&files[in], nt, nmerge, tfp, temp);
2511 files[out].name = temp;
2512 files[out].pid = pid;
2515 remainder = nfiles - in;
2516 cheap_slots = nmerge - out % nmerge;
2518 if (cheap_slots < remainder)
2520 /* So many files remain that they can't all be put into the last
2521 NMERGE-sized output window. Do one more merge. Merge as few
2522 files as possible, to avoid needless I/O. */
2523 size_t nshortmerge = remainder - cheap_slots + 1;
2526 char *temp = create_temp (&tfp, &pid);
2527 size_t nt = MIN (ntemps, nshortmerge);
2529 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2530 files[out].name = temp;
2531 files[out++].pid = pid;
2535 /* Put the remaining input files into the last NMERGE-sized output
2536 window, so they will be merged in the next pass. */
2537 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2542 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2543 mergefps (files, ntemps, nfiles, NULL, output_file);
2546 /* Sort NFILES FILES onto OUTPUT_FILE. */
2549 sort (char * const *files, size_t nfiles, char const *output_file)
2553 bool output_file_created = false;
2559 char const *temp_output;
2560 char const *file = *files;
2561 FILE *fp = xfopen (file, "r");
2563 size_t bytes_per_line = (2 * sizeof (struct line)
2564 - sizeof (struct line) / 2);
2567 initbuf (&buf, bytes_per_line,
2568 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2573 while (fillbuf (&buf, fp, file))
2576 struct line *linebase;
2578 if (buf.eof && nfiles
2579 && (bytes_per_line + 1
2580 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2582 /* End of file, but there is more input and buffer room.
2583 Concatenate the next input file; this is faster in
2585 buf.left = buf.used;
2589 line = buffer_linelim (&buf);
2590 linebase = line - buf.nlines;
2592 sortlines (line, buf.nlines, linebase);
2593 if (buf.eof && !nfiles && !ntemps && !buf.left)
2596 tfp = xfopen (output_file, "w");
2597 temp_output = output_file;
2598 output_file_created = true;
2603 temp_output = create_temp (&tfp, NULL);
2609 write_bytes (line->text, line->length, tfp, temp_output);
2611 while (linebase < line && compare (line, line - 1) == 0)
2614 while (linebase < line);
2616 xfclose (tfp, temp_output);
2618 /* Free up some resources every once in a while. */
2619 if (MAX_PROCS_BEFORE_REAP < nprocs)
2622 if (output_file_created)
2631 if (! output_file_created)
2634 struct tempnode *node = temphead;
2635 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2636 for (i = 0; node; i++)
2638 tempfiles[i].name = node->name;
2639 tempfiles[i].pid = node->pid;
2642 merge (tempfiles, ntemps, ntemps, output_file);
2647 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2650 insertkey (struct keyfield *key_arg)
2652 struct keyfield **p;
2653 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2655 for (p = &keylist; *p; p = &(*p)->next)
2661 /* Report a bad field specification SPEC, with extra info MSGID. */
2663 static void badfieldspec (char const *, char const *)
2666 badfieldspec (char const *spec, char const *msgid)
2668 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2669 _(msgid), quote (spec));
2673 /* Report incompatible options. */
2675 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2677 incompatible_options (char const *opts)
2679 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2683 /* Check compatibility of ordering options. */
2686 check_ordering_compatibility (void)
2688 struct keyfield const *key;
2690 for (key = keylist; key; key = key->next)
2691 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2693 || (key->random && key->translate))
2697 if (key->ignore == nondictionary)
2701 if (key->general_numeric)
2703 if (key->ignore == nonprinting)
2712 incompatible_options (opts);
2716 /* Parse the leading integer in STRING and store the resulting value
2717 (which must fit into size_t) into *VAL. Return the address of the
2718 suffix after the integer. If the value is too large, silently
2719 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2720 failure; otherwise, report MSGID and exit on failure. */
2723 parse_field_count (char const *string, size_t *val, char const *msgid)
2728 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2731 case LONGINT_INVALID_SUFFIX_CHAR:
2736 case LONGINT_OVERFLOW:
2737 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2741 case LONGINT_INVALID:
2743 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2744 _(msgid), quote (string));
2751 /* Handle interrupts and hangups. */
2754 sighandler (int sig)
2757 signal (sig, SIG_IGN);
2761 signal (sig, SIG_DFL);
2765 /* Set the ordering options for KEY specified in S.
2766 Return the address of the first character in S that
2767 is not a valid ordering option.
2768 BLANKTYPE is the kind of blanks that 'b' should skip. */
2771 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2778 if (blanktype == bl_start || blanktype == bl_both)
2779 key->skipsblanks = true;
2780 if (blanktype == bl_end || blanktype == bl_both)
2781 key->skipeblanks = true;
2784 key->ignore = nondictionary;
2787 key->translate = fold_toupper;
2790 key->general_numeric = true;
2793 /* Option order should not matter, so don't let -i override
2794 -d. -d implies -i, but -i does not imply -d. */
2796 key->ignore = nonprinting;
2802 key->numeric = true;
2808 key->reverse = true;
2818 static struct keyfield *
2819 key_init (struct keyfield *key)
2821 memset (key, 0, sizeof *key);
2822 key->eword = SIZE_MAX;
2827 main (int argc, char **argv)
2829 struct keyfield *key;
2830 struct keyfield key_buf;
2831 struct keyfield gkey;
2835 bool mergeonly = false;
2836 char *random_source = NULL;
2837 bool need_random = false;
2839 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2840 bool obsolete_usage = (posix2_version () < 200112);
2842 char *files_from = NULL;
2844 char const *outfile = NULL;
2846 initialize_main (&argc, &argv);
2847 set_program_name (argv[0]);
2848 setlocale (LC_ALL, "");
2849 bindtextdomain (PACKAGE, LOCALEDIR);
2850 textdomain (PACKAGE);
2852 initialize_exit_failure (SORT_FAILURE);
2854 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2855 #if HAVE_NL_LANGINFO
2856 hard_LC_TIME = hard_locale (LC_TIME);
2859 /* Get locale's representation of the decimal point. */
2861 struct lconv const *locale = localeconv ();
2863 /* If the locale doesn't define a decimal point, or if the decimal
2864 point is multibyte, use the C locale's decimal point. FIXME:
2865 add support for multibyte decimal points. */
2866 decimal_point = to_uchar (locale->decimal_point[0]);
2867 if (! decimal_point || locale->decimal_point[1])
2868 decimal_point = '.';
2870 /* FIXME: add support for multibyte thousands separators. */
2871 thousands_sep = to_uchar (*locale->thousands_sep);
2872 if (! thousands_sep || locale->thousands_sep[1])
2876 have_read_stdin = false;
2881 static int const sig[] =
2883 /* The usual suspects. */
2884 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2901 enum { nsigs = sizeof sig / sizeof sig[0] };
2904 struct sigaction act;
2906 sigemptyset (&caught_signals);
2907 for (i = 0; i < nsigs; i++)
2909 sigaction (sig[i], NULL, &act);
2910 if (act.sa_handler != SIG_IGN)
2911 sigaddset (&caught_signals, sig[i]);
2914 act.sa_handler = sighandler;
2915 act.sa_mask = caught_signals;
2918 for (i = 0; i < nsigs; i++)
2919 if (sigismember (&caught_signals, sig[i]))
2920 sigaction (sig[i], &act, NULL);
2922 for (i = 0; i < nsigs; i++)
2923 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2925 signal (sig[i], sighandler);
2926 siginterrupt (sig[i], 1);
2931 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2932 atexit (exit_cleanup);
2934 gkey.sword = gkey.eword = SIZE_MAX;
2936 gkey.translate = NULL;
2937 gkey.numeric = gkey.general_numeric = gkey.random = false;
2938 gkey.month = gkey.reverse = false;
2939 gkey.skipsblanks = gkey.skipeblanks = false;
2941 files = xnmalloc (argc, sizeof *files);
2945 /* Parse an operand as a file after "--" was seen; or if
2946 pedantic and a file was seen, unless the POSIX version
2947 predates 1003.1-2001 and -c was not seen and the operand is
2948 "-o FILE" or "-oFILE". */
2952 || (posixly_correct && nfiles != 0
2953 && ! (obsolete_usage
2956 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2957 && (argv[optind][2] || optind + 1 != argc)))
2958 || ((c = getopt_long (argc, argv, short_options,
2964 files[nfiles++] = argv[optind++];
2970 if (optarg[0] == '+')
2972 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2973 && ISDIGIT (argv[optind][1]));
2974 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2977 /* Treat +POS1 [-POS2] as a key if possible; but silently
2978 treat an operand as a file if it is not a valid +POS1. */
2979 key = key_init (&key_buf);
2980 s = parse_field_count (optarg + 1, &key->sword, NULL);
2982 s = parse_field_count (s + 1, &key->schar, NULL);
2983 if (! (key->sword | key->schar))
2984 key->sword = SIZE_MAX;
2985 if (! s || *set_ordering (s, key, bl_start))
2989 if (minus_pos_usage)
2991 char const *optarg1 = argv[optind++];
2992 s = parse_field_count (optarg1 + 1, &key->eword,
2993 N_("invalid number after `-'"));
2995 s = parse_field_count (s + 1, &key->echar,
2996 N_("invalid number after `.'"));
2997 if (*set_ordering (s, key, bl_end))
2998 badfieldspec (optarg1,
2999 N_("stray character in field spec"));
3006 files[nfiles++] = optarg;
3010 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3025 set_ordering (str, &gkey, bl_both);
3031 ? XARGMATCH ("--check", optarg, check_args, check_types)
3036 if (checkonly && checkonly != c)
3037 incompatible_options ("cC");
3041 case COMPRESS_PROGRAM_OPTION:
3042 if (compress_program && !STREQ (compress_program, optarg))
3043 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3044 compress_program = optarg;
3047 case FILES0_FROM_OPTION:
3048 files_from = optarg;
3052 key = key_init (&key_buf);
3055 s = parse_field_count (optarg, &key->sword,
3056 N_("invalid number at field start"));
3059 /* Provoke with `sort -k0' */
3060 badfieldspec (optarg, N_("field number is zero"));
3064 s = parse_field_count (s + 1, &key->schar,
3065 N_("invalid number after `.'"));
3068 /* Provoke with `sort -k1.0' */
3069 badfieldspec (optarg, N_("character offset is zero"));
3072 if (! (key->sword | key->schar))
3073 key->sword = SIZE_MAX;
3074 s = set_ordering (s, key, bl_start);
3077 key->eword = SIZE_MAX;
3083 s = parse_field_count (s + 1, &key->eword,
3084 N_("invalid number after `,'"));
3087 /* Provoke with `sort -k1,0' */
3088 badfieldspec (optarg, N_("field number is zero"));
3091 s = parse_field_count (s + 1, &key->echar,
3092 N_("invalid number after `.'"));
3095 /* `-k 2,3' is equivalent to `+1 -3'. */
3098 s = set_ordering (s, key, bl_end);
3101 badfieldspec (optarg, N_("stray character in field spec"));
3110 specify_nmerge (oi, c, optarg);
3114 if (outfile && !STREQ (outfile, optarg))
3115 error (SORT_FAILURE, 0, _("multiple output files specified"));
3119 case RANDOM_SOURCE_OPTION:
3120 if (random_source && !STREQ (random_source, optarg))
3121 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3122 random_source = optarg;
3130 specify_sort_size (oi, c, optarg);
3135 char newtab = optarg[0];
3137 error (SORT_FAILURE, 0, _("empty tab"));
3140 if (STREQ (optarg, "\\0"))
3144 /* Provoke with `sort -txx'. Complain about
3145 "multi-character tab" instead of "multibyte tab", so
3146 that the diagnostic's wording does not need to be
3147 changed once multibyte characters are supported. */
3148 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3152 if (tab != TAB_DEFAULT && tab != newtab)
3153 error (SORT_FAILURE, 0, _("incompatible tabs"));
3159 add_temp_dir (optarg);
3167 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3168 through Solaris 7. It is also accepted by many non-Solaris
3169 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3170 -y is marked as obsolete starting with Solaris 8 (1999), but is
3171 still accepted as of Solaris 10 prerelease (2004).
3173 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3174 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3175 and which in general ignores the argument after "-y" if it
3176 consists entirely of digits (it can even be empty). */
3177 if (optarg == argv[optind - 1])
3180 for (p = optarg; ISDIGIT (*p); p++)
3182 optind -= (*p != '\0');
3190 case_GETOPT_HELP_CHAR;
3192 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3195 usage (SORT_FAILURE);
3203 /* When using --files0-from=F, you may not specify any files
3204 on the command-line. */
3207 error (0, 0, _("extra operand %s"), quote (files[0]));
3208 fprintf (stderr, "%s\n",
3209 _("file operands cannot be combined with --files0-from"));
3210 usage (SORT_FAILURE);
3213 if (STREQ (files_from, "-"))
3217 stream = fopen (files_from, "r");
3219 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3220 quote (files_from));
3223 readtokens0_init (&tok);
3225 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3226 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3227 quote (files_from));
3235 for (i = 0; i < nfiles; i++)
3237 if (STREQ (files[i], "-"))
3238 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3239 "no file name of %s allowed"),
3241 else if (files[i][0] == '\0')
3243 /* Using the standard `filename:line-number:' prefix here is
3244 not totally appropriate, since NUL is the separator, not NL,
3245 but it might be better than nothing. */
3246 unsigned long int file_number = i + 1;
3247 error (SORT_FAILURE, 0,
3248 _("%s:%lu: invalid zero-length file name"),
3249 quotearg_colon (files_from), file_number);
3254 error (SORT_FAILURE, 0, _("no input from %s"),
3255 quote (files_from));
3258 /* Inheritance of global options to individual keys. */
3259 for (key = keylist; key; key = key->next)
3261 if (! (key->ignore || key->translate
3262 || (key->skipsblanks | key->reverse
3263 | key->skipeblanks | key->month | key->numeric
3264 | key->general_numeric
3267 key->ignore = gkey.ignore;
3268 key->translate = gkey.translate;
3269 key->skipsblanks = gkey.skipsblanks;
3270 key->skipeblanks = gkey.skipeblanks;
3271 key->month = gkey.month;
3272 key->numeric = gkey.numeric;
3273 key->general_numeric = gkey.general_numeric;
3274 key->random = gkey.random;
3275 key->reverse = gkey.reverse;
3278 need_random |= key->random;
3281 if (!keylist && (gkey.ignore || gkey.translate
3282 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3283 | gkey.numeric | gkey.general_numeric
3287 need_random |= gkey.random;
3290 check_ordering_compatibility ();
3292 reverse = gkey.reverse;
3296 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3297 if (! randread_source)
3298 die (_("open failed"), random_source);
3301 if (temp_dir_count == 0)
3303 char const *tmp_dir = getenv ("TMPDIR");
3304 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3309 static char *minus = "-";
3315 /* Need to re-check that we meet the minimum requirement for memory
3316 usage with the final value for NMERGE. */
3318 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3323 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3324 quote (files[1]), checkonly);
3328 static char opts[] = {0, 'o', 0};
3329 opts[0] = checkonly;
3330 incompatible_options (opts);
3333 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3334 input is not properly sorted. */
3335 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3340 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3343 for (i = 0; i < nfiles; ++i)
3344 sortfiles[i].name = files[i];
3346 merge (sortfiles, 0, nfiles, outfile);
3347 IF_LINT (free (sortfiles));
3350 sort (files, nfiles, outfile);
3352 if (have_read_stdin && fclose (stdin) == EOF)
3353 die (_("close failed"), "-");
3355 exit (EXIT_SUCCESS);