1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "hard-locale.h"
41 #include "readtokens0.h"
44 #include "strnumcmp.h"
49 #if HAVE_SYS_RESOURCE_H
50 # include <sys/resource.h>
53 struct rlimit { size_t rlim_cur; };
54 # define getrlimit(Resource, Rlp) (-1)
57 /* The official name of this program (e.g., no `g' prefix). */
58 #define PROGRAM_NAME "sort"
61 proper_name ("Mike Haertel"), \
62 proper_name ("Paul Eggert")
64 #if HAVE_LANGINFO_CODESET
65 # include <langinfo.h>
68 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
71 # define SA_NOCLDSTOP 0
72 /* No sigprocmask. Always 'return' zero. */
73 # define sigprocmask(How, Set, Oset) (0)
75 # if ! HAVE_SIGINTERRUPT
76 # define siginterrupt(sig, flag) /* empty */
84 #define UCHAR_LIM (UCHAR_MAX + 1)
86 #ifndef DEFAULT_TMPDIR
87 # define DEFAULT_TMPDIR "/tmp"
93 /* POSIX says to exit with status 1 if invoked with -c and the
94 input is not properly sorted. */
95 SORT_OUT_OF_ORDER = 1,
97 /* POSIX says any other irregular exit must exit with a status
98 code greater than 1. */
104 /* The number of times we should try to fork a compression process
105 (we retry if the fork call fails). We don't _need_ to compress
106 temp files, this is just to reduce disk access, so this number
108 MAX_FORK_TRIES_COMPRESS = 2,
110 /* The number of times we should try to fork a decompression process.
111 If we can't fork a decompression process, we can't sort, so this
112 number should be big. */
113 MAX_FORK_TRIES_DECOMPRESS = 8
116 /* The representation of the decimal point in the current locale. */
117 static int decimal_point;
119 /* Thousands separator; if -1, then there isn't one. */
120 static int thousands_sep;
122 /* Nonzero if the corresponding locales are hard. */
123 static bool hard_LC_COLLATE;
125 static bool hard_LC_TIME;
128 #define NONZERO(x) ((x) != 0)
130 /* The kind of blanks for '-b' to skip in various options. */
131 enum blanktype { bl_start, bl_end, bl_both };
133 /* The character marking end of line. Default to \n. */
134 static char eolchar = '\n';
136 /* Lines are held in core as counted strings. */
139 char *text; /* Text of the line. */
140 size_t length; /* Length including final newline. */
141 char *keybeg; /* Start of first key. */
142 char *keylim; /* Limit of first key. */
148 char *buf; /* Dynamically allocated buffer,
149 partitioned into 3 regions:
152 - an array of lines, in reverse order. */
153 size_t used; /* Number of bytes used for input data. */
154 size_t nlines; /* Number of lines in the line array. */
155 size_t alloc; /* Number of bytes allocated. */
156 size_t left; /* Number of bytes left from previous reads. */
157 size_t line_bytes; /* Number of bytes to reserve for each line. */
158 bool eof; /* An EOF has been read. */
163 size_t sword; /* Zero-origin 'word' to start at. */
164 size_t schar; /* Additional characters to skip. */
165 size_t eword; /* Zero-origin first word after field. */
166 size_t echar; /* Additional characters in field. */
167 bool const *ignore; /* Boolean array of characters to ignore. */
168 char const *translate; /* Translation applied to characters. */
169 bool skipsblanks; /* Skip leading blanks when finding start. */
170 bool skipeblanks; /* Skip leading blanks when finding end. */
171 bool numeric; /* Flag for numeric comparison. Handle
172 strings of digits with optional decimal
173 point, but no exponential notation. */
174 bool random; /* Sort by random hash of key. */
175 bool general_numeric; /* Flag for general, numeric comparison.
176 Handle numbers in exponential notation. */
177 bool month; /* Flag for comparison by month name. */
178 bool reverse; /* Reverse the sense of comparison. */
179 struct keyfield *next; /* Next keyfield to try. */
188 /* FIXME: None of these tables work with multibyte character sets.
189 Also, there are many other bugs when handling multibyte characters.
190 One way to fix this is to rewrite `sort' to use wide characters
191 internally, but doing this with good performance is a bit
194 /* Table of blanks. */
195 static bool blanks[UCHAR_LIM];
197 /* Table of non-printing characters. */
198 static bool nonprinting[UCHAR_LIM];
200 /* Table of non-dictionary characters (not letters, digits, or blanks). */
201 static bool nondictionary[UCHAR_LIM];
203 /* Translation table folding lower case to upper. */
204 static char fold_toupper[UCHAR_LIM];
206 #define MONTHS_PER_YEAR 12
208 /* Table mapping month names to integers.
209 Alphabetic order allows binary search. */
210 static struct month monthtab[] =
226 /* During the merge phase, the number of files to merge at once. */
229 /* Minimum size for a merge or check buffer. */
230 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
232 /* Minimum sort size; the code might not work with smaller sizes. */
233 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
235 /* The number of bytes needed for a merge or check buffer, which can
236 function relatively efficiently even if it holds only one line. If
237 a longer line is seen, this value is increased. */
238 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
240 /* The approximate maximum number of bytes of main memory to use, as
241 specified by the user. Zero if the user has not specified a size. */
242 static size_t sort_size;
244 /* The guessed size for non-regular files. */
245 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
247 /* Array of directory names in which any temporary files are to be created. */
248 static char const **temp_dirs;
250 /* Number of temporary directory names used. */
251 static size_t temp_dir_count;
253 /* Number of allocated slots in temp_dirs. */
254 static size_t temp_dir_alloc;
256 /* Flag to reverse the order of all comparisons. */
259 /* Flag for stable sort. This turns off the last ditch bytewise
260 comparison of lines, and instead leaves lines in the same order
261 they were read if all keys compare equal. */
264 /* If TAB has this value, blanks separate fields. */
265 enum { TAB_DEFAULT = CHAR_MAX + 1 };
267 /* Tab character separating fields. If TAB_DEFAULT, then fields are
268 separated by the empty string between a non-blank character and a blank
270 static int tab = TAB_DEFAULT;
272 /* Flag to remove consecutive duplicate lines from the output.
273 Only the last of a sequence of equal lines will be output. */
276 /* Nonzero if any of the input files are the standard input. */
277 static bool have_read_stdin;
279 /* List of key field comparisons to be tried. */
280 static struct keyfield *keylist;
282 /* Program used to (de)compress temp files. Must accept -d. */
283 static char const *compress_program;
285 static void sortlines_temp (struct line *, size_t, struct line *);
287 /* Report MESSAGE for FILE, then clean up and exit.
288 If FILE is null, it represents standard output. */
290 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
292 die (char const *message, char const *file)
294 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
301 if (status != EXIT_SUCCESS)
302 fprintf (stderr, _("Try `%s --help' for more information.\n"),
307 Usage: %s [OPTION]... [FILE]...\n\
308 or: %s [OPTION]... --files0-from=F\n\
310 program_name, program_name);
312 Write sorted concatenation of all FILE(s) to standard output.\n\
316 Mandatory arguments to long options are mandatory for short options too.\n\
323 -b, --ignore-leading-blanks ignore leading blanks\n\
324 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
325 -f, --ignore-case fold lower case to upper case characters\n\
328 -g, --general-numeric-sort compare according to general numerical value\n\
329 -i, --ignore-nonprinting consider only printable characters\n\
330 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
331 -n, --numeric-sort compare according to string numerical value\n\
332 -R, --random-sort sort by random hash of keys\n\
333 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
334 --sort=WORD sort according to WORD:\n\
335 general-numeric -g, month -M, numeric -n,\n\
337 -r, --reverse reverse the result of comparisons\n\
343 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
344 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
345 --compress-program=PROG compress temporaries with PROG;\n\
346 decompress them with PROG -d\n\
347 --files0-from=F read input from the files specified by\n\
348 NUL-terminated names in file F\n\
351 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
352 -m, --merge merge already sorted files; do not sort\n\
355 -o, --output=FILE write result to FILE instead of standard output\n\
356 -s, --stable stabilize sort by disabling last-resort comparison\n\
357 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
360 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
361 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
362 multiple options specify multiple directories\n\
363 -u, --unique with -c, check for strict ordering;\n\
364 without -c, output only the first of an equal run\n\
367 -z, --zero-terminated end lines with 0 byte, not newline\n\
369 fputs (HELP_OPTION_DESCRIPTION, stdout);
370 fputs (VERSION_OPTION_DESCRIPTION, stdout);
373 POS is F[.C][OPTS], where F is the field number and C the character position\n\
374 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
375 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
376 one or more single-letter ordering options, which override global ordering\n\
377 options for that key. If no key is given, use the entire line as the key.\n\
379 SIZE may be followed by the following multiplicative suffixes:\n\
382 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
384 With no FILE, or when FILE is -, read standard input.\n\
387 The locale specified by the environment affects sort order.\n\
388 Set LC_ALL=C to get the traditional sort order that uses\n\
389 native byte values.\n\
391 emit_bug_reporting_address ();
397 /* For long options that have no equivalent short option, use a
398 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
401 CHECK_OPTION = CHAR_MAX + 1,
402 COMPRESS_PROGRAM_OPTION,
404 RANDOM_SOURCE_OPTION,
408 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
410 static struct option const long_options[] =
412 {"ignore-leading-blanks", no_argument, NULL, 'b'},
413 {"check", optional_argument, NULL, CHECK_OPTION},
414 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
415 {"dictionary-order", no_argument, NULL, 'd'},
416 {"ignore-case", no_argument, NULL, 'f'},
417 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
418 {"general-numeric-sort", no_argument, NULL, 'g'},
419 {"ignore-nonprinting", no_argument, NULL, 'i'},
420 {"key", required_argument, NULL, 'k'},
421 {"merge", no_argument, NULL, 'm'},
422 {"month-sort", no_argument, NULL, 'M'},
423 {"numeric-sort", no_argument, NULL, 'n'},
424 {"random-sort", no_argument, NULL, 'R'},
425 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
426 {"sort", required_argument, NULL, SORT_OPTION},
427 {"output", required_argument, NULL, 'o'},
428 {"reverse", no_argument, NULL, 'r'},
429 {"stable", no_argument, NULL, 's'},
430 {"buffer-size", required_argument, NULL, 'S'},
431 {"field-separator", required_argument, NULL, 't'},
432 {"temporary-directory", required_argument, NULL, 'T'},
433 {"unique", no_argument, NULL, 'u'},
434 {"zero-terminated", no_argument, NULL, 'z'},
435 {GETOPT_HELP_OPTION_DECL},
436 {GETOPT_VERSION_OPTION_DECL},
440 static char const *const check_args[] =
442 "quiet", "silent", "diagnose-first", NULL
444 static char const check_types[] =
448 ARGMATCH_VERIFY (check_args, check_types);
450 static char const *const sort_args[] =
452 "general-numeric", "month", "numeric", "random", NULL
454 static char const sort_types[] =
458 ARGMATCH_VERIFY (sort_args, sort_types);
460 /* The set of signals that are caught. */
461 static sigset_t caught_signals;
463 /* Critical section status. */
470 /* Enter a critical section. */
471 static struct cs_status
474 struct cs_status status;
475 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
479 /* Leave a critical section. */
481 cs_leave (struct cs_status status)
485 /* Ignore failure when restoring the signal mask. */
486 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
490 /* The list of temporary files. */
493 struct tempnode *volatile next;
494 pid_t pid; /* If compressed, the pid of compressor, else zero */
495 char name[1]; /* Actual size is 1 + file name length. */
497 static struct tempnode *volatile temphead;
498 static struct tempnode *volatile *temptail = &temphead;
503 pid_t pid; /* If compressed, the pid of compressor, else zero */
506 /* A table where we store compression process states. We clean up all
507 processes in a timely manner so as not to exhaust system resources,
508 so we store the info on whether the process is still running, or has
510 static Hash_table *proctab;
512 enum { INIT_PROCTAB_SIZE = 47 };
514 enum procstate { ALIVE, ZOMBIE };
516 /* A proctab entry. The COUNT field is there in case we fork a new
517 compression process that has the same PID as an old zombie process
518 that is still in the table (because the process to decompress the
519 temp file it was associated with hasn't started yet). */
523 enum procstate state;
528 proctab_hasher (const void *entry, size_t tabsize)
530 const struct procnode *node = entry;
531 return node->pid % tabsize;
535 proctab_comparator (const void *e1, const void *e2)
537 const struct procnode *n1 = e1, *n2 = e2;
538 return n1->pid == n2->pid;
541 /* The total number of forked processes (compressors and decompressors)
542 that have not been reaped yet. */
543 static size_t nprocs;
545 /* The number of child processes we'll allow before we try to reap some. */
546 enum { MAX_PROCS_BEFORE_REAP = 2 };
548 /* If 0 < PID, wait for the child process with that PID to exit.
549 If PID is -1, clean up a random child process which has finished and
550 return the process ID of that child. If PID is -1 and no processes
551 have quit yet, return 0 without waiting. */
557 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
560 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
564 if (! WIFEXITED (status) || WEXITSTATUS (status))
565 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
573 /* Add the PID of a running compression process to proctab, or update
574 the entry COUNT and STATE fields if it's already there. This also
575 creates the table for us the first time it's called. */
578 register_proc (pid_t pid)
580 struct procnode test, *node;
584 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
593 node = hash_lookup (proctab, &test);
601 node = xmalloc (sizeof *node);
605 hash_insert (proctab, node);
609 /* This is called when we reap a random process. We don't know
610 whether we have reaped a compression process or a decompression
611 process until we look in the table. If there's an ALIVE entry for
612 it, then we have reaped a compression process, so change the state
613 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
616 update_proc (pid_t pid)
618 struct procnode test, *node;
621 node = hash_lookup (proctab, &test);
623 node->state = ZOMBIE;
626 /* This is for when we need to wait for a compression process to exit.
627 If it has a ZOMBIE entry in the table then it's already dead and has
628 been reaped. Note that if there's an ALIVE entry for it, it still may
629 already have died and been reaped if a second process was created with
630 the same PID. This is probably exceedingly rare, but to be on the safe
631 side we will have to wait for any compression process with this PID. */
634 wait_proc (pid_t pid)
636 struct procnode test, *node;
639 node = hash_lookup (proctab, &test);
640 if (node->state == ALIVE)
643 node->state = ZOMBIE;
646 hash_delete (proctab, node);
651 /* Keep reaping finished children as long as there are more to reap.
652 This doesn't block waiting for any of them, it only reaps those
653 that are already dead. */
660 while (0 < nprocs && (pid = reap (-1)))
664 /* Clean up any remaining temporary files. */
669 struct tempnode const *node;
671 for (node = temphead; node; node = node->next)
676 /* Cleanup actions to take when exiting. */
683 /* Clean up any remaining temporary files in a critical section so
684 that a signal handler does not try to clean them too. */
685 struct cs_status cs = cs_enter ();
693 /* Create a new temporary file, returning its newly allocated tempnode.
694 Store into *PFD the file descriptor open for writing. */
696 static struct tempnode *
697 create_temp_file (int *pfd)
699 static char const slashbase[] = "/sortXXXXXX";
700 static size_t temp_dir_index;
703 char const *temp_dir = temp_dirs[temp_dir_index];
704 size_t len = strlen (temp_dir);
705 struct tempnode *node =
706 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
707 char *file = node->name;
710 memcpy (file, temp_dir, len);
711 memcpy (file + len, slashbase, sizeof slashbase);
714 if (++temp_dir_index == temp_dir_count)
717 /* Create the temporary file in a critical section, to avoid races. */
723 temptail = &node->next;
730 die (_("cannot create temporary file"), file);
736 /* Return a stream for FILE, opened with mode HOW. A null FILE means
737 standard output; HOW should be "w". When opening for input, "-"
738 means standard input. To avoid confusion, do not return file
739 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
740 opening an ordinary FILE. */
743 xfopen (const char *file, const char *how)
749 else if (STREQ (file, "-") && *how == 'r')
751 have_read_stdin = true;
756 fp = fopen (file, how);
758 die (_("open failed"), file);
764 /* Close FP, whose name is FILE, and report any errors. */
767 xfclose (FILE *fp, char const *file)
772 /* Allow reading stdin from tty more than once. */
778 /* Don't close stdout just yet. close_stdout does that. */
779 if (fflush (fp) != 0)
780 die (_("fflush failed"), file);
784 if (fclose (fp) != 0)
785 die (_("close failed"), file);
791 dup2_or_die (int oldfd, int newfd)
793 if (dup2 (oldfd, newfd) < 0)
794 error (SORT_FAILURE, errno, _("dup2 failed"));
797 /* Fork a child process for piping to and do common cleanup. The
798 TRIES parameter tells us how many times to try to fork before
799 giving up. Return the PID of the child or -1 if fork failed. */
802 pipe_fork (int pipefds[2], size_t tries)
804 #if HAVE_WORKING_FORK
805 struct tempnode *saved_temphead;
807 unsigned int wait_retry = 1;
808 pid_t pid IF_LINT (= -1);
811 if (pipe (pipefds) < 0)
816 /* This is so the child process won't delete our temp files
817 if it receives a signal before exec-ing. */
819 saved_temphead = temphead;
825 temphead = saved_temphead;
830 if (0 <= pid || errno != EAGAIN)
847 close (STDIN_FILENO);
848 close (STDOUT_FILENO);
855 #else /* ! HAVE_WORKING_FORK */
860 /* Create a temporary file and start a compression program to filter output
861 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
862 set it to the PID of the newly-created process. */
865 create_temp (FILE **pfp, pid_t *ppid)
868 struct tempnode *node = create_temp_file (&tempfd);
869 char *name = node->name;
871 if (compress_program)
875 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
882 register_proc (node->pid);
884 else if (node->pid == 0)
887 dup2_or_die (tempfd, STDOUT_FILENO);
889 dup2_or_die (pipefds[0], STDIN_FILENO);
892 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
893 error (SORT_FAILURE, errno, _("couldn't execute %s"),
900 *pfp = fdopen (tempfd, "w");
902 die (_("couldn't create temporary file"), name);
910 /* Open a compressed temp file and start a decompression process through
911 which to filter the input. PID must be the valid processes ID of the
912 process used to compress the file. */
915 open_temp (const char *name, pid_t pid)
917 int tempfd, pipefds[2];
923 tempfd = open (name, O_RDONLY);
925 die (_("couldn't open temporary file"), name);
927 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
933 else if (child_pid == 0)
936 dup2_or_die (tempfd, STDIN_FILENO);
938 dup2_or_die (pipefds[1], STDOUT_FILENO);
941 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
942 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
946 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
949 fp = fdopen (pipefds[0], "r");
951 die (_("couldn't create temporary file"), name);
957 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
959 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
960 die (_("write failed"), output_file);
963 /* Append DIR to the array of temporary directory names. */
965 add_temp_dir (char const *dir)
967 if (temp_dir_count == temp_dir_alloc)
968 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
970 temp_dirs[temp_dir_count++] = dir;
973 /* Remove NAME from the list of temporary files. */
976 zaptemp (const char *name)
978 struct tempnode *volatile *pnode;
979 struct tempnode *node;
980 struct tempnode *next;
982 int unlink_errno = 0;
985 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
988 /* Unlink the temporary file in a critical section to avoid races. */
991 unlink_status = unlink (name);
992 unlink_errno = errno;
996 if (unlink_status != 0)
997 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1003 #if HAVE_NL_LANGINFO
1006 struct_month_cmp (const void *m1, const void *m2)
1008 struct month const *month1 = m1;
1009 struct month const *month2 = m2;
1010 return strcmp (month1->name, month2->name);
1015 /* Initialize the character class tables. */
1022 for (i = 0; i < UCHAR_LIM; ++i)
1024 blanks[i] = !! isblank (i);
1025 nonprinting[i] = ! isprint (i);
1026 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1027 fold_toupper[i] = toupper (i);
1030 #if HAVE_NL_LANGINFO
1031 /* If we're not in the "C" locale, read different names for months. */
1034 for (i = 0; i < MONTHS_PER_YEAR; i++)
1041 s = (char *) nl_langinfo (ABMON_1 + i);
1043 monthtab[i].name = name = xmalloc (s_len + 1);
1044 monthtab[i].val = i + 1;
1046 for (j = 0; j < s_len; j++)
1047 name[j] = fold_toupper[to_uchar (s[j])];
1050 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1051 sizeof *monthtab, struct_month_cmp);
1056 /* Specify the amount of main memory to use when sorting. */
1058 specify_sort_size (int oi, char c, char const *s)
1062 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1064 /* The default unit is KiB. */
1065 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1067 if (n <= UINTMAX_MAX / 1024)
1070 e = LONGINT_OVERFLOW;
1073 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1074 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1083 double mem = physmem_total () * n / 100;
1085 /* Use "<", not "<=", to avoid problems with rounding. */
1086 if (mem < UINTMAX_MAX)
1092 e = LONGINT_OVERFLOW;
1097 if (e == LONGINT_OK)
1099 /* If multiple sort sizes are specified, take the maximum, so
1100 that option order does not matter. */
1107 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1111 e = LONGINT_OVERFLOW;
1114 xstrtol_fatal (e, oi, c, long_options, s);
1117 /* Return the default sort size. */
1119 default_sort_size (void)
1121 /* Let MEM be available memory or 1/8 of total memory, whichever
1123 double avail = physmem_available ();
1124 double total = physmem_total ();
1125 double mem = MAX (avail, total / 8);
1126 struct rlimit rlimit;
1128 /* Let SIZE be MEM, but no more than the maximum object size or
1129 system resource limits. Avoid the MIN macro here, as it is not
1130 quite right when only one argument is floating point. Don't
1131 bother to check for values like RLIM_INFINITY since in practice
1132 they are not much less than SIZE_MAX. */
1133 size_t size = SIZE_MAX;
1136 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1137 size = rlimit.rlim_cur;
1139 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1140 size = rlimit.rlim_cur;
1143 /* Leave a large safety margin for the above limits, as failure can
1144 occur when they are exceeded. */
1148 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1149 Exceeding RSS is not fatal, but can be quite slow. */
1150 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1151 size = rlimit.rlim_cur / 16 * 15;
1154 /* Use no less than the minimum. */
1155 return MAX (size, MIN_SORT_SIZE);
1158 /* Return the sort buffer size to use with the input files identified
1159 by FPS and FILES, which are alternate names of the same files.
1160 NFILES gives the number of input files; NFPS may be less. Assume
1161 that each input line requires LINE_BYTES extra bytes' worth of line
1162 information. Do not exceed the size bound specified by the user
1163 (or a default size bound, if the user does not specify one). */
1166 sort_buffer_size (FILE *const *fps, size_t nfps,
1167 char *const *files, size_t nfiles,
1170 /* A bound on the input size. If zero, the bound hasn't been
1172 static size_t size_bound;
1174 /* In the worst case, each input byte is a newline. */
1175 size_t worst_case_per_input_byte = line_bytes + 1;
1177 /* Keep enough room for one extra input line and an extra byte.
1178 This extra room might be needed when preparing to read EOF. */
1179 size_t size = worst_case_per_input_byte + 1;
1183 for (i = 0; i < nfiles; i++)
1189 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1190 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1191 : stat (files[i], &st))
1193 die (_("stat failed"), files[i]);
1195 if (S_ISREG (st.st_mode))
1196 file_size = st.st_size;
1199 /* The file has unknown size. If the user specified a sort
1200 buffer size, use that; otherwise, guess the size. */
1203 file_size = INPUT_FILE_SIZE_GUESS;
1208 size_bound = sort_size;
1210 size_bound = default_sort_size ();
1213 /* Add the amount of memory needed to represent the worst case
1214 where the input consists entirely of newlines followed by a
1215 single non-newline. Check for overflow. */
1216 worst_case = file_size * worst_case_per_input_byte + 1;
1217 if (file_size != worst_case / worst_case_per_input_byte
1218 || size_bound - size <= worst_case)
1226 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1227 must be at least sizeof (struct line). Allocate ALLOC bytes
1231 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1233 /* Ensure that the line array is properly aligned. If the desired
1234 size cannot be allocated, repeatedly halve it until allocation
1235 succeeds. The smaller allocation may hurt overall performance,
1236 but that's better than failing. */
1239 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1240 buf->buf = malloc (alloc);
1244 if (alloc <= line_bytes + 1)
1248 buf->line_bytes = line_bytes;
1250 buf->used = buf->left = buf->nlines = 0;
1254 /* Return one past the limit of the line array. */
1256 static inline struct line *
1257 buffer_linelim (struct buffer const *buf)
1259 return (struct line *) (buf->buf + buf->alloc);
1262 /* Return a pointer to the first character of the field specified
1266 begfield (const struct line *line, const struct keyfield *key)
1268 char *ptr = line->text, *lim = ptr + line->length - 1;
1269 size_t sword = key->sword;
1270 size_t schar = key->schar;
1271 size_t remaining_bytes;
1273 /* The leading field separator itself is included in a field when -t
1276 if (tab != TAB_DEFAULT)
1277 while (ptr < lim && sword--)
1279 while (ptr < lim && *ptr != tab)
1285 while (ptr < lim && sword--)
1287 while (ptr < lim && blanks[to_uchar (*ptr)])
1289 while (ptr < lim && !blanks[to_uchar (*ptr)])
1293 if (key->skipsblanks)
1294 while (ptr < lim && blanks[to_uchar (*ptr)])
1297 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1298 remaining_bytes = lim - ptr;
1299 if (schar < remaining_bytes)
1307 /* Return the limit of (a pointer to the first character after) the field
1308 in LINE specified by KEY. */
1311 limfield (const struct line *line, const struct keyfield *key)
1313 char *ptr = line->text, *lim = ptr + line->length - 1;
1314 size_t eword = key->eword, echar = key->echar;
1315 size_t remaining_bytes;
1317 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1318 whichever comes first. If there are more than EWORD fields, leave
1319 PTR pointing at the beginning of the field having zero-based index,
1320 EWORD. If a delimiter character was specified (via -t), then that
1321 `beginning' is the first character following the delimiting TAB.
1322 Otherwise, leave PTR pointing at the first `blank' character after
1323 the preceding field. */
1324 if (tab != TAB_DEFAULT)
1325 while (ptr < lim && eword--)
1327 while (ptr < lim && *ptr != tab)
1329 if (ptr < lim && (eword | echar))
1333 while (ptr < lim && eword--)
1335 while (ptr < lim && blanks[to_uchar (*ptr)])
1337 while (ptr < lim && !blanks[to_uchar (*ptr)])
1341 #ifdef POSIX_UNSPECIFIED
1342 /* The following block of code makes GNU sort incompatible with
1343 standard Unix sort, so it's ifdef'd out for now.
1344 The POSIX spec isn't clear on how to interpret this.
1345 FIXME: request clarification.
1347 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1348 Date: Thu, 30 May 96 12:20:41 -0400
1349 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1351 [...]I believe I've found another bug in `sort'.
1356 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1359 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1363 Unix sort produced the answer I expected: sort on the single character
1364 in column 7. GNU sort produced different results, because it disagrees
1365 on the interpretation of the key-end spec "M.N". Unix sort reads this
1366 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1367 "skip M-1 fields, then either N-1 characters or the rest of the current
1368 field, whichever comes first". This extra clause applies only to
1369 key-ends, not key-starts.
1372 /* Make LIM point to the end of (one byte past) the current field. */
1373 if (tab != TAB_DEFAULT)
1376 newlim = memchr (ptr, tab, lim - ptr);
1384 while (newlim < lim && blanks[to_uchar (*newlim)])
1386 while (newlim < lim && !blanks[to_uchar (*newlim)])
1392 /* If we're ignoring leading blanks when computing the End
1393 of the field, don't start counting bytes until after skipping
1394 past any leading blanks. */
1395 if (key->skipeblanks)
1396 while (ptr < lim && blanks[to_uchar (*ptr)])
1399 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1400 remaining_bytes = lim - ptr;
1401 if (echar < remaining_bytes)
1409 /* Fill BUF reading from FP, moving buf->left bytes from the end
1410 of buf->buf to the beginning first. If EOF is reached and the
1411 file wasn't terminated by a newline, supply one. Set up BUF's line
1412 table too. FILE is the name of the file corresponding to FP.
1413 Return true if some input was read. */
1416 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1418 struct keyfield const *key = keylist;
1420 size_t line_bytes = buf->line_bytes;
1421 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1426 if (buf->used != buf->left)
1428 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1429 buf->used = buf->left;
1435 char *ptr = buf->buf + buf->used;
1436 struct line *linelim = buffer_linelim (buf);
1437 struct line *line = linelim - buf->nlines;
1438 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1439 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1441 while (line_bytes + 1 < avail)
1443 /* Read as many bytes as possible, but do not read so many
1444 bytes that there might not be enough room for the
1445 corresponding line array. The worst case is when the
1446 rest of the input file consists entirely of newlines,
1447 except that the last byte is not a newline. */
1448 size_t readsize = (avail - 1) / (line_bytes + 1);
1449 size_t bytes_read = fread (ptr, 1, readsize, fp);
1450 char *ptrlim = ptr + bytes_read;
1452 avail -= bytes_read;
1454 if (bytes_read != readsize)
1457 die (_("read failed"), file);
1461 if (buf->buf == ptrlim)
1463 if (ptrlim[-1] != eol)
1468 /* Find and record each line in the just-read input. */
1469 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1473 line->text = line_start;
1474 line->length = ptr - line_start;
1475 mergesize = MAX (mergesize, line->length);
1476 avail -= line_bytes;
1480 /* Precompute the position of the first key for
1482 line->keylim = (key->eword == SIZE_MAX
1484 : limfield (line, key));
1486 if (key->sword != SIZE_MAX)
1487 line->keybeg = begfield (line, key);
1490 if (key->skipsblanks)
1491 while (blanks[to_uchar (*line_start)])
1493 line->keybeg = line_start;
1505 buf->used = ptr - buf->buf;
1506 buf->nlines = buffer_linelim (buf) - line;
1507 if (buf->nlines != 0)
1509 buf->left = ptr - line_start;
1510 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1515 /* The current input line is too long to fit in the buffer.
1516 Double the buffer size and try again, keeping it properly
1518 size_t line_alloc = buf->alloc / sizeof (struct line);
1519 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1520 buf->alloc = line_alloc * sizeof (struct line);
1525 /* Compare strings A and B as numbers without explicitly converting them to
1526 machine numbers. Comparatively slow for short strings, but asymptotically
1530 numcompare (const char *a, const char *b)
1532 while (blanks[to_uchar (*a)])
1534 while (blanks[to_uchar (*b)])
1537 return strnumcmp (a, b, decimal_point, thousands_sep);
1541 general_numcompare (const char *sa, const char *sb)
1543 /* FIXME: add option to warn about failed conversions. */
1544 /* FIXME: maybe add option to try expensive FP conversion
1545 only if A and B can't be compared more cheaply/accurately. */
1549 double a = strtod (sa, &ea);
1550 double b = strtod (sb, &eb);
1552 /* Put conversion errors at the start of the collating sequence. */
1554 return sb == eb ? 0 : -1;
1558 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1559 conversion errors but before numbers; sort them by internal
1560 bit-pattern, for lack of a more portable alternative. */
1566 : memcmp ((char *) &a, (char *) &b, sizeof a));
1569 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1570 Return 0 if the name in S is not recognized. */
1573 getmonth (char const *month, size_t len)
1576 size_t hi = MONTHS_PER_YEAR;
1577 char const *monthlim = month + len;
1581 if (month == monthlim)
1583 if (!blanks[to_uchar (*month)])
1590 size_t ix = (lo + hi) / 2;
1591 char const *m = month;
1592 char const *n = monthtab[ix].name;
1597 return monthtab[ix].val;
1598 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1603 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1615 /* A source of random data. */
1616 static struct randread_source *randread_source;
1618 /* Return the Ith randomly-generated state. The caller must invoke
1619 random_state (H) for all H less than I before invoking random_state
1622 static struct md5_ctx
1623 random_state (size_t i)
1625 /* An array of states resulting from the random data, and counts of
1626 its used and allocated members. */
1627 static struct md5_ctx *state;
1629 static size_t allocated;
1631 struct md5_ctx *s = &state[i];
1635 unsigned char buf[MD5_DIGEST_SIZE];
1641 state = X2NREALLOC (state, &allocated);
1645 randread (randread_source, buf, sizeof buf);
1647 md5_process_bytes (buf, sizeof buf, s);
1653 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1654 with length LENGTHB. Return negative if less, zero if equal,
1655 positive if greater. */
1658 cmp_hashes (char const *texta, size_t lena,
1659 char const *textb, size_t lenb)
1661 /* Try random hashes until a pair of hashes disagree. But if the
1662 first pair of random hashes agree, check whether the keys are
1663 identical and if so report no difference. */
1668 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1669 struct md5_ctx s[2];
1670 s[0] = s[1] = random_state (i);
1671 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1672 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1673 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1676 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1683 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1684 using one or more random hash functions. */
1687 compare_random (char *restrict texta, size_t lena,
1688 char *restrict textb, size_t lenb)
1692 if (! hard_LC_COLLATE)
1693 diff = cmp_hashes (texta, lena, textb, lenb);
1696 /* Transform the text into the basis of comparison, so that byte
1697 strings that would otherwise considered to be equal are
1698 considered equal here even if their bytes differ. */
1701 char stackbuf[4000];
1702 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1703 bool a_fits = tlena <= sizeof stackbuf;
1704 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1705 (a_fits ? sizeof stackbuf - tlena : 0),
1708 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1712 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1713 faster by avoiding the need for an extra buffer copy. */
1714 buf = xmalloc (tlena + tlenb + 1);
1715 xmemxfrm (buf, tlena + 1, texta, lena);
1716 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1719 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1721 if (buf != stackbuf)
1728 /* Compare two lines A and B trying every key in sequence until there
1729 are no more keys or a difference is found. */
1732 keycompare (const struct line *a, const struct line *b)
1734 struct keyfield const *key = keylist;
1736 /* For the first iteration only, the key positions have been
1737 precomputed for us. */
1738 char *texta = a->keybeg;
1739 char *textb = b->keybeg;
1740 char *lima = a->keylim;
1741 char *limb = b->keylim;
1747 char const *translate = key->translate;
1748 bool const *ignore = key->ignore;
1750 /* Find the lengths. */
1751 size_t lena = lima <= texta ? 0 : lima - texta;
1752 size_t lenb = limb <= textb ? 0 : limb - textb;
1754 /* Actually compare the fields. */
1757 diff = compare_random (texta, lena, textb, lenb);
1758 else if (key->numeric | key->general_numeric)
1760 char savea = *lima, saveb = *limb;
1762 *lima = *limb = '\0';
1763 diff = ((key->numeric ? numcompare : general_numcompare)
1765 *lima = savea, *limb = saveb;
1767 else if (key->month)
1768 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1769 /* Sorting like this may become slow, so in a simple locale the user
1770 can select a faster sort that is similar to ascii sort. */
1771 else if (hard_LC_COLLATE)
1773 if (ignore || translate)
1776 size_t size = lena + 1 + lenb + 1;
1777 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1778 char *copy_b = copy_a + lena + 1;
1779 size_t new_len_a, new_len_b, i;
1781 /* Ignore and/or translate chars before comparing. */
1782 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1786 copy_a[new_len_a] = (translate
1787 ? translate[to_uchar (texta[i])]
1789 if (!ignore || !ignore[to_uchar (texta[i])])
1794 copy_b[new_len_b] = (translate
1795 ? translate[to_uchar (textb[i])]
1797 if (!ignore || !ignore[to_uchar (textb[i])])
1802 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1804 if (sizeof buf < size)
1808 diff = - NONZERO (lenb);
1812 diff = xmemcoll (texta, lena, textb, lenb);
1816 #define CMP_WITH_IGNORE(A, B) \
1821 while (texta < lima && ignore[to_uchar (*texta)]) \
1823 while (textb < limb && ignore[to_uchar (*textb)]) \
1825 if (! (texta < lima && textb < limb)) \
1827 diff = to_uchar (A) - to_uchar (B); \
1834 diff = (texta < lima) - (textb < limb); \
1839 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1840 translate[to_uchar (*textb)]);
1842 CMP_WITH_IGNORE (*texta, *textb);
1845 diff = - NONZERO (lenb);
1852 while (texta < lima && textb < limb)
1854 diff = (to_uchar (translate[to_uchar (*texta++)])
1855 - to_uchar (translate[to_uchar (*textb++)]));
1862 diff = memcmp (texta, textb, MIN (lena, lenb));
1866 diff = lena < lenb ? -1 : lena != lenb;
1876 /* Find the beginning and limit of the next field. */
1877 if (key->eword != SIZE_MAX)
1878 lima = limfield (a, key), limb = limfield (b, key);
1880 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1882 if (key->sword != SIZE_MAX)
1883 texta = begfield (a, key), textb = begfield (b, key);
1886 texta = a->text, textb = b->text;
1887 if (key->skipsblanks)
1889 while (texta < lima && blanks[to_uchar (*texta)])
1891 while (textb < limb && blanks[to_uchar (*textb)])
1902 return key->reverse ? -diff : diff;
1905 /* Compare two lines A and B, returning negative, zero, or positive
1906 depending on whether A compares less than, equal to, or greater than B. */
1909 compare (const struct line *a, const struct line *b)
1914 /* First try to compare on the specified keys (if any).
1915 The only two cases with no key at all are unadorned sort,
1916 and unadorned sort -r. */
1919 diff = keycompare (a, b);
1920 if (diff | unique | stable)
1924 /* If the keys all compare equal (or no keys were specified)
1925 fall through to the default comparison. */
1926 alen = a->length - 1, blen = b->length - 1;
1929 diff = - NONZERO (blen);
1932 else if (hard_LC_COLLATE)
1933 diff = xmemcoll (a->text, alen, b->text, blen);
1934 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1935 diff = alen < blen ? -1 : alen != blen;
1937 return reverse ? -diff : diff;
1940 /* Check that the lines read from FILE_NAME come in order. Return
1941 true if they are in order. If CHECKONLY == 'c', also print a
1942 diagnostic (FILE_NAME, line number, contents of line) to stderr if
1943 they are not in order. */
1946 check (char const *file_name, char checkonly)
1948 FILE *fp = xfopen (file_name, "r");
1949 struct buffer buf; /* Input buffer. */
1950 struct line temp; /* Copy of previous line. */
1952 uintmax_t line_number = 0;
1953 struct keyfield const *key = keylist;
1954 bool nonunique = ! unique;
1955 bool ordered = true;
1957 initbuf (&buf, sizeof (struct line),
1958 MAX (merge_buffer_size, sort_size));
1961 while (fillbuf (&buf, fp, file_name))
1963 struct line const *line = buffer_linelim (&buf);
1964 struct line const *linebase = line - buf.nlines;
1966 /* Make sure the line saved from the old buffer contents is
1967 less than or equal to the first line of the new buffer. */
1968 if (alloc && nonunique <= compare (&temp, line - 1))
1972 if (checkonly == 'c')
1974 struct line const *disorder_line = line - 1;
1975 uintmax_t disorder_line_number =
1976 buffer_linelim (&buf) - disorder_line + line_number;
1977 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1978 fprintf (stderr, _("%s: %s:%s: disorder: "),
1979 program_name, file_name,
1980 umaxtostr (disorder_line_number, hr_buf));
1981 write_bytes (disorder_line->text, disorder_line->length,
1982 stderr, _("standard error"));
1990 /* Compare each line in the buffer with its successor. */
1991 while (linebase < --line)
1992 if (nonunique <= compare (line, line - 1))
1993 goto found_disorder;
1995 line_number += buf.nlines;
1997 /* Save the last line of the buffer. */
1998 if (alloc < line->length)
2005 alloc = line->length;
2009 while (alloc < line->length);
2011 temp.text = xrealloc (temp.text, alloc);
2013 memcpy (temp.text, line->text, line->length);
2014 temp.length = line->length;
2017 temp.keybeg = temp.text + (line->keybeg - line->text);
2018 temp.keylim = temp.text + (line->keylim - line->text);
2022 xfclose (fp, file_name);
2028 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2029 files (all of which are at the start of the FILES array), and
2030 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2031 Close input and output files before returning.
2032 OUTPUT_FILE gives the name of the output file. If it is NULL,
2033 the output file is standard output. If OFP is NULL, the output
2034 file has not been opened yet (or written to, if standard output). */
2037 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2038 FILE *ofp, char const *output_file)
2040 FILE *fps[NMERGE]; /* Input streams for each file. */
2041 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2042 struct line saved; /* Saved line storage for unique check. */
2043 struct line const *savedline = NULL;
2044 /* &saved if there is a saved line. */
2045 size_t savealloc = 0; /* Size allocated for the saved line. */
2046 struct line const *cur[NMERGE]; /* Current line in each line table. */
2047 struct line const *base[NMERGE]; /* Base of each line table. */
2048 size_t ord[NMERGE]; /* Table representing a permutation of fps,
2049 such that cur[ord[0]] is the smallest line
2050 and will be next output. */
2054 struct keyfield const *key = keylist;
2057 /* Read initial lines from each input file. */
2058 for (i = 0; i < nfiles; )
2060 fps[i] = (files[i].pid
2061 ? open_temp (files[i].name, files[i].pid)
2062 : xfopen (files[i].name, "r"));
2063 initbuf (&buffer[i], sizeof (struct line),
2064 MAX (merge_buffer_size, sort_size / nfiles));
2065 if (fillbuf (&buffer[i], fps[i], files[i].name))
2067 struct line const *linelim = buffer_linelim (&buffer[i]);
2068 cur[i] = linelim - 1;
2069 base[i] = linelim - buffer[i].nlines;
2074 /* fps[i] is empty; eliminate it from future consideration. */
2075 xfclose (fps[i], files[i].name);
2079 zaptemp (files[i].name);
2081 free (buffer[i].buf);
2083 for (j = i; j < nfiles; ++j)
2084 files[j] = files[j + 1];
2089 ofp = xfopen (output_file, "w");
2091 /* Set up the ord table according to comparisons among input lines.
2092 Since this only reorders two items if one is strictly greater than
2093 the other, it is stable. */
2094 for (i = 0; i < nfiles; ++i)
2096 for (i = 1; i < nfiles; ++i)
2097 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2098 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2100 /* Repeatedly output the smallest line until no input remains. */
2103 struct line const *smallest = cur[ord[0]];
2105 /* If uniquified output is turned on, output only the first of
2106 an identical series of lines. */
2109 if (savedline && compare (savedline, smallest))
2112 write_bytes (saved.text, saved.length, ofp, output_file);
2117 if (savealloc < smallest->length)
2122 savealloc = smallest->length;
2125 while ((savealloc *= 2) < smallest->length);
2127 saved.text = xrealloc (saved.text, savealloc);
2129 saved.length = smallest->length;
2130 memcpy (saved.text, smallest->text, saved.length);
2134 saved.text + (smallest->keybeg - smallest->text);
2136 saved.text + (smallest->keylim - smallest->text);
2141 write_bytes (smallest->text, smallest->length, ofp, output_file);
2143 /* Check if we need to read more lines into core. */
2144 if (base[ord[0]] < smallest)
2145 cur[ord[0]] = smallest - 1;
2148 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2150 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2151 cur[ord[0]] = linelim - 1;
2152 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2156 /* We reached EOF on fps[ord[0]]. */
2157 for (i = 1; i < nfiles; ++i)
2158 if (ord[i] > ord[0])
2161 xfclose (fps[ord[0]], files[ord[0]].name);
2162 if (ord[0] < ntemps)
2165 zaptemp (files[ord[0]].name);
2167 free (buffer[ord[0]].buf);
2168 for (i = ord[0]; i < nfiles; ++i)
2170 fps[i] = fps[i + 1];
2171 files[i] = files[i + 1];
2172 buffer[i] = buffer[i + 1];
2173 cur[i] = cur[i + 1];
2174 base[i] = base[i + 1];
2176 for (i = 0; i < nfiles; ++i)
2177 ord[i] = ord[i + 1];
2182 /* The new line just read in may be larger than other lines
2183 already in main memory; push it back in the queue until we
2184 encounter a line larger than it. Optimize for the common
2185 case where the new line is smallest. */
2190 size_t ord0 = ord[0];
2191 size_t count_of_smaller_lines;
2195 int cmp = compare (cur[ord0], cur[ord[probe]]);
2196 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2200 probe = (lo + hi) / 2;
2203 count_of_smaller_lines = lo - 1;
2204 for (j = 0; j < count_of_smaller_lines; j++)
2205 ord[j] = ord[j + 1];
2206 ord[count_of_smaller_lines] = ord0;
2209 /* Free up some resources every once in a while. */
2210 if (MAX_PROCS_BEFORE_REAP < nprocs)
2214 if (unique && savedline)
2216 write_bytes (saved.text, saved.length, ofp, output_file);
2220 xfclose (ofp, output_file);
2223 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2224 and HI (with NHI members). T, LO, and HI point just past their
2225 respective arrays, and the arrays are in reverse order. NLO and
2226 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2229 mergelines (struct line *t,
2230 struct line const *lo, size_t nlo,
2231 struct line const *hi, size_t nhi)
2234 if (compare (lo - 1, hi - 1) <= 0)
2239 /* HI - NHI equalled T - (NLO + NHI) when this function
2240 began. Therefore HI must equal T now, and there is no
2241 need to copy from HI to T. */
2259 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2260 NLINES must be at least 2.
2261 The input and output arrays are in reverse order, and LINES and
2262 TEMP point just past the end of their respective arrays.
2264 Use a recursive divide-and-conquer algorithm, in the style
2265 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2266 the optimization suggested by exercise 5.2.4-10; this requires room
2267 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2268 writes that this memory optimization was originally published by
2269 D. A. Bell, Comp J. 1 (1958), 75. */
2272 sortlines (struct line *lines, size_t nlines, struct line *temp)
2276 if (0 < compare (&lines[-1], &lines[-2]))
2278 struct line tmp = lines[-1];
2279 lines[-1] = lines[-2];
2285 size_t nlo = nlines / 2;
2286 size_t nhi = nlines - nlo;
2287 struct line *lo = lines;
2288 struct line *hi = lines - nlo;
2289 struct line *sorted_lo = temp;
2291 sortlines (hi, nhi, temp);
2293 sortlines_temp (lo, nlo, sorted_lo);
2295 sorted_lo[-1] = lo[-1];
2297 mergelines (lines, sorted_lo, nlo, hi, nhi);
2301 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2302 rather than sorting in place. */
2305 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2309 /* Declare `swap' as int, not bool, to work around a bug
2310 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2311 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2312 int swap = (0 < compare (&lines[-1], &lines[-2]));
2313 temp[-1] = lines[-1 - swap];
2314 temp[-2] = lines[-2 + swap];
2318 size_t nlo = nlines / 2;
2319 size_t nhi = nlines - nlo;
2320 struct line *lo = lines;
2321 struct line *hi = lines - nlo;
2322 struct line *sorted_hi = temp - nlo;
2324 sortlines_temp (hi, nhi, sorted_hi);
2326 sortlines (lo, nlo, temp);
2328 mergelines (temp, lo, nlo, sorted_hi, nhi);
2332 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2333 the same as OUTFILE. If found, merge the found instances (and perhaps
2334 some other files) into a temporary file so that it can in turn be
2335 merged into OUTFILE without destroying OUTFILE before it is completely
2336 read. Return the new value of NFILES, which differs from the old if
2337 some merging occurred.
2339 This test ensures that an otherwise-erroneous use like
2340 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2341 It's not clear that POSIX requires this nicety.
2342 Detect common error cases, but don't try to catch obscure cases like
2343 "cat ... FILE ... | sort -m -o FILE"
2344 where traditional "sort" doesn't copy the input and where
2345 people should know that they're getting into trouble anyway.
2346 Catching these obscure cases would slow down performance in
2350 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2351 size_t nfiles, char const *outfile)
2354 bool got_outstat = false;
2355 struct stat outstat;
2357 for (i = ntemps; i < nfiles; i++)
2359 bool is_stdin = STREQ (files[i].name, "-");
2363 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2370 ? stat (outfile, &outstat)
2371 : fstat (STDOUT_FILENO, &outstat))
2378 ? fstat (STDIN_FILENO, &instat)
2379 : stat (files[i].name, &instat))
2381 && SAME_INODE (instat, outstat));
2388 char *temp = create_temp (&tftp, &pid);
2389 mergefps (&files[i],0, nfiles - i, tftp, temp);
2390 files[i].name = temp;
2399 /* Merge the input FILES. NTEMPS is the number of files at the
2400 start of FILES that are temporary; it is zero at the top level.
2401 NFILES is the total number of files. Put the output in
2402 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2405 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2406 char const *output_file)
2408 while (NMERGE < nfiles)
2410 /* Number of input files processed so far. */
2413 /* Number of output files generated so far. */
2416 /* nfiles % NMERGE; this counts input files that are left over
2417 after all full-sized merges have been done. */
2420 /* Number of easily-available slots at the next loop iteration. */
2423 /* Do as many NMERGE-size merges as possible. */
2424 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2428 char *temp = create_temp (&tfp, &pid);
2429 size_t nt = MIN (ntemps, NMERGE);
2431 mergefps (&files[in], nt, NMERGE, tfp, temp);
2432 files[out].name = temp;
2433 files[out].pid = pid;
2436 remainder = nfiles - in;
2437 cheap_slots = NMERGE - out % NMERGE;
2439 if (cheap_slots < remainder)
2441 /* So many files remain that they can't all be put into the last
2442 NMERGE-sized output window. Do one more merge. Merge as few
2443 files as possible, to avoid needless I/O. */
2444 size_t nshortmerge = remainder - cheap_slots + 1;
2447 char *temp = create_temp (&tfp, &pid);
2448 size_t nt = MIN (ntemps, nshortmerge);
2450 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2451 files[out].name = temp;
2452 files[out++].pid = pid;
2456 /* Put the remaining input files into the last NMERGE-sized output
2457 window, so they will be merged in the next pass. */
2458 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2463 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2464 mergefps (files, ntemps, nfiles, NULL, output_file);
2467 /* Sort NFILES FILES onto OUTPUT_FILE. */
2470 sort (char * const *files, size_t nfiles, char const *output_file)
2474 bool output_file_created = false;
2480 char const *temp_output;
2481 char const *file = *files;
2482 FILE *fp = xfopen (file, "r");
2484 size_t bytes_per_line = (2 * sizeof (struct line)
2485 - sizeof (struct line) / 2);
2488 initbuf (&buf, bytes_per_line,
2489 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2494 while (fillbuf (&buf, fp, file))
2497 struct line *linebase;
2499 if (buf.eof && nfiles
2500 && (bytes_per_line + 1
2501 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2503 /* End of file, but there is more input and buffer room.
2504 Concatenate the next input file; this is faster in
2506 buf.left = buf.used;
2510 line = buffer_linelim (&buf);
2511 linebase = line - buf.nlines;
2513 sortlines (line, buf.nlines, linebase);
2514 if (buf.eof && !nfiles && !ntemps && !buf.left)
2517 tfp = xfopen (output_file, "w");
2518 temp_output = output_file;
2519 output_file_created = true;
2524 temp_output = create_temp (&tfp, NULL);
2530 write_bytes (line->text, line->length, tfp, temp_output);
2532 while (linebase < line && compare (line, line - 1) == 0)
2535 while (linebase < line);
2537 xfclose (tfp, temp_output);
2539 /* Free up some resources every once in a while. */
2540 if (MAX_PROCS_BEFORE_REAP < nprocs)
2543 if (output_file_created)
2552 if (! output_file_created)
2555 struct tempnode *node = temphead;
2556 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2557 for (i = 0; node; i++)
2559 tempfiles[i].name = node->name;
2560 tempfiles[i].pid = node->pid;
2563 merge (tempfiles, ntemps, ntemps, output_file);
2568 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2571 insertkey (struct keyfield *key_arg)
2573 struct keyfield **p;
2574 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2576 for (p = &keylist; *p; p = &(*p)->next)
2582 /* Report a bad field specification SPEC, with extra info MSGID. */
2584 static void badfieldspec (char const *, char const *)
2587 badfieldspec (char const *spec, char const *msgid)
2589 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2590 _(msgid), quote (spec));
2594 /* Report incompatible options. */
2596 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2598 incompatible_options (char const *opts)
2600 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2604 /* Check compatibility of ordering options. */
2607 check_ordering_compatibility (void)
2609 struct keyfield const *key;
2611 for (key = keylist; key; key = key->next)
2612 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2614 || (key->random && key->translate))
2618 if (key->ignore == nondictionary)
2622 if (key->general_numeric)
2624 if (key->ignore == nonprinting)
2633 incompatible_options (opts);
2637 /* Parse the leading integer in STRING and store the resulting value
2638 (which must fit into size_t) into *VAL. Return the address of the
2639 suffix after the integer. If the value is too large, silently
2640 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2641 failure; otherwise, report MSGID and exit on failure. */
2644 parse_field_count (char const *string, size_t *val, char const *msgid)
2649 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2652 case LONGINT_INVALID_SUFFIX_CHAR:
2657 case LONGINT_OVERFLOW:
2658 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2662 case LONGINT_INVALID:
2664 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2665 _(msgid), quote (string));
2672 /* Handle interrupts and hangups. */
2675 sighandler (int sig)
2678 signal (sig, SIG_IGN);
2682 signal (sig, SIG_DFL);
2686 /* Set the ordering options for KEY specified in S.
2687 Return the address of the first character in S that
2688 is not a valid ordering option.
2689 BLANKTYPE is the kind of blanks that 'b' should skip. */
2692 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2699 if (blanktype == bl_start || blanktype == bl_both)
2700 key->skipsblanks = true;
2701 if (blanktype == bl_end || blanktype == bl_both)
2702 key->skipeblanks = true;
2705 key->ignore = nondictionary;
2708 key->translate = fold_toupper;
2711 key->general_numeric = true;
2714 /* Option order should not matter, so don't let -i override
2715 -d. -d implies -i, but -i does not imply -d. */
2717 key->ignore = nonprinting;
2723 key->numeric = true;
2729 key->reverse = true;
2739 static struct keyfield *
2740 key_init (struct keyfield *key)
2742 memset (key, 0, sizeof *key);
2743 key->eword = SIZE_MAX;
2748 main (int argc, char **argv)
2750 struct keyfield *key;
2751 struct keyfield key_buf;
2752 struct keyfield gkey;
2756 bool mergeonly = false;
2757 char *random_source = NULL;
2758 bool need_random = false;
2760 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2761 bool obsolete_usage = (posix2_version () < 200112);
2763 char *files_from = NULL;
2765 char const *outfile = NULL;
2767 initialize_main (&argc, &argv);
2768 set_program_name (argv[0]);
2769 setlocale (LC_ALL, "");
2770 bindtextdomain (PACKAGE, LOCALEDIR);
2771 textdomain (PACKAGE);
2773 initialize_exit_failure (SORT_FAILURE);
2775 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2776 #if HAVE_NL_LANGINFO
2777 hard_LC_TIME = hard_locale (LC_TIME);
2780 /* Get locale's representation of the decimal point. */
2782 struct lconv const *locale = localeconv ();
2784 /* If the locale doesn't define a decimal point, or if the decimal
2785 point is multibyte, use the C locale's decimal point. FIXME:
2786 add support for multibyte decimal points. */
2787 decimal_point = to_uchar (locale->decimal_point[0]);
2788 if (! decimal_point || locale->decimal_point[1])
2789 decimal_point = '.';
2791 /* FIXME: add support for multibyte thousands separators. */
2792 thousands_sep = to_uchar (*locale->thousands_sep);
2793 if (! thousands_sep || locale->thousands_sep[1])
2797 have_read_stdin = false;
2802 static int const sig[] =
2804 /* The usual suspects. */
2805 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2822 enum { nsigs = sizeof sig / sizeof sig[0] };
2825 struct sigaction act;
2827 sigemptyset (&caught_signals);
2828 for (i = 0; i < nsigs; i++)
2830 sigaction (sig[i], NULL, &act);
2831 if (act.sa_handler != SIG_IGN)
2832 sigaddset (&caught_signals, sig[i]);
2835 act.sa_handler = sighandler;
2836 act.sa_mask = caught_signals;
2839 for (i = 0; i < nsigs; i++)
2840 if (sigismember (&caught_signals, sig[i]))
2841 sigaction (sig[i], &act, NULL);
2843 for (i = 0; i < nsigs; i++)
2844 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2846 signal (sig[i], sighandler);
2847 siginterrupt (sig[i], 1);
2852 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2853 atexit (exit_cleanup);
2855 gkey.sword = gkey.eword = SIZE_MAX;
2857 gkey.translate = NULL;
2858 gkey.numeric = gkey.general_numeric = gkey.random = false;
2859 gkey.month = gkey.reverse = false;
2860 gkey.skipsblanks = gkey.skipeblanks = false;
2862 files = xnmalloc (argc, sizeof *files);
2866 /* Parse an operand as a file after "--" was seen; or if
2867 pedantic and a file was seen, unless the POSIX version
2868 predates 1003.1-2001 and -c was not seen and the operand is
2869 "-o FILE" or "-oFILE". */
2873 || (posixly_correct && nfiles != 0
2874 && ! (obsolete_usage
2877 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2878 && (argv[optind][2] || optind + 1 != argc)))
2879 || ((c = getopt_long (argc, argv, short_options,
2885 files[nfiles++] = argv[optind++];
2891 if (optarg[0] == '+')
2893 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2894 && ISDIGIT (argv[optind][1]));
2895 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2898 /* Treat +POS1 [-POS2] as a key if possible; but silently
2899 treat an operand as a file if it is not a valid +POS1. */
2900 key = key_init (&key_buf);
2901 s = parse_field_count (optarg + 1, &key->sword, NULL);
2903 s = parse_field_count (s + 1, &key->schar, NULL);
2904 if (! (key->sword | key->schar))
2905 key->sword = SIZE_MAX;
2906 if (! s || *set_ordering (s, key, bl_start))
2910 if (minus_pos_usage)
2912 char const *optarg1 = argv[optind++];
2913 s = parse_field_count (optarg1 + 1, &key->eword,
2914 N_("invalid number after `-'"));
2916 s = parse_field_count (s + 1, &key->echar,
2917 N_("invalid number after `.'"));
2918 if (*set_ordering (s, key, bl_end))
2919 badfieldspec (optarg1,
2920 N_("stray character in field spec"));
2927 files[nfiles++] = optarg;
2931 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
2946 set_ordering (str, &gkey, bl_both);
2952 ? XARGMATCH ("--check", optarg, check_args, check_types)
2957 if (checkonly && checkonly != c)
2958 incompatible_options ("cC");
2962 case COMPRESS_PROGRAM_OPTION:
2963 if (compress_program && !STREQ (compress_program, optarg))
2964 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
2965 compress_program = optarg;
2968 case FILES0_FROM_OPTION:
2969 files_from = optarg;
2973 key = key_init (&key_buf);
2976 s = parse_field_count (optarg, &key->sword,
2977 N_("invalid number at field start"));
2980 /* Provoke with `sort -k0' */
2981 badfieldspec (optarg, N_("field number is zero"));
2985 s = parse_field_count (s + 1, &key->schar,
2986 N_("invalid number after `.'"));
2989 /* Provoke with `sort -k1.0' */
2990 badfieldspec (optarg, N_("character offset is zero"));
2993 if (! (key->sword | key->schar))
2994 key->sword = SIZE_MAX;
2995 s = set_ordering (s, key, bl_start);
2998 key->eword = SIZE_MAX;
3004 s = parse_field_count (s + 1, &key->eword,
3005 N_("invalid number after `,'"));
3008 /* Provoke with `sort -k1,0' */
3009 badfieldspec (optarg, N_("field number is zero"));
3012 s = parse_field_count (s + 1, &key->echar,
3013 N_("invalid number after `.'"));
3016 /* `-k 2,3' is equivalent to `+1 -3'. */
3019 s = set_ordering (s, key, bl_end);
3022 badfieldspec (optarg, N_("stray character in field spec"));
3031 if (outfile && !STREQ (outfile, optarg))
3032 error (SORT_FAILURE, 0, _("multiple output files specified"));
3036 case RANDOM_SOURCE_OPTION:
3037 if (random_source && !STREQ (random_source, optarg))
3038 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3039 random_source = optarg;
3047 specify_sort_size (oi, c, optarg);
3052 char newtab = optarg[0];
3054 error (SORT_FAILURE, 0, _("empty tab"));
3057 if (STREQ (optarg, "\\0"))
3061 /* Provoke with `sort -txx'. Complain about
3062 "multi-character tab" instead of "multibyte tab", so
3063 that the diagnostic's wording does not need to be
3064 changed once multibyte characters are supported. */
3065 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3069 if (tab != TAB_DEFAULT && tab != newtab)
3070 error (SORT_FAILURE, 0, _("incompatible tabs"));
3076 add_temp_dir (optarg);
3084 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3085 through Solaris 7. It is also accepted by many non-Solaris
3086 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3087 -y is marked as obsolete starting with Solaris 8 (1999), but is
3088 still accepted as of Solaris 10 prerelease (2004).
3090 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3091 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3092 and which in general ignores the argument after "-y" if it
3093 consists entirely of digits (it can even be empty). */
3094 if (optarg == argv[optind - 1])
3097 for (p = optarg; ISDIGIT (*p); p++)
3099 optind -= (*p != '\0');
3107 case_GETOPT_HELP_CHAR;
3109 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3112 usage (SORT_FAILURE);
3120 /* When using --files0-from=F, you may not specify any files
3121 on the command-line. */
3124 error (0, 0, _("extra operand %s"), quote (files[0]));
3125 fprintf (stderr, "%s\n",
3126 _("file operands cannot be combined with --files0-from"));
3127 usage (SORT_FAILURE);
3130 if (STREQ (files_from, "-"))
3134 stream = fopen (files_from, "r");
3136 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3137 quote (files_from));
3140 readtokens0_init (&tok);
3142 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3143 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3144 quote (files_from));
3152 for (i = 0; i < nfiles; i++)
3154 if (STREQ (files[i], "-"))
3155 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3156 "no file name of %s allowed"),
3158 else if (files[i][0] == '\0')
3160 /* Using the standard `filename:line-number:' prefix here is
3161 not totally appropriate, since NUL is the separator, not NL,
3162 but it might be better than nothing. */
3163 unsigned long int file_number = i + 1;
3164 error (SORT_FAILURE, 0,
3165 _("%s:%lu: invalid zero-length file name"),
3166 quotearg_colon (files_from), file_number);
3171 error (SORT_FAILURE, 0, _("no input from %s"),
3172 quote (files_from));
3175 /* Inheritance of global options to individual keys. */
3176 for (key = keylist; key; key = key->next)
3178 if (! (key->ignore || key->translate
3179 || (key->skipsblanks | key->reverse
3180 | key->skipeblanks | key->month | key->numeric
3181 | key->general_numeric
3184 key->ignore = gkey.ignore;
3185 key->translate = gkey.translate;
3186 key->skipsblanks = gkey.skipsblanks;
3187 key->skipeblanks = gkey.skipeblanks;
3188 key->month = gkey.month;
3189 key->numeric = gkey.numeric;
3190 key->general_numeric = gkey.general_numeric;
3191 key->random = gkey.random;
3192 key->reverse = gkey.reverse;
3195 need_random |= key->random;
3198 if (!keylist && (gkey.ignore || gkey.translate
3199 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3200 | gkey.numeric | gkey.general_numeric
3204 need_random |= gkey.random;
3207 check_ordering_compatibility ();
3209 reverse = gkey.reverse;
3213 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3214 if (! randread_source)
3215 die (_("open failed"), random_source);
3218 if (temp_dir_count == 0)
3220 char const *tmp_dir = getenv ("TMPDIR");
3221 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3226 static char *minus = "-";
3235 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3236 quote (files[1]), checkonly);
3240 static char opts[] = {0, 'o', 0};
3241 opts[0] = checkonly;
3242 incompatible_options (opts);
3245 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3246 input is not properly sorted. */
3247 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3252 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3255 for (i = 0; i < nfiles; ++i)
3256 sortfiles[i].name = files[i];
3258 merge (sortfiles, 0, nfiles, outfile);
3259 IF_LINT (free (sortfiles));
3262 sort (files, nfiles, outfile);
3264 if (have_read_stdin && fclose (stdin) == EOF)
3265 die (_("close failed"), "-");
3267 exit (EXIT_SUCCESS);