1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
31 #include "hard-locale.h"
38 #include "strnumcmp.h"
42 #if HAVE_SYS_RESOURCE_H
43 # include <sys/resource.h>
46 struct rlimit { size_t rlim_cur; };
47 # define getrlimit(Resource, Rlp) (-1)
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "sort"
53 #define AUTHORS "Mike Haertel", "Paul Eggert"
55 #if HAVE_LANGINFO_CODESET
56 # include <langinfo.h>
59 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
62 # define SA_NOCLDSTOP 0
63 # define sigprocmask(How, Set, Oset) /* empty */
65 # if ! HAVE_SIGINTERRUPT
66 # define siginterrupt(sig, flag) /* empty */
74 #define UCHAR_LIM (UCHAR_MAX + 1)
76 #ifndef DEFAULT_TMPDIR
77 # define DEFAULT_TMPDIR "/tmp"
83 /* POSIX says to exit with status 1 if invoked with -c and the
84 input is not properly sorted. */
85 SORT_OUT_OF_ORDER = 1,
87 /* POSIX says any other irregular exit must exit with a status
88 code greater than 1. */
92 /* The representation of the decimal point in the current locale. */
93 static int decimal_point;
95 /* Thousands separator; if -1, then there isn't one. */
96 static int thousands_sep;
98 /* Nonzero if the corresponding locales are hard. */
99 static bool hard_LC_COLLATE;
101 static bool hard_LC_TIME;
104 #define NONZERO(x) ((x) != 0)
106 /* The kind of blanks for '-b' to skip in various options. */
107 enum blanktype { bl_start, bl_end, bl_both };
109 /* The character marking end of line. Default to \n. */
110 static char eolchar = '\n';
112 /* Lines are held in core as counted strings. */
115 char *text; /* Text of the line. */
116 size_t length; /* Length including final newline. */
117 char *keybeg; /* Start of first key. */
118 char *keylim; /* Limit of first key. */
124 char *buf; /* Dynamically allocated buffer,
125 partitioned into 3 regions:
128 - an array of lines, in reverse order. */
129 size_t used; /* Number of bytes used for input data. */
130 size_t nlines; /* Number of lines in the line array. */
131 size_t alloc; /* Number of bytes allocated. */
132 size_t left; /* Number of bytes left from previous reads. */
133 size_t line_bytes; /* Number of bytes to reserve for each line. */
134 bool eof; /* An EOF has been read. */
139 size_t sword; /* Zero-origin 'word' to start at. */
140 size_t schar; /* Additional characters to skip. */
141 size_t eword; /* Zero-origin first word after field. */
142 size_t echar; /* Additional characters in field. */
143 bool const *ignore; /* Boolean array of characters to ignore. */
144 char const *translate; /* Translation applied to characters. */
145 bool skipsblanks; /* Skip leading blanks when finding start. */
146 bool skipeblanks; /* Skip leading blanks when finding end. */
147 bool numeric; /* Flag for numeric comparison. Handle
148 strings of digits with optional decimal
149 point, but no exponential notation. */
150 bool general_numeric; /* Flag for general, numeric comparison.
151 Handle numbers in exponential notation. */
152 bool month; /* Flag for comparison by month name. */
153 bool reverse; /* Reverse the sense of comparison. */
154 struct keyfield *next; /* Next keyfield to try. */
163 /* The name this program was run with. */
166 /* FIXME: None of these tables work with multibyte character sets.
167 Also, there are many other bugs when handling multibyte characters.
168 One way to fix this is to rewrite `sort' to use wide characters
169 internally, but doing this with good performance is a bit
172 /* Table of blanks. */
173 static bool blanks[UCHAR_LIM];
175 /* Table of non-printing characters. */
176 static bool nonprinting[UCHAR_LIM];
178 /* Table of non-dictionary characters (not letters, digits, or blanks). */
179 static bool nondictionary[UCHAR_LIM];
181 /* Translation table folding lower case to upper. */
182 static char fold_toupper[UCHAR_LIM];
184 #define MONTHS_PER_YEAR 12
186 /* Table mapping month names to integers.
187 Alphabetic order allows binary search. */
188 static struct month monthtab[] =
204 /* During the merge phase, the number of files to merge at once. */
207 /* Minimum size for a merge or check buffer. */
208 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
210 /* Minimum sort size; the code might not work with smaller sizes. */
211 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
213 /* The number of bytes needed for a merge or check buffer, which can
214 function relatively efficiently even if it holds only one line. If
215 a longer line is seen, this value is increased. */
216 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
218 /* The approximate maximum number of bytes of main memory to use, as
219 specified by the user. Zero if the user has not specified a size. */
220 static size_t sort_size;
222 /* The guessed size for non-regular files. */
223 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
225 /* Array of directory names in which any temporary files are to be created. */
226 static char const **temp_dirs;
228 /* Number of temporary directory names used. */
229 static size_t temp_dir_count;
231 /* Number of allocated slots in temp_dirs. */
232 static size_t temp_dir_alloc;
234 /* Flag to reverse the order of all comparisons. */
237 /* Flag for stable sort. This turns off the last ditch bytewise
238 comparison of lines, and instead leaves lines in the same order
239 they were read if all keys compare equal. */
242 /* If TAB has this value, blanks separate fields. */
243 enum { TAB_DEFAULT = CHAR_MAX + 1 };
245 /* Tab character separating fields. If TAB_DEFAULT, then fields are
246 separated by the empty string between a non-blank character and a blank
248 static int tab = TAB_DEFAULT;
250 /* Flag to remove consecutive duplicate lines from the output.
251 Only the last of a sequence of equal lines will be output. */
254 /* Nonzero if any of the input files are the standard input. */
255 static bool have_read_stdin;
257 /* List of key field comparisons to be tried. */
258 static struct keyfield *keylist;
260 static void sortlines_temp (struct line *, size_t, struct line *);
262 /* Report MESSAGE for FILE, then clean up and exit.
263 If FILE is null, it represents standard output. */
265 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
267 die (char const *message, char const *file)
269 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
276 if (status != EXIT_SUCCESS)
277 fprintf (stderr, _("Try `%s --help' for more information.\n"),
282 Usage: %s [OPTION]... [FILE]...\n\
286 Write sorted concatenation of all FILE(s) to standard output.\n\
290 Mandatory arguments to long options are mandatory for short options too.\n\
297 -b, --ignore-leading-blanks ignore leading blanks\n\
298 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
299 -f, --ignore-case fold lower case to upper case characters\n\
302 -g, --general-numeric-sort compare according to general numerical value\n\
303 -i, --ignore-nonprinting consider only printable characters\n\
304 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
305 -n, --numeric-sort compare according to string numerical value\n\
306 -r, --reverse reverse the result of comparisons\n\
312 -c, --check check whether input is sorted; do not sort\n\
313 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
314 -m, --merge merge already sorted files; do not sort\n\
315 -o, --output=FILE write result to FILE instead of standard output\n\
316 -s, --stable stabilize sort by disabling last-resort comparison\n\
317 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
320 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
321 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
322 multiple options specify multiple directories\n\
323 -u, --unique with -c, check for strict ordering;\n\
324 without -c, output only the first of an equal run\n\
327 -z, --zero-terminated end lines with 0 byte, not newline\n\
329 fputs (HELP_OPTION_DESCRIPTION, stdout);
330 fputs (VERSION_OPTION_DESCRIPTION, stdout);
333 POS is F[.C][OPTS], where F is the field number and C the character position\n\
334 in the field. If neither the -t nor the -b option is in effect, the characters\n\
335 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
336 one or more single-letter ordering options, which override global ordering\n\
337 options for that key. If no key is given, use the entire line as the key.\n\
339 SIZE may be followed by the following multiplicative suffixes:\n\
342 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
344 With no FILE, or when FILE is -, read standard input.\n\
347 The locale specified by the environment affects sort order.\n\
348 Set LC_ALL=C to get the traditional sort order that uses\n\
349 native byte values.\n\
351 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
357 static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
359 static struct option const long_options[] =
361 {"ignore-leading-blanks", no_argument, NULL, 'b'},
362 {"check", no_argument, NULL, 'c'},
363 {"dictionary-order", no_argument, NULL, 'd'},
364 {"ignore-case", no_argument, NULL, 'f'},
365 {"general-numeric-sort", no_argument, NULL, 'g'},
366 {"ignore-nonprinting", no_argument, NULL, 'i'},
367 {"key", required_argument, NULL, 'k'},
368 {"merge", no_argument, NULL, 'm'},
369 {"month-sort", no_argument, NULL, 'M'},
370 {"numeric-sort", no_argument, NULL, 'n'},
371 {"output", required_argument, NULL, 'o'},
372 {"reverse", no_argument, NULL, 'r'},
373 {"stable", no_argument, NULL, 's'},
374 {"buffer-size", required_argument, NULL, 'S'},
375 {"field-separator", required_argument, NULL, 't'},
376 {"temporary-directory", required_argument, NULL, 'T'},
377 {"unique", no_argument, NULL, 'u'},
378 {"zero-terminated", no_argument, NULL, 'z'},
379 {GETOPT_HELP_OPTION_DECL},
380 {GETOPT_VERSION_OPTION_DECL},
384 /* The set of signals that are caught. */
385 static sigset_t caught_signals;
387 /* The list of temporary files. */
390 struct tempnode *volatile next;
391 char name[1]; /* Actual size is 1 + file name length. */
393 static struct tempnode *volatile temphead;
394 static struct tempnode *volatile *temptail = &temphead;
396 /* Clean up any remaining temporary files. */
401 struct tempnode const *node;
403 for (node = temphead; node; node = node->next)
407 /* Create a new temporary file, returning its newly allocated name.
408 Store into *PFP a stream open for writing. */
411 create_temp_file (FILE **pfp)
413 static char const slashbase[] = "/sortXXXXXX";
414 static size_t temp_dir_index;
418 char const *temp_dir = temp_dirs[temp_dir_index];
419 size_t len = strlen (temp_dir);
420 struct tempnode *node =
421 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
422 char *file = node->name;
424 memcpy (file, temp_dir, len);
425 memcpy (file + len, slashbase, sizeof slashbase);
427 if (++temp_dir_index == temp_dir_count)
430 /* Create the temporary file in a critical section, to avoid races. */
431 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
436 temptail = &node->next;
439 sigprocmask (SIG_SETMASK, &oldset, NULL);
442 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
443 die (_("cannot create temporary file"), file);
448 /* Return a stream for FILE, opened with mode HOW. A null FILE means
449 standard output; HOW should be "w". When opening for input, "-"
450 means standard input. To avoid confusion, do not return file
451 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
452 opening an ordinary FILE. */
455 xfopen (const char *file, const char *how)
461 else if (STREQ (file, "-") && *how == 'r')
463 have_read_stdin = true;
468 fp = fopen (file, how);
470 die (_("open failed"), file);
476 /* Close FP, whose name is FILE, and report any errors. */
479 xfclose (FILE *fp, char const *file)
484 /* Allow reading stdin from tty more than once. */
490 /* Don't close stdout just yet. close_stdout does that. */
491 if (fflush (fp) != 0)
492 die (_("fflush failed"), file);
496 if (fclose (fp) != 0)
497 die (_("close failed"), file);
503 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
505 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
506 die (_("write failed"), output_file);
509 /* Append DIR to the array of temporary directory names. */
511 add_temp_dir (char const *dir)
513 if (temp_dir_count == temp_dir_alloc)
514 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
516 temp_dirs[temp_dir_count++] = dir;
519 /* Remove NAME from the list of temporary files. */
522 zaptemp (const char *name)
524 struct tempnode *volatile *pnode;
525 struct tempnode *node;
526 struct tempnode *next;
529 int unlink_errno = 0;
531 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
534 /* Unlink the temporary file in a critical section to avoid races. */
536 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
537 unlink_status = unlink (name);
538 unlink_errno = errno;
540 sigprocmask (SIG_SETMASK, &oldset, NULL);
542 if (unlink_status != 0)
543 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
552 struct_month_cmp (const void *m1, const void *m2)
554 struct month const *month1 = m1;
555 struct month const *month2 = m2;
556 return strcmp (month1->name, month2->name);
561 /* Initialize the character class tables. */
568 for (i = 0; i < UCHAR_LIM; ++i)
570 blanks[i] = !!ISBLANK (i);
571 nonprinting[i] = !ISPRINT (i);
572 nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
573 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
577 /* If we're not in the "C" locale, read different names for months. */
580 for (i = 0; i < MONTHS_PER_YEAR; i++)
587 s = (char *) nl_langinfo (ABMON_1 + i);
589 monthtab[i].name = name = xmalloc (s_len + 1);
590 monthtab[i].val = i + 1;
592 for (j = 0; j < s_len; j++)
593 name[j] = fold_toupper[to_uchar (s[j])];
596 qsort ((void *) monthtab, MONTHS_PER_YEAR,
597 sizeof *monthtab, struct_month_cmp);
602 /* Specify the amount of main memory to use when sorting. */
604 specify_sort_size (char const *s)
608 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
610 /* The default unit is KiB. */
611 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
613 if (n <= UINTMAX_MAX / 1024)
616 e = LONGINT_OVERFLOW;
619 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
620 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
629 double mem = physmem_total () * n / 100;
631 /* Use "<", not "<=", to avoid problems with rounding. */
632 if (mem < UINTMAX_MAX)
638 e = LONGINT_OVERFLOW;
645 /* If multiple sort sizes are specified, take the maximum, so
646 that option order does not matter. */
653 sort_size = MAX (sort_size, MIN_SORT_SIZE);
657 e = LONGINT_OVERFLOW;
660 STRTOL_FATAL_ERROR (s, _("sort size"), e);
663 /* Return the default sort size. */
665 default_sort_size (void)
667 /* Let MEM be available memory or 1/8 of total memory, whichever
669 double avail = physmem_available ();
670 double total = physmem_total ();
671 double mem = MAX (avail, total / 8);
672 struct rlimit rlimit;
674 /* Let SIZE be MEM, but no more than the maximum object size or
675 system resource limits. Avoid the MIN macro here, as it is not
676 quite right when only one argument is floating point. Don't
677 bother to check for values like RLIM_INFINITY since in practice
678 they are not much less than SIZE_MAX. */
679 size_t size = SIZE_MAX;
682 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
683 size = rlimit.rlim_cur;
685 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
686 size = rlimit.rlim_cur;
689 /* Leave a large safety margin for the above limits, as failure can
690 occur when they are exceeded. */
694 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
695 Exceeding RSS is not fatal, but can be quite slow. */
696 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
697 size = rlimit.rlim_cur / 16 * 15;
700 /* Use no less than the minimum. */
701 return MAX (size, MIN_SORT_SIZE);
704 /* Return the sort buffer size to use with the input files identified
705 by FPS and FILES, which are alternate names of the same files.
706 NFILES gives the number of input files; NFPS may be less. Assume
707 that each input line requires LINE_BYTES extra bytes' worth of line
708 information. Do not exceed the size bound specified by the user
709 (or a default size bound, if the user does not specify one). */
712 sort_buffer_size (FILE *const *fps, size_t nfps,
713 char *const *files, size_t nfiles,
716 /* A bound on the input size. If zero, the bound hasn't been
718 static size_t size_bound;
720 /* In the worst case, each input byte is a newline. */
721 size_t worst_case_per_input_byte = line_bytes + 1;
723 /* Keep enough room for one extra input line and an extra byte.
724 This extra room might be needed when preparing to read EOF. */
725 size_t size = worst_case_per_input_byte + 1;
729 for (i = 0; i < nfiles; i++)
735 if ((i < nfps ? fstat (fileno (fps[i]), &st)
736 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
737 : stat (files[i], &st))
739 die (_("stat failed"), files[i]);
741 if (S_ISREG (st.st_mode))
742 file_size = st.st_size;
745 /* The file has unknown size. If the user specified a sort
746 buffer size, use that; otherwise, guess the size. */
749 file_size = INPUT_FILE_SIZE_GUESS;
754 size_bound = sort_size;
756 size_bound = default_sort_size ();
759 /* Add the amount of memory needed to represent the worst case
760 where the input consists entirely of newlines followed by a
761 single non-newline. Check for overflow. */
762 worst_case = file_size * worst_case_per_input_byte + 1;
763 if (file_size != worst_case / worst_case_per_input_byte
764 || size_bound - size <= worst_case)
772 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
773 must be at least sizeof (struct line). Allocate ALLOC bytes
777 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
779 /* Ensure that the line array is properly aligned. If the desired
780 size cannot be allocated, repeatedly halve it until allocation
781 succeeds. The smaller allocation may hurt overall performance,
782 but that's better than failing. */
785 alloc += sizeof (struct line) - alloc % sizeof (struct line);
786 buf->buf = malloc (alloc);
790 if (alloc <= line_bytes + 1)
794 buf->line_bytes = line_bytes;
796 buf->used = buf->left = buf->nlines = 0;
800 /* Return one past the limit of the line array. */
802 static inline struct line *
803 buffer_linelim (struct buffer const *buf)
805 return (struct line *) (buf->buf + buf->alloc);
808 /* Return a pointer to the first character of the field specified
812 begfield (const struct line *line, const struct keyfield *key)
814 char *ptr = line->text, *lim = ptr + line->length - 1;
815 size_t sword = key->sword;
816 size_t schar = key->schar;
817 size_t remaining_bytes;
819 /* The leading field separator itself is included in a field when -t
822 if (tab != TAB_DEFAULT)
823 while (ptr < lim && sword--)
825 while (ptr < lim && *ptr != tab)
831 while (ptr < lim && sword--)
833 while (ptr < lim && blanks[to_uchar (*ptr)])
835 while (ptr < lim && !blanks[to_uchar (*ptr)])
839 if (key->skipsblanks)
840 while (ptr < lim && blanks[to_uchar (*ptr)])
843 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
844 remaining_bytes = lim - ptr;
845 if (schar < remaining_bytes)
853 /* Return the limit of (a pointer to the first character after) the field
854 in LINE specified by KEY. */
857 limfield (const struct line *line, const struct keyfield *key)
859 char *ptr = line->text, *lim = ptr + line->length - 1;
860 size_t eword = key->eword, echar = key->echar;
861 size_t remaining_bytes;
863 /* Move PTR past EWORD fields or to one past the last byte on LINE,
864 whichever comes first. If there are more than EWORD fields, leave
865 PTR pointing at the beginning of the field having zero-based index,
866 EWORD. If a delimiter character was specified (via -t), then that
867 `beginning' is the first character following the delimiting TAB.
868 Otherwise, leave PTR pointing at the first `blank' character after
869 the preceding field. */
870 if (tab != TAB_DEFAULT)
871 while (ptr < lim && eword--)
873 while (ptr < lim && *ptr != tab)
875 if (ptr < lim && (eword | echar))
879 while (ptr < lim && eword--)
881 while (ptr < lim && blanks[to_uchar (*ptr)])
883 while (ptr < lim && !blanks[to_uchar (*ptr)])
887 #ifdef POSIX_UNSPECIFIED
888 /* The following block of code makes GNU sort incompatible with
889 standard Unix sort, so it's ifdef'd out for now.
890 The POSIX spec isn't clear on how to interpret this.
891 FIXME: request clarification.
893 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
894 Date: Thu, 30 May 96 12:20:41 -0400
895 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
897 [...]I believe I've found another bug in `sort'.
902 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
905 $ /bin/sort -k1.7,1.7 </tmp/sort.in
909 Unix sort produced the answer I expected: sort on the single character
910 in column 7. GNU sort produced different results, because it disagrees
911 on the interpretation of the key-end spec "M.N". Unix sort reads this
912 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
913 "skip M-1 fields, then either N-1 characters or the rest of the current
914 field, whichever comes first". This extra clause applies only to
915 key-ends, not key-starts.
918 /* Make LIM point to the end of (one byte past) the current field. */
919 if (tab != TAB_DEFAULT)
922 newlim = memchr (ptr, tab, lim - ptr);
930 while (newlim < lim && blanks[to_uchar (*newlim)])
932 while (newlim < lim && !blanks[to_uchar (*newlim)])
938 /* If we're ignoring leading blanks when computing the End
939 of the field, don't start counting bytes until after skipping
940 past any leading blanks. */
941 if (key->skipeblanks)
942 while (ptr < lim && blanks[to_uchar (*ptr)])
945 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
946 remaining_bytes = lim - ptr;
947 if (echar < remaining_bytes)
955 /* Fill BUF reading from FP, moving buf->left bytes from the end
956 of buf->buf to the beginning first. If EOF is reached and the
957 file wasn't terminated by a newline, supply one. Set up BUF's line
958 table too. FILE is the name of the file corresponding to FP.
959 Return true if some input was read. */
962 fillbuf (struct buffer *buf, FILE *fp, char const *file)
964 struct keyfield const *key = keylist;
966 size_t line_bytes = buf->line_bytes;
967 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
972 if (buf->used != buf->left)
974 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
975 buf->used = buf->left;
981 char *ptr = buf->buf + buf->used;
982 struct line *linelim = buffer_linelim (buf);
983 struct line *line = linelim - buf->nlines;
984 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
985 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
987 while (line_bytes + 1 < avail)
989 /* Read as many bytes as possible, but do not read so many
990 bytes that there might not be enough room for the
991 corresponding line array. The worst case is when the
992 rest of the input file consists entirely of newlines,
993 except that the last byte is not a newline. */
994 size_t readsize = (avail - 1) / (line_bytes + 1);
995 size_t bytes_read = fread (ptr, 1, readsize, fp);
996 char *ptrlim = ptr + bytes_read;
1000 if (bytes_read != readsize)
1003 die (_("read failed"), file);
1007 if (buf->buf == ptrlim)
1009 if (ptrlim[-1] != eol)
1014 /* Find and record each line in the just-read input. */
1015 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1019 line->text = line_start;
1020 line->length = ptr - line_start;
1021 mergesize = MAX (mergesize, line->length);
1022 avail -= line_bytes;
1026 /* Precompute the position of the first key for
1028 line->keylim = (key->eword == SIZE_MAX
1030 : limfield (line, key));
1032 if (key->sword != SIZE_MAX)
1033 line->keybeg = begfield (line, key);
1036 if (key->skipsblanks)
1037 while (blanks[to_uchar (*line_start)])
1039 line->keybeg = line_start;
1051 buf->used = ptr - buf->buf;
1052 buf->nlines = buffer_linelim (buf) - line;
1053 if (buf->nlines != 0)
1055 buf->left = ptr - line_start;
1056 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1060 /* The current input line is too long to fit in the buffer.
1061 Double the buffer size and try again. */
1062 buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1066 /* Compare strings A and B as numbers without explicitly converting them to
1067 machine numbers. Comparatively slow for short strings, but asymptotically
1071 numcompare (const char *a, const char *b)
1073 while (blanks[to_uchar (*a)])
1075 while (blanks[to_uchar (*b)])
1078 return strnumcmp (a, b, decimal_point, thousands_sep);
1082 general_numcompare (const char *sa, const char *sb)
1084 /* FIXME: add option to warn about failed conversions. */
1085 /* FIXME: maybe add option to try expensive FP conversion
1086 only if A and B can't be compared more cheaply/accurately. */
1090 double a = strtod (sa, &ea);
1091 double b = strtod (sb, &eb);
1093 /* Put conversion errors at the start of the collating sequence. */
1095 return sb == eb ? 0 : -1;
1099 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1100 conversion errors but before numbers; sort them by internal
1101 bit-pattern, for lack of a more portable alternative. */
1107 : memcmp ((char *) &a, (char *) &b, sizeof a));
1110 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1111 Return 0 if the name in S is not recognized. */
1114 getmonth (char const *month, size_t len)
1117 size_t hi = MONTHS_PER_YEAR;
1118 char const *monthlim = month + len;
1122 if (month == monthlim)
1124 if (!blanks[to_uchar (*month)])
1131 size_t ix = (lo + hi) / 2;
1132 char const *m = month;
1133 char const *n = monthtab[ix].name;
1138 return monthtab[ix].val;
1139 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1144 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1156 /* Compare two lines A and B trying every key in sequence until there
1157 are no more keys or a difference is found. */
1160 keycompare (const struct line *a, const struct line *b)
1162 struct keyfield const *key = keylist;
1164 /* For the first iteration only, the key positions have been
1165 precomputed for us. */
1166 char *texta = a->keybeg;
1167 char *textb = b->keybeg;
1168 char *lima = a->keylim;
1169 char *limb = b->keylim;
1175 char const *translate = key->translate;
1176 bool const *ignore = key->ignore;
1178 /* Find the lengths. */
1179 size_t lena = lima <= texta ? 0 : lima - texta;
1180 size_t lenb = limb <= textb ? 0 : limb - textb;
1182 /* Actually compare the fields. */
1183 if (key->numeric | key->general_numeric)
1185 char savea = *lima, saveb = *limb;
1187 *lima = *limb = '\0';
1188 diff = ((key->numeric ? numcompare : general_numcompare)
1190 *lima = savea, *limb = saveb;
1192 else if (key->month)
1193 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1194 /* Sorting like this may become slow, so in a simple locale the user
1195 can select a faster sort that is similar to ascii sort. */
1196 else if (hard_LC_COLLATE)
1198 if (ignore || translate)
1201 size_t size = lena + 1 + lenb + 1;
1202 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1203 char *copy_b = copy_a + lena + 1;
1204 size_t new_len_a, new_len_b, i;
1206 /* Ignore and/or translate chars before comparing. */
1207 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1211 copy_a[new_len_a] = (translate
1212 ? translate[to_uchar (texta[i])]
1214 if (!ignore || !ignore[to_uchar (texta[i])])
1219 copy_b[new_len_b] = (translate
1220 ? translate[to_uchar (textb[i])]
1222 if (!ignore || !ignore[to_uchar (textb[i])])
1227 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1229 if (sizeof buf < size)
1233 diff = - NONZERO (lenb);
1237 diff = xmemcoll (texta, lena, textb, lenb);
1241 #define CMP_WITH_IGNORE(A, B) \
1246 while (texta < lima && ignore[to_uchar (*texta)]) \
1248 while (textb < limb && ignore[to_uchar (*textb)]) \
1250 if (! (texta < lima && textb < limb)) \
1252 diff = to_uchar (A) - to_uchar (B); \
1259 diff = (texta < lima) - (textb < limb); \
1264 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1265 translate[to_uchar (*textb)]);
1267 CMP_WITH_IGNORE (*texta, *textb);
1270 diff = - NONZERO (lenb);
1277 while (texta < lima && textb < limb)
1279 diff = (to_uchar (translate[to_uchar (*texta++)])
1280 - to_uchar (translate[to_uchar (*textb++)]));
1287 diff = memcmp (texta, textb, MIN (lena, lenb));
1291 diff = lena < lenb ? -1 : lena != lenb;
1301 /* Find the beginning and limit of the next field. */
1302 if (key->eword != SIZE_MAX)
1303 lima = limfield (a, key), limb = limfield (b, key);
1305 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1307 if (key->sword != SIZE_MAX)
1308 texta = begfield (a, key), textb = begfield (b, key);
1311 texta = a->text, textb = b->text;
1312 if (key->skipsblanks)
1314 while (texta < lima && blanks[to_uchar (*texta)])
1316 while (textb < limb && blanks[to_uchar (*textb)])
1327 return key->reverse ? -diff : diff;
1330 /* Compare two lines A and B, returning negative, zero, or positive
1331 depending on whether A compares less than, equal to, or greater than B. */
1334 compare (const struct line *a, const struct line *b)
1339 /* First try to compare on the specified keys (if any).
1340 The only two cases with no key at all are unadorned sort,
1341 and unadorned sort -r. */
1344 diff = keycompare (a, b);
1345 if (diff | unique | stable)
1349 /* If the keys all compare equal (or no keys were specified)
1350 fall through to the default comparison. */
1351 alen = a->length - 1, blen = b->length - 1;
1354 diff = - NONZERO (blen);
1357 else if (hard_LC_COLLATE)
1358 diff = xmemcoll (a->text, alen, b->text, blen);
1359 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1360 diff = alen < blen ? -1 : alen != blen;
1362 return reverse ? -diff : diff;
1365 /* Check that the lines read from FILE_NAME come in order. Print a
1366 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1367 false if they are not in order. Otherwise, print no diagnostic
1371 check (char const *file_name)
1373 FILE *fp = xfopen (file_name, "r");
1374 struct buffer buf; /* Input buffer. */
1375 struct line temp; /* Copy of previous line. */
1377 uintmax_t line_number = 0;
1378 struct keyfield const *key = keylist;
1379 bool nonunique = ! unique;
1380 bool ordered = true;
1382 initbuf (&buf, sizeof (struct line),
1383 MAX (merge_buffer_size, sort_size));
1386 while (fillbuf (&buf, fp, file_name))
1388 struct line const *line = buffer_linelim (&buf);
1389 struct line const *linebase = line - buf.nlines;
1391 /* Make sure the line saved from the old buffer contents is
1392 less than or equal to the first line of the new buffer. */
1393 if (alloc && nonunique <= compare (&temp, line - 1))
1397 struct line const *disorder_line = line - 1;
1398 uintmax_t disorder_line_number =
1399 buffer_linelim (&buf) - disorder_line + line_number;
1400 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1401 fprintf (stderr, _("%s: %s:%s: disorder: "),
1402 program_name, file_name,
1403 umaxtostr (disorder_line_number, hr_buf));
1404 write_bytes (disorder_line->text, disorder_line->length, stderr,
1405 _("standard error"));
1411 /* Compare each line in the buffer with its successor. */
1412 while (linebase < --line)
1413 if (nonunique <= compare (line, line - 1))
1414 goto found_disorder;
1416 line_number += buf.nlines;
1418 /* Save the last line of the buffer. */
1419 if (alloc < line->length)
1426 alloc = line->length;
1430 while (alloc < line->length);
1432 temp.text = xrealloc (temp.text, alloc);
1434 memcpy (temp.text, line->text, line->length);
1435 temp.length = line->length;
1438 temp.keybeg = temp.text + (line->keybeg - line->text);
1439 temp.keylim = temp.text + (line->keylim - line->text);
1443 xfclose (fp, file_name);
1449 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
1450 files (all of which are at the start of the FILES array), and
1451 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
1452 Close input and output files before returning.
1453 OUTPUT_FILE gives the name of the output file. If it is NULL,
1454 the output file is standard output. If OFP is NULL, the output
1455 file has not been opened yet (or written to, if standard output). */
1458 mergefps (char **files, size_t ntemps, size_t nfiles,
1459 FILE *ofp, char const *output_file)
1461 FILE *fps[NMERGE]; /* Input streams for each file. */
1462 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1463 struct line saved; /* Saved line storage for unique check. */
1464 struct line const *savedline = NULL;
1465 /* &saved if there is a saved line. */
1466 size_t savealloc = 0; /* Size allocated for the saved line. */
1467 struct line const *cur[NMERGE]; /* Current line in each line table. */
1468 struct line const *base[NMERGE]; /* Base of each line table. */
1469 size_t ord[NMERGE]; /* Table representing a permutation of fps,
1470 such that cur[ord[0]] is the smallest line
1471 and will be next output. */
1475 struct keyfield const *key = keylist;
1478 /* Read initial lines from each input file. */
1479 for (i = 0; i < nfiles; )
1481 fps[i] = xfopen (files[i], "r");
1482 initbuf (&buffer[i], sizeof (struct line),
1483 MAX (merge_buffer_size, sort_size / nfiles));
1484 if (fillbuf (&buffer[i], fps[i], files[i]))
1486 struct line const *linelim = buffer_linelim (&buffer[i]);
1487 cur[i] = linelim - 1;
1488 base[i] = linelim - buffer[i].nlines;
1493 /* fps[i] is empty; eliminate it from future consideration. */
1494 xfclose (fps[i], files[i]);
1500 free (buffer[i].buf);
1502 for (j = i; j < nfiles; ++j)
1503 files[j] = files[j + 1];
1508 ofp = xfopen (output_file, "w");
1510 /* Set up the ord table according to comparisons among input lines.
1511 Since this only reorders two items if one is strictly greater than
1512 the other, it is stable. */
1513 for (i = 0; i < nfiles; ++i)
1515 for (i = 1; i < nfiles; ++i)
1516 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1517 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1519 /* Repeatedly output the smallest line until no input remains. */
1522 struct line const *smallest = cur[ord[0]];
1524 /* If uniquified output is turned on, output only the first of
1525 an identical series of lines. */
1528 if (savedline && compare (savedline, smallest))
1531 write_bytes (saved.text, saved.length, ofp, output_file);
1536 if (savealloc < smallest->length)
1541 savealloc = smallest->length;
1544 while ((savealloc *= 2) < smallest->length);
1546 saved.text = xrealloc (saved.text, savealloc);
1548 saved.length = smallest->length;
1549 memcpy (saved.text, smallest->text, saved.length);
1553 saved.text + (smallest->keybeg - smallest->text);
1555 saved.text + (smallest->keylim - smallest->text);
1560 write_bytes (smallest->text, smallest->length, ofp, output_file);
1562 /* Check if we need to read more lines into core. */
1563 if (base[ord[0]] < smallest)
1564 cur[ord[0]] = smallest - 1;
1567 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1569 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1570 cur[ord[0]] = linelim - 1;
1571 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1575 /* We reached EOF on fps[ord[0]]. */
1576 for (i = 1; i < nfiles; ++i)
1577 if (ord[i] > ord[0])
1580 xfclose (fps[ord[0]], files[ord[0]]);
1581 if (ord[0] < ntemps)
1584 zaptemp (files[ord[0]]);
1586 free (buffer[ord[0]].buf);
1587 for (i = ord[0]; i < nfiles; ++i)
1589 fps[i] = fps[i + 1];
1590 files[i] = files[i + 1];
1591 buffer[i] = buffer[i + 1];
1592 cur[i] = cur[i + 1];
1593 base[i] = base[i + 1];
1595 for (i = 0; i < nfiles; ++i)
1596 ord[i] = ord[i + 1];
1601 /* The new line just read in may be larger than other lines
1602 already in main memory; push it back in the queue until we
1603 encounter a line larger than it. Optimize for the common
1604 case where the new line is smallest. */
1609 size_t ord0 = ord[0];
1610 size_t count_of_smaller_lines;
1614 int cmp = compare (cur[ord0], cur[ord[probe]]);
1615 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
1619 probe = (lo + hi) / 2;
1622 count_of_smaller_lines = lo - 1;
1623 for (j = 0; j < count_of_smaller_lines; j++)
1624 ord[j] = ord[j + 1];
1625 ord[count_of_smaller_lines] = ord0;
1629 if (unique && savedline)
1631 write_bytes (saved.text, saved.length, ofp, output_file);
1635 xfclose (ofp, output_file);
1638 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1639 and HI (with NHI members). T, LO, and HI point just past their
1640 respective arrays, and the arrays are in reverse order. NLO and
1641 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
1644 mergelines (struct line *t,
1645 struct line const *lo, size_t nlo,
1646 struct line const *hi, size_t nhi)
1649 if (compare (lo - 1, hi - 1) <= 0)
1654 /* HI - NHI equalled T - (NLO + NHI) when this function
1655 began. Therefore HI must equal T now, and there is no
1656 need to copy from HI to T. */
1674 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1675 NLINES must be at least 2.
1676 The input and output arrays are in reverse order, and LINES and
1677 TEMP point just past the end of their respective arrays.
1679 Use a recursive divide-and-conquer algorithm, in the style
1680 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
1681 the optimization suggested by exercise 5.2.4-10; this requires room
1682 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
1683 writes that this memory optimization was originally published by
1684 D. A. Bell, Comp J. 1 (1958), 75. */
1687 sortlines (struct line *lines, size_t nlines, struct line *temp)
1691 if (0 < compare (&lines[-1], &lines[-2]))
1693 struct line tmp = lines[-1];
1694 lines[-1] = lines[-2];
1700 size_t nlo = nlines / 2;
1701 size_t nhi = nlines - nlo;
1702 struct line *lo = lines;
1703 struct line *hi = lines - nlo;
1704 struct line *sorted_lo = temp;
1706 sortlines (hi, nhi, temp);
1708 sortlines_temp (lo, nlo, sorted_lo);
1710 sorted_lo[-1] = lo[-1];
1712 mergelines (lines, sorted_lo, nlo, hi, nhi);
1716 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1717 rather than sorting in place. */
1720 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1724 /* Declare `swap' as int, not bool, to work around a bug
1725 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
1726 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
1727 int swap = (0 < compare (&lines[-1], &lines[-2]));
1728 temp[-1] = lines[-1 - swap];
1729 temp[-2] = lines[-2 + swap];
1733 size_t nlo = nlines / 2;
1734 size_t nhi = nlines - nlo;
1735 struct line *lo = lines;
1736 struct line *hi = lines - nlo;
1737 struct line *sorted_hi = temp - nlo;
1739 sortlines_temp (hi, nhi, sorted_hi);
1741 sortlines (lo, nlo, temp);
1743 mergelines (temp, lo, nlo, sorted_hi, nhi);
1747 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
1748 the same as OUTFILE. If found, merge the found instances (and perhaps
1749 some other files) into a temporary file so that it can in turn be
1750 merged into OUTFILE without destroying OUTFILE before it is completely
1751 read. Return the new value of NFILES, which differs from the old if
1752 some merging occurred.
1754 This test ensures that an otherwise-erroneous use like
1755 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
1756 It's not clear that POSIX requires this nicety.
1757 Detect common error cases, but don't try to catch obscure cases like
1758 "cat ... FILE ... | sort -m -o FILE"
1759 where traditional "sort" doesn't copy the input and where
1760 people should know that they're getting into trouble anyway.
1761 Catching these obscure cases would slow down performance in
1765 avoid_trashing_input (char **files, size_t ntemps, size_t nfiles,
1766 char const *outfile)
1769 bool got_outstat = false;
1770 struct stat outstat;
1772 for (i = ntemps; i < nfiles; i++)
1774 bool is_stdin = STREQ (files[i], "-");
1778 if (outfile && STREQ (outfile, files[i]) && !is_stdin)
1785 ? stat (outfile, &outstat)
1786 : fstat (STDOUT_FILENO, &outstat))
1793 ? fstat (STDIN_FILENO, &instat)
1794 : stat (files[i], &instat))
1796 && SAME_INODE (instat, outstat));
1802 char *temp = create_temp_file (&tftp);
1803 mergefps (&files[i], 0, nfiles - i, tftp, temp);
1812 /* Merge the input FILES. NTEMPS is the number of files at the
1813 start of FILES that are temporary; it is zero at the top level.
1814 NFILES is the total number of files. Put the output in
1815 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
1818 merge (char **files, size_t ntemps, size_t nfiles, char const *output_file)
1820 while (NMERGE < nfiles)
1822 /* Number of input files processed so far. */
1825 /* Number of output files generated so far. */
1828 /* nfiles % NMERGE; this counts input files that are left over
1829 after all full-sized merges have been done. */
1832 /* Number of easily-available slots at the next loop iteration. */
1835 /* Do as many NMERGE-size merges as possible. */
1836 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
1839 char *temp = create_temp_file (&tfp);
1840 size_t nt = MIN (ntemps, NMERGE);
1842 mergefps (&files[in], nt, NMERGE, tfp, temp);
1846 remainder = nfiles - in;
1847 cheap_slots = NMERGE - out % NMERGE;
1849 if (cheap_slots < remainder)
1851 /* So many files remain that they can't all be put into the last
1852 NMERGE-sized output window. Do one more merge. Merge as few
1853 files as possible, to avoid needless I/O. */
1854 size_t nshortmerge = remainder - cheap_slots + 1;
1856 char *temp = create_temp_file (&tfp);
1857 size_t nt = MIN (ntemps, nshortmerge);
1859 mergefps (&files[in], nt, nshortmerge, tfp, temp);
1860 files[out++] = temp;
1864 /* Put the remaining input files into the last NMERGE-sized output
1865 window, so they will be merged in the next pass. */
1866 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
1871 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
1872 mergefps (files, ntemps, nfiles, NULL, output_file);
1875 /* Sort NFILES FILES onto OUTPUT_FILE. */
1878 sort (char * const *files, size_t nfiles, char const *output_file)
1882 bool output_file_created = false;
1888 char const *temp_output;
1889 char const *file = *files;
1890 FILE *fp = xfopen (file, "r");
1892 size_t bytes_per_line = (2 * sizeof (struct line)
1893 - sizeof (struct line) / 2);
1896 initbuf (&buf, bytes_per_line,
1897 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
1902 while (fillbuf (&buf, fp, file))
1905 struct line *linebase;
1907 if (buf.eof && nfiles
1908 && (bytes_per_line + 1
1909 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
1911 /* End of file, but there is more input and buffer room.
1912 Concatenate the next input file; this is faster in
1914 buf.left = buf.used;
1918 line = buffer_linelim (&buf);
1919 linebase = line - buf.nlines;
1921 sortlines (line, buf.nlines, linebase);
1922 if (buf.eof && !nfiles && !ntemps && !buf.left)
1925 tfp = xfopen (output_file, "w");
1926 temp_output = output_file;
1927 output_file_created = true;
1932 temp_output = create_temp_file (&tfp);
1938 write_bytes (line->text, line->length, tfp, temp_output);
1940 while (linebase < line && compare (line, line - 1) == 0)
1943 while (linebase < line);
1945 xfclose (tfp, temp_output);
1947 if (output_file_created)
1956 if (! output_file_created)
1959 struct tempnode *node = temphead;
1960 char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
1961 for (i = 0; node; i++)
1963 tempfiles[i] = node->name;
1966 merge (tempfiles, ntemps, ntemps, output_file);
1971 /* Insert key KEY at the end of the key list. */
1974 insertkey (struct keyfield *key)
1976 struct keyfield **p;
1978 for (p = &keylist; *p; p = &(*p)->next)
1984 /* Report a bad field specification SPEC, with extra info MSGID. */
1986 static void badfieldspec (char const *, char const *)
1989 badfieldspec (char const *spec, char const *msgid)
1991 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
1992 _(msgid), quote (spec));
1996 /* Parse the leading integer in STRING and store the resulting value
1997 (which must fit into size_t) into *VAL. Return the address of the
1998 suffix after the integer. If MSGID is NULL, return NULL after
1999 failure; otherwise, report MSGID and exit on failure. */
2002 parse_field_count (char const *string, size_t *val, char const *msgid)
2007 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2010 case LONGINT_INVALID_SUFFIX_CHAR:
2015 case LONGINT_OVERFLOW:
2016 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2018 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2019 _(msgid), (int) (suffix - string), string);
2022 case LONGINT_INVALID:
2024 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2025 _(msgid), quote (string));
2032 /* Handle interrupts and hangups. */
2035 sighandler (int sig)
2038 signal (sig, SIG_IGN);
2042 signal (sig, SIG_DFL);
2046 /* Set the ordering options for KEY specified in S.
2047 Return the address of the first character in S that
2048 is not a valid ordering option.
2049 BLANKTYPE is the kind of blanks that 'b' should skip. */
2052 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2059 if (blanktype == bl_start || blanktype == bl_both)
2060 key->skipsblanks = true;
2061 if (blanktype == bl_end || blanktype == bl_both)
2062 key->skipeblanks = true;
2065 key->ignore = nondictionary;
2068 key->translate = fold_toupper;
2071 key->general_numeric = true;
2074 /* Option order should not matter, so don't let -i override
2075 -d. -d implies -i, but -i does not imply -d. */
2077 key->ignore = nonprinting;
2083 key->numeric = true;
2086 key->reverse = true;
2096 static struct keyfield *
2099 struct keyfield *key = xzalloc (sizeof *key);
2100 key->eword = SIZE_MAX;
2105 main (int argc, char **argv)
2107 struct keyfield *key;
2108 struct keyfield gkey;
2111 bool checkonly = false;
2112 bool mergeonly = false;
2114 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2115 bool obsolete_usage = (posix2_version () < 200112);
2116 char *minus = "-", **files;
2117 char const *outfile = NULL;
2119 initialize_main (&argc, &argv);
2120 program_name = argv[0];
2121 setlocale (LC_ALL, "");
2122 bindtextdomain (PACKAGE, LOCALEDIR);
2123 textdomain (PACKAGE);
2127 initialize_exit_failure (SORT_FAILURE);
2128 atexit (close_stdout);
2130 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2131 #if HAVE_NL_LANGINFO
2132 hard_LC_TIME = hard_locale (LC_TIME);
2135 /* Get locale's representation of the decimal point. */
2137 struct lconv const *locale = localeconv ();
2139 /* If the locale doesn't define a decimal point, or if the decimal
2140 point is multibyte, use the C locale's decimal point. FIXME:
2141 add support for multibyte decimal points. */
2142 decimal_point = to_uchar (locale->decimal_point[0]);
2143 if (! decimal_point || locale->decimal_point[1])
2144 decimal_point = '.';
2146 /* FIXME: add support for multibyte thousands separators. */
2147 thousands_sep = to_uchar (*locale->thousands_sep);
2148 if (! thousands_sep || locale->thousands_sep[1])
2152 have_read_stdin = false;
2157 static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2158 enum { nsigs = sizeof sig / sizeof sig[0] };
2161 struct sigaction act;
2163 sigemptyset (&caught_signals);
2164 for (i = 0; i < nsigs; i++)
2166 sigaction (sig[i], NULL, &act);
2167 if (act.sa_handler != SIG_IGN)
2168 sigaddset (&caught_signals, sig[i]);
2171 act.sa_handler = sighandler;
2172 act.sa_mask = caught_signals;
2175 for (i = 0; i < nsigs; i++)
2176 if (sigismember (&caught_signals, sig[i]))
2177 sigaction (sig[i], &act, NULL);
2179 for (i = 0; i < nsigs; i++)
2180 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2182 signal (sig[i], sighandler);
2183 siginterrupt (sig[i], 1);
2188 gkey.sword = gkey.eword = SIZE_MAX;
2190 gkey.translate = NULL;
2191 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2192 gkey.skipsblanks = gkey.skipeblanks = false;
2194 files = xnmalloc (argc, sizeof *files);
2198 /* Parse an operand as a file after "--" was seen; or if
2199 pedantic and a file was seen, unless the POSIX version
2200 predates 1003.1-2001 and -c was not seen and the operand is
2201 "-o FILE" or "-oFILE". */
2204 || (posixly_correct && nfiles != 0
2205 && ! (obsolete_usage
2208 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2209 && (argv[optind][2] || optind + 1 != argc)))
2210 || ((c = getopt_long (argc, argv, short_options,
2211 long_options, NULL))
2216 files[nfiles++] = argv[optind++];
2222 if (obsolete_usage && optarg[0] == '+')
2224 /* Treat +POS1 [-POS2] as a key if possible; but silently
2225 treat an operand as a file if it is not a valid +POS1. */
2227 s = parse_field_count (optarg + 1, &key->sword, NULL);
2229 s = parse_field_count (s + 1, &key->schar, NULL);
2230 if (! (key->sword | key->schar))
2231 key->sword = SIZE_MAX;
2232 if (! s || *set_ordering (s, key, bl_start))
2239 if (optind != argc && argv[optind][0] == '-'
2240 && ISDIGIT (argv[optind][1]))
2242 char const *optarg1 = argv[optind++];
2243 s = parse_field_count (optarg1 + 1, &key->eword,
2244 N_("invalid number after `-'"));
2246 s = parse_field_count (s + 1, &key->echar,
2247 N_("invalid number after `.'"));
2248 if (*set_ordering (s, key, bl_end))
2249 badfieldspec (optarg1,
2250 N_("stray character in field spec"));
2256 files[nfiles++] = optarg;
2271 set_ordering (str, &gkey, bl_both);
2283 s = parse_field_count (optarg, &key->sword,
2284 N_("invalid number at field start"));
2287 /* Provoke with `sort -k0' */
2288 badfieldspec (optarg, N_("field number is zero"));
2292 s = parse_field_count (s + 1, &key->schar,
2293 N_("invalid number after `.'"));
2296 /* Provoke with `sort -k1.0' */
2297 badfieldspec (optarg, N_("character offset is zero"));
2300 if (! (key->sword | key->schar))
2301 key->sword = SIZE_MAX;
2302 s = set_ordering (s, key, bl_start);
2305 key->eword = SIZE_MAX;
2311 s = parse_field_count (s + 1, &key->eword,
2312 N_("invalid number after `,'"));
2315 /* Provoke with `sort -k1,0' */
2316 badfieldspec (optarg, N_("field number is zero"));
2319 s = parse_field_count (s + 1, &key->echar,
2320 N_("invalid number after `.'"));
2323 /* `-k 2,3' is equivalent to `+1 -3'. */
2326 s = set_ordering (s, key, bl_end);
2329 badfieldspec (optarg, N_("stray character in field spec"));
2338 if (outfile && !STREQ (outfile, optarg))
2339 error (SORT_FAILURE, 0, _("multiple output files specified"));
2348 specify_sort_size (optarg);
2353 char newtab = optarg[0];
2355 error (SORT_FAILURE, 0, _("empty tab"));
2358 if (STREQ (optarg, "\\0"))
2362 /* Provoke with `sort -txx'. Complain about
2363 "multi-character tab" instead of "multibyte tab", so
2364 that the diagnostic's wording does not need to be
2365 changed once multibyte characters are supported. */
2366 error (SORT_FAILURE, 0, _("multi-character tab %s"),
2370 if (tab != TAB_DEFAULT && tab != newtab)
2371 error (SORT_FAILURE, 0, _("incompatible tabs"));
2377 add_temp_dir (optarg);
2385 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
2386 through Solaris 7. It is also accepted by many non-Solaris
2387 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
2388 -y is marked as obsolete starting with Solaris 8 (1999), but is
2389 still accepted as of Solaris 10 prerelease (2004).
2391 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
2392 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
2393 and which in general ignores the argument after "-y" if it
2394 consists entirely of digits (it can even be empty). */
2395 if (optarg == argv[optind - 1])
2398 for (p = optarg; ISDIGIT (*p); p++)
2400 optind -= (*p != '\0');
2408 case_GETOPT_HELP_CHAR;
2410 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2413 usage (SORT_FAILURE);
2417 /* Inheritance of global options to individual keys. */
2418 for (key = keylist; key; key = key->next)
2419 if (! (key->ignore || key->translate
2420 || (key->skipsblanks | key->reverse
2421 | key->skipeblanks | key->month | key->numeric
2422 | key->general_numeric)))
2424 key->ignore = gkey.ignore;
2425 key->translate = gkey.translate;
2426 key->skipsblanks = gkey.skipsblanks;
2427 key->skipeblanks = gkey.skipeblanks;
2428 key->month = gkey.month;
2429 key->numeric = gkey.numeric;
2430 key->general_numeric = gkey.general_numeric;
2431 key->reverse = gkey.reverse;
2434 if (!keylist && (gkey.ignore || gkey.translate
2435 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2436 | gkey.numeric | gkey.general_numeric)))
2438 reverse = gkey.reverse;
2440 if (temp_dir_count == 0)
2442 char const *tmp_dir = getenv ("TMPDIR");
2443 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2456 error (0, 0, _("extra operand %s not allowed with -c"),
2458 usage (SORT_FAILURE);
2461 /* POSIX requires that sort return 1 IFF invoked with -c and the
2462 input is not properly sorted. */
2463 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2467 merge (files, 0, nfiles, outfile);
2469 sort (files, nfiles, outfile);
2471 if (have_read_stdin && fclose (stdin) == EOF)
2472 die (_("close failed"), "-");
2474 exit (EXIT_SUCCESS);