1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
32 #include "long-options.h"
34 #include "hard-locale.h"
38 #include "stdio-safer.h"
42 #if HAVE_SYS_RESOURCE_H
43 # include <sys/resource.h>
46 struct rlimit { size_t rlim_cur; };
47 # define getrlimit(Resource, Rlp) (-1)
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "sort"
53 #define AUTHORS N_ ("Mike Haertel and Paul Eggert")
55 #if defined ENABLE_NLS && HAVE_LANGINFO_H
56 # include <langinfo.h>
60 # define sigprocmask(How, Set, Oset) /* empty */
68 /* Undefine, to avoid warning about redefinition on some systems. */
69 /* FIXME: Remove these: use MIN/MAX from sys2.h. */
71 #define min(a, b) ((a) < (b) ? (a) : (b))
73 #define max(a, b) ((a) > (b) ? (a) : (b))
75 #define UCHAR_LIM (UCHAR_MAX + 1)
76 #define UCHAR(c) ((unsigned char) (c))
78 #ifndef DEFAULT_TMPDIR
79 # define DEFAULT_TMPDIR "/tmp"
82 /* Use this as exit status in case of error, not EXIT_FAILURE. This
83 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
84 that sort exit with status 1 IFF invoked with -c and the input is
85 not properly sorted. Any other irregular exit must exit with a
86 status code greater than 1. */
87 #define SORT_FAILURE 2
88 #define SORT_OUT_OF_ORDER 1
90 #define C_DECIMAL_POINT '.'
91 #define NEGATION_SIGN '-'
92 #define NUMERIC_ZERO '0'
96 static char decimal_point;
97 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
99 /* Nonzero if the corresponding locales are hard. */
100 static int hard_LC_COLLATE;
101 # if HAVE_NL_LANGINFO
102 static int hard_LC_TIME;
105 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
109 # define decimal_point C_DECIMAL_POINT
110 # define IS_THOUSANDS_SEP(x) 0
114 #define NONZERO(x) (x != 0)
116 /* The kind of blanks for '-b' to skip in various options. */
117 enum blanktype { bl_start, bl_end, bl_both };
119 /* The character marking end of line. Default to \n. */
120 static int eolchar = '\n';
122 /* Lines are held in core as counted strings. */
125 char *text; /* Text of the line. */
126 size_t length; /* Length including final newline. */
127 char *keybeg; /* Start of first key. */
128 char *keylim; /* Limit of first key. */
134 char *buf; /* Dynamically allocated buffer,
135 partitioned into 3 regions:
138 - an array of lines, in reverse order. */
139 size_t used; /* Number of bytes used for input data. */
140 size_t nlines; /* Number of lines in the line array. */
141 size_t alloc; /* Number of bytes allocated. */
142 size_t left; /* Number of bytes left from previous reads. */
143 size_t line_bytes; /* Number of bytes to reserve for each line. */
144 int eof; /* An EOF has been read. */
149 size_t sword; /* Zero-origin 'word' to start at. */
150 size_t schar; /* Additional characters to skip. */
151 int skipsblanks; /* Skip leading white space at start. */
152 size_t eword; /* Zero-origin first word after field. */
153 size_t echar; /* Additional characters in field. */
154 int skipeblanks; /* Skip trailing white space at finish. */
155 int *ignore; /* Boolean array of characters to ignore. */
156 char *translate; /* Translation applied to characters. */
157 int numeric; /* Flag for numeric comparison. Handle
158 strings of digits with optional decimal
159 point, but no exponential notation. */
160 int general_numeric; /* Flag for general, numeric comparison.
161 Handle numbers in exponential notation. */
162 int month; /* Flag for comparison by month name. */
163 int reverse; /* Reverse the sense of comparison. */
164 struct keyfield *next; /* Next keyfield to try. */
173 /* The name this program was run with. */
176 /* FIXME: None of these tables work with multibyte character sets.
177 Also, there are many other bugs when handling multibyte characters,
178 or even unibyte encodings where line boundaries are not in the
179 initial shift state. One way to fix this is to rewrite `sort' to
180 use wide characters internally, but doing this with good
181 performance is a bit tricky. */
183 /* Table of white space. */
184 static int blanks[UCHAR_LIM];
186 /* Table of non-printing characters. */
187 static int nonprinting[UCHAR_LIM];
189 /* Table of non-dictionary characters (not letters, digits, or blanks). */
190 static int nondictionary[UCHAR_LIM];
192 /* Translation table folding lower case to upper. */
193 static char fold_toupper[UCHAR_LIM];
195 #define MONTHS_PER_YEAR 12
197 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
198 # define MONTHTAB_CONST /* empty */
200 # define MONTHTAB_CONST const
203 /* Table mapping month names to integers.
204 Alphabetic order allows binary search. */
205 static MONTHTAB_CONST struct month monthtab[] =
221 /* During the merge phase, the number of files to merge at once. */
224 /* Minimum size for a merge or check buffer. */
225 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
227 /* Minimum sort size; the code might not work with smaller sizes. */
228 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
230 /* The number of bytes needed for a merge or check buffer, which can
231 function relatively efficiently even if it holds only one line. If
232 a longer line is seen, this value is increased. */
233 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
235 /* The approximate maximum number of bytes of main memory to use, as
236 specified by the user. Zero if the user has not specified a size. */
237 static size_t sort_size;
239 /* The guessed size for non-regular files. */
240 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
242 /* Array of directory names in which any temporary files are to be created. */
243 static char const **temp_dirs;
245 /* Number of temporary directory names used. */
246 static size_t temp_dir_count;
248 /* Number of allocated slots in temp_dirs. */
249 static size_t temp_dir_alloc;
251 /* Our process ID. */
252 static pid_t process_id;
254 /* Flag to reverse the order of all comparisons. */
257 /* Flag for stable sort. This turns off the last ditch bytewise
258 comparison of lines, and instead leaves lines in the same order
259 they were read if all keys compare equal. */
262 /* Tab character separating fields. If NUL, then fields are separated
263 by the empty string between a non-whitespace character and a whitespace
267 /* Flag to remove consecutive duplicate lines from the output.
268 Only the last of a sequence of equal lines will be output. */
271 /* Nonzero if any of the input files are the standard input. */
272 static int have_read_stdin;
274 /* List of key field comparisons to be tried. */
275 static struct keyfield *keylist;
281 fprintf (stderr, _("Try `%s --help' for more information.\n"),
286 Usage: %s [OPTION]... [FILE]...\n\
290 Write sorted concatenation of all FILE(s) to standard output.\n\
294 Mandatory arguments to long options are mandatory for short options too.\n\
295 -b, --ignore-leading-blanks ignore leading blanks\n\
296 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
297 -f, --ignore-case fold lower case to upper case characters\n\
298 -g, --general-numeric-sort compare according to general numerical value\n\
299 -i, --ignore-nonprinting consider only printable characters\n\
300 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
301 -n, --numeric-sort compare according to string numerical value\n\
302 -r, --reverse reverse the result of comparisons\n\
309 -c, --check check whether input is sorted; do not sort\n\
310 -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\
311 -m, --merge merge already sorted files; do not sort\n\
312 -o, --output=FILE write result to FILE instead of standard output\n\
313 -s, --stable stabilize sort by disabling last-resort comparison\n\
314 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
315 -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\
316 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s\n\
317 multiple options specify multiple directories\n\
318 -u, --unique with -c: check for strict ordering\n\
319 otherwise: output only the first of an equal run\n\
320 -z, --zero-terminated end lines with 0 byte, not newline\n\
321 +POS1 [-POS2] start a key at POS1, end it before POS2 (origin 0)\n\
322 Warning: this option is obsolescent\n\
323 --help display this help and exit\n\
324 --version output version information and exit\n\
329 POS is F[.C][OPTS], where F is the field number and C the character position\n\
330 in the field, both counted from one with -k, from zero with the obsolescent\n\
331 form. OPTS is made up of one or more single-letter ordering options, which\n\
332 override global ordering options for that key. If no key is given, use the\n\
333 entire line as the key.\n\
335 SIZE may be followed by the following multiplicative suffixes:\n\
336 %% 1%% of memory, b 1, k 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
338 With no FILE, or when FILE is -, read standard input.\n\
341 The locale specified by the environment affects sort order.\n\
342 Set LC_COLLATE=C to get the traditional sort order that uses\n\
343 native byte values.\n\
346 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
348 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
349 POSIX requires that sort return 1 IFF invoked with -c and
350 the input is not properly sorted. */
351 assert (status == 0 || status == SORT_FAILURE);
355 static struct option const long_options[] =
357 {"ignore-leading-blanks", no_argument, NULL, 'b'},
358 {"check", no_argument, NULL, 'c'},
359 {"dictionary-order", no_argument, NULL, 'd'},
360 {"ignore-case", no_argument, NULL, 'f'},
361 {"general-numeric-sort", no_argument, NULL, 'g'},
362 {"ignore-nonprinting", no_argument, NULL, 'i'},
363 {"key", required_argument, NULL, 'k'},
364 {"merge", no_argument, NULL, 'm'},
365 {"month-sort", no_argument, NULL, 'M'},
366 {"numeric-sort", no_argument, NULL, 'n'},
367 {"output", required_argument, NULL, 'o'},
368 {"reverse", no_argument, NULL, 'r'},
369 {"stable", no_argument, NULL, 's'},
370 {"buffer-size", required_argument, NULL, 'S'},
371 {"field-separator", required_argument, NULL, 't'},
372 {"temporary-directory", required_argument, NULL, 'T'},
373 {"unique", no_argument, NULL, 'u'},
374 {"zero-terminated", no_argument, NULL, 'z'},
375 {GETOPT_HELP_OPTION_DECL},
376 {GETOPT_VERSION_OPTION_DECL},
380 /* The set of signals that are caught. */
381 static sigset_t caught_signals;
383 /* The list of temporary files. */
386 struct tempnode *volatile next;
387 char name[1]; /* Actual size is 1 + file name length. */
389 static struct tempnode *volatile temphead;
391 /* Clean up any remaining temporary files. */
396 struct tempnode *node;
398 for (node = temphead; node; node = node->next)
402 /* Report MESSAGE for FILE, then clean up and exit. */
404 static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN;
406 die (char const *message, char const *file)
408 error (0, errno, "%s: %s", message, file);
413 /* Create a new temporary file, returning its newly allocated name.
414 Store into *PFP a stream open for writing. */
417 create_temp_file (FILE **pfp)
419 static char const slashbase[] = "/sortXXXXXX";
420 static size_t temp_dir_index;
424 char const *temp_dir = temp_dirs[temp_dir_index];
425 size_t len = strlen (temp_dir);
426 struct tempnode *node =
427 (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase);
428 char *file = node->name;
430 memcpy (file, temp_dir, len);
431 memcpy (file + len, slashbase, sizeof slashbase);
432 node->next = temphead;
433 if (++temp_dir_index == temp_dir_count)
436 /* Create the temporary file in a critical section, to avoid races. */
437 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
442 sigprocmask (SIG_SETMASK, &oldset, NULL);
445 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
446 die (_("cannot create temporary file"), file);
452 xfopen (const char *file, const char *how)
456 if (STREQ (file, "-"))
468 if ((fp = fopen_safer (file, how)) == NULL)
469 die (_("open failed"), file);
475 /* Close FP, whose name is FILE, and report any errors. */
478 xfclose (FILE *fp, char const *file)
482 /* Allow reading stdin from tty more than once. */
488 if (fclose (fp) != 0)
489 die (_("close failed"), file);
494 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
496 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
497 die (_("write failed"), output_file);
500 /* Append DIR to the array of temporary directory names. */
502 add_temp_dir (char const *dir)
504 if (temp_dir_count == temp_dir_alloc)
506 temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
507 temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
510 temp_dirs[temp_dir_count++] = dir;
513 /* Search through the list of temporary files for NAME;
514 remove it if it is found on the list. */
517 zaptemp (const char *name)
519 struct tempnode *volatile *pnode;
520 struct tempnode *node;
522 for (pnode = &temphead; (node = *pnode); pnode = &node->next)
523 if (node->name == name)
527 free ((char *) node);
535 struct_month_cmp (const void *m1, const void *m2)
537 return strcmp (((const struct month *) m1)->name,
538 ((const struct month *) m2)->name);
543 /* Initialize the character class tables. */
550 for (i = 0; i < UCHAR_LIM; ++i)
556 if (!ISALNUM (i) && !ISBLANK (i))
557 nondictionary[i] = 1;
559 fold_toupper[i] = toupper (i);
564 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
565 /* If we're not in the "C" locale, read different names for months. */
568 for (i = 0; i < MONTHS_PER_YEAR; i++)
575 s = (char *) nl_langinfo (ABMON_1 + i);
577 monthtab[i].name = name = (char *) xmalloc (s_len + 1);
578 monthtab[i].val = i + 1;
580 for (j = 0; j < s_len; j++)
581 name[j] = fold_toupper[UCHAR (s[j])];
584 qsort ((void *) monthtab, MONTHS_PER_YEAR,
585 sizeof (struct month), struct_month_cmp);
590 /* Specify the amount of main memory to use when sorting. */
592 specify_sort_size (char const *s)
596 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkmMPtTYZ");
598 /* The default unit is kB. */
599 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
601 if (n <= UINTMAX_MAX / 1024)
604 e = LONGINT_OVERFLOW;
607 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
608 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
617 double mem = physmem_total () * n / 100;
619 /* Use "<", not "<=", to avoid problems with rounding. */
620 if (mem < UINTMAX_MAX)
626 e = LONGINT_OVERFLOW;
636 sort_size = MAX (sort_size, MIN_SORT_SIZE);
640 e = LONGINT_OVERFLOW;
643 STRTOL_FATAL_ERROR (s, _("sort size"), e);
646 /* Return the default sort size. */
648 default_sort_size (void)
650 /* Let MEM be available memory or 1/8 of total memory, whichever
652 double avail = physmem_available ();
653 double total = physmem_total ();
654 double mem = MAX (avail, total / 8);
655 struct rlimit rlimit;
657 /* Let SIZE be MEM, but no more than the maximum object size or
658 system resource limits. Avoid the MIN macro here, as it is not
659 quite right when only one argument is floating point. Don't
660 bother to check for values like RLIM_INFINITY since in practice
661 they are not much less than SIZE_MAX. */
662 size_t size = SIZE_MAX;
665 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
666 size = rlimit.rlim_cur;
668 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
669 size = rlimit.rlim_cur;
672 /* Leave a large safety margin for the above limits, as failure can
673 occur when they are exceeded. */
677 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
678 Exceeding RSS is not fatal, but can be quite slow. */
679 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
680 size = rlimit.rlim_cur / 16 * 15;
683 /* Use no less than the minimum. */
684 return MAX (size, MIN_SORT_SIZE);
687 /* Return the sort buffer size to use with the input files identified
688 by FPS and FILES, which are alternate paths to the same files.
689 NFILES gives the number of input files; NFPS may be less. Assume
690 that each input line requires LINE_BYTES extra bytes' worth of line
691 information. Return at most SIZE_BOUND. */
694 sort_buffer_size (FILE *const *fps, int nfps,
695 char *const *files, int nfiles,
696 size_t line_bytes, size_t size_bound)
698 /* In the worst case, each input byte is a newline. */
699 size_t worst_case_per_input_byte = line_bytes + 1;
701 /* Keep enough room for one extra input line and an extra byte.
702 This extra room might be needed when preparing to read EOF. */
703 size_t size = worst_case_per_input_byte + 1;
707 for (i = 0; i < nfiles; i++)
713 if ((i < nfps ? fstat (fileno (fps[i]), &st)
714 : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
715 : stat (files[i], &st))
717 die (_("stat failed"), files[i]);
719 file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;
721 /* Add the amount of memory needed to represent the worst case
722 where the input consists entirely of newlines followed by a
723 single non-newline. Check for overflow. */
724 worst_case = file_size * worst_case_per_input_byte + 1;
725 if (file_size != worst_case / worst_case_per_input_byte
726 || size_bound - size <= worst_case)
734 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
735 must be at least sizeof (struct line). Allocate ALLOC bytes
739 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
741 /* Ensure that the line array is properly aligned. If the desired
742 size cannot be allocated, repeatedly halve it until allocation
743 succeeds. The smaller allocation may hurt overall performance,
744 but that's better than failing. */
747 alloc += sizeof (struct line) - alloc % sizeof (struct line);
748 buf->buf = malloc (alloc);
752 if (alloc <= line_bytes + 1)
756 buf->line_bytes = line_bytes;
758 buf->used = buf->left = buf->nlines = 0;
762 /* Return one past the limit of the line array. */
764 static inline struct line *
765 buffer_linelim (struct buffer const *buf)
767 return (struct line *) (buf->buf + buf->alloc);
770 /* Return a pointer to the first character of the field specified
774 begfield (const struct line *line, const struct keyfield *key)
776 register char *ptr = line->text, *lim = ptr + line->length - 1;
777 register size_t sword = key->sword;
778 register size_t schar = key->schar;
781 while (ptr < lim && sword--)
783 while (ptr < lim && *ptr != tab)
789 while (ptr < lim && sword--)
791 while (ptr < lim && blanks[UCHAR (*ptr)])
793 while (ptr < lim && !blanks[UCHAR (*ptr)])
797 if (key->skipsblanks)
798 while (ptr < lim && blanks[UCHAR (*ptr)])
801 if (schar < lim - ptr)
809 /* Return the limit of (a pointer to the first character after) the field
810 in LINE specified by KEY. */
813 limfield (const struct line *line, const struct keyfield *key)
815 register char *ptr = line->text, *lim = ptr + line->length - 1;
816 register size_t eword = key->eword, echar = key->echar;
818 /* Note: from the POSIX spec:
819 The leading field separator itself is included in
820 a field when -t is not used. FIXME: move this comment up... */
822 /* Move PTR past EWORD fields or to one past the last byte on LINE,
823 whichever comes first. If there are more than EWORD fields, leave
824 PTR pointing at the beginning of the field having zero-based index,
825 EWORD. If a delimiter character was specified (via -t), then that
826 `beginning' is the first character following the delimiting TAB.
827 Otherwise, leave PTR pointing at the first `blank' character after
828 the preceding field. */
830 while (ptr < lim && eword--)
832 while (ptr < lim && *ptr != tab)
834 if (ptr < lim && (eword | echar))
838 while (ptr < lim && eword--)
840 while (ptr < lim && blanks[UCHAR (*ptr)])
842 while (ptr < lim && !blanks[UCHAR (*ptr)])
846 #ifdef POSIX_UNSPECIFIED
847 /* The following block of code makes GNU sort incompatible with
848 standard Unix sort, so it's ifdef'd out for now.
849 The POSIX spec isn't clear on how to interpret this.
850 FIXME: request clarification.
852 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
853 Date: Thu, 30 May 96 12:20:41 -0400
855 [...]I believe I've found another bug in `sort'.
860 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
863 $ /bin/sort +0.6 -0.7 </tmp/sort.in
867 Unix sort produced the answer I expected: sort on the single character
868 in column 6. GNU sort produced different results, because it disagrees
869 on the interpretation of the key-end spec "-M.N". Unix sort reads this
870 as "skip M fields, then N characters"; but GNU sort wants it to mean
871 "skip M fields, then either N characters or the rest of the current
872 field, whichever comes first". This extra clause applies only to
873 key-ends, not key-starts.
876 /* Make LIM point to the end of (one byte past) the current field. */
880 newlim = memchr (ptr, tab, lim - ptr);
888 while (newlim < lim && blanks[UCHAR (*newlim)])
890 while (newlim < lim && !blanks[UCHAR (*newlim)])
896 /* If we're skipping leading blanks, don't start counting characters
897 until after skipping past any leading blanks. */
898 if (key->skipsblanks)
899 while (ptr < lim && blanks[UCHAR (*ptr)])
902 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
903 if (echar < lim - ptr)
914 trim_trailing_blanks (const char *a_start, char **a_end)
916 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
920 /* Fill BUF reading from FP, moving buf->left bytes from the end
921 of buf->buf to the beginning first. If EOF is reached and the
922 file wasn't terminated by a newline, supply one. Set up BUF's line
923 table too. FILE is the name of the file corresponding to FP.
924 Return nonzero if some input was read. */
927 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
929 struct keyfield const *key = keylist;
931 size_t line_bytes = buf->line_bytes;
932 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
937 if (buf->used != buf->left)
939 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
940 buf->used = buf->left;
946 char *ptr = buf->buf + buf->used;
947 struct line *linelim = buffer_linelim (buf);
948 struct line *line = linelim - buf->nlines;
949 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
950 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
952 while (line_bytes + 1 < avail)
954 /* Read as many bytes as possible, but do not read so many
955 bytes that there might not be enough room for the
956 corresponding line array. The worst case is when the
957 rest of the input file consists entirely of newlines,
958 except that the last byte is not a newline. */
959 size_t readsize = (avail - 1) / (line_bytes + 1);
960 size_t bytes_read = fread (ptr, 1, readsize, fp);
961 char *ptrlim = ptr + bytes_read;
965 if (bytes_read != readsize)
968 die (_("read failed"), file);
972 if (buf->buf == ptrlim)
974 if (ptrlim[-1] != eol)
979 /* Find and record each line in the just-read input. */
980 while ((p = memchr (ptr, eol, ptrlim - ptr)))
984 line->text = line_start;
985 line->length = ptr - line_start;
986 mergesize = MAX (mergesize, line->length);
991 /* Precompute the position of the first key for
993 line->keylim = key->eword == -1 ? p : limfield (line, key);
995 if (key->sword != -1)
996 line->keybeg = begfield (line, key);
999 if (key->skipsblanks)
1000 while (blanks[UCHAR (*line_start)])
1002 line->keybeg = line_start;
1004 if (key->skipeblanks)
1005 trim_trailing_blanks (line->keybeg, &line->keylim);
1016 buf->used = ptr - buf->buf;
1017 buf->nlines = buffer_linelim (buf) - line;
1018 if (buf->nlines != 0)
1020 buf->left = ptr - line_start;
1021 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1025 /* The current input line is too long to fit in the buffer.
1026 Double the buffer size and try again. */
1027 if (2 * buf->alloc < buf->alloc)
1030 buf->buf = xrealloc (buf->buf, buf->alloc);
1034 /* Compare strings A and B containing decimal fractions < 1. Each string
1035 should begin with a decimal point followed immediately by the digits
1036 of the fraction. Strings not of this form are considered to be zero. */
1038 /* The goal here, is to take two numbers a and b... compare these
1039 in parallel. Instead of converting each, and then comparing the
1040 outcome. Most likely stopping the comparison before the conversion
1041 is complete. The algorithm used, in the old sort:
1043 Algorithm: fraccompare
1044 Action : compare two decimal fractions
1045 accepts : char *a, char *b
1046 returns : -1 if a<b, 0 if a=b, 1 if a>b.
1049 if *a == decimal_point AND *b == decimal_point
1050 find first character different in a and b.
1051 if both are digits, return the difference *a - *b.
1054 if digit return 1, else 0
1057 if digit return -1, else 0
1058 if *a is a decimal_point
1059 skip past decimal_point and zeros
1060 if digit return 1, else 0
1061 if *b is a decimal_point
1062 skip past decimal_point and zeros
1063 if digit return -1, else 0
1067 fraccompare (register const char *a, register const char *b)
1069 if (*a == decimal_point && *b == decimal_point)
1071 while (*++a == *++b)
1074 if (ISDIGIT (*a) && ISDIGIT (*b))
1077 goto a_trailing_nonzero;
1079 goto b_trailing_nonzero;
1082 else if (*a++ == decimal_point)
1085 while (*a == NUMERIC_ZERO)
1087 return ISDIGIT (*a);
1089 else if (*b++ == decimal_point)
1092 while (*b == NUMERIC_ZERO)
1094 return - ISDIGIT (*b);
1099 /* Compare strings A and B as numbers without explicitly converting them to
1100 machine numbers. Comparatively slow for short strings, but asymptotically
1104 numcompare (register const char *a, register const char *b)
1106 register int tmpa, tmpb, tmp;
1107 register size_t loga, logb;
1112 while (blanks[UCHAR (tmpa)])
1114 while (blanks[UCHAR (tmpb)])
1117 if (tmpa == NEGATION_SIGN)
1121 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1122 if (tmpb != NEGATION_SIGN)
1124 if (tmpa == decimal_point)
1127 while (tmpa == NUMERIC_ZERO);
1130 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1132 if (tmpb == decimal_point)
1135 while (tmpb == NUMERIC_ZERO);
1142 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1144 while (tmpa == tmpb && ISDIGIT (tmpa))
1148 while (IS_THOUSANDS_SEP (tmpa));
1151 while (IS_THOUSANDS_SEP (tmpb));
1154 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1155 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1156 return -fraccompare (a, b);
1160 for (loga = 0; ISDIGIT (tmpa); ++loga)
1163 while (IS_THOUSANDS_SEP (tmpa));
1165 for (logb = 0; ISDIGIT (tmpb); ++logb)
1168 while (IS_THOUSANDS_SEP (tmpb));
1171 return loga < logb ? 1 : -1;
1178 else if (tmpb == NEGATION_SIGN)
1182 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1183 if (tmpb == decimal_point)
1186 while (tmpb == NUMERIC_ZERO);
1189 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1191 if (tmpa == decimal_point)
1194 while (tmpa == NUMERIC_ZERO);
1201 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1203 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1206 while (tmpa == tmpb && ISDIGIT (tmpa))
1210 while (IS_THOUSANDS_SEP (tmpa));
1213 while (IS_THOUSANDS_SEP (tmpb));
1216 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1217 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1218 return fraccompare (a, b);
1222 for (loga = 0; ISDIGIT (tmpa); ++loga)
1225 while (IS_THOUSANDS_SEP (tmpa));
1227 for (logb = 0; ISDIGIT (tmpb); ++logb)
1230 while (IS_THOUSANDS_SEP (tmpb));
1233 return loga < logb ? -1 : 1;
1243 general_numcompare (const char *sa, const char *sb)
1245 /* FIXME: add option to warn about failed conversions. */
1246 /* FIXME: maybe add option to try expensive FP conversion
1247 only if A and B can't be compared more cheaply/accurately. */
1251 double a = strtod (sa, &ea);
1252 double b = strtod (sb, &eb);
1254 /* Put conversion errors at the start of the collating sequence. */
1256 return sb == eb ? 0 : -1;
1260 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1261 conversion errors but before numbers; sort them by internal
1262 bit-pattern, for lack of a more portable alternative. */
1268 : memcmp ((char *) &a, (char *) &b, sizeof a));
1271 /* Return an integer in 1..12 of the month name S with length LEN.
1272 Return 0 if the name in S is not recognized. */
1275 getmonth (const char *s, size_t len)
1279 register int lo = 0, hi = MONTHS_PER_YEAR, result;
1281 while (len > 0 && blanks[UCHAR (*s)])
1290 month = (char *) alloca (len + 1);
1291 for (i = 0; i < len; ++i)
1292 month[i] = fold_toupper[UCHAR (s[i])];
1293 while (blanks[UCHAR (month[i - 1])])
1299 int ix = (lo + hi) / 2;
1301 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1306 while (hi - lo > 1);
1308 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1309 ? monthtab[lo].val : 0);
1314 /* Compare two lines A and B trying every key in sequence until there
1315 are no more keys or a difference is found. */
1318 keycompare (const struct line *a, const struct line *b)
1320 struct keyfield *key = keylist;
1322 /* For the first iteration only, the key positions have been
1323 precomputed for us. */
1324 register char *texta = a->keybeg;
1325 register char *textb = b->keybeg;
1326 register char *lima = a->keylim;
1327 register char *limb = b->keylim;
1333 register unsigned char *translate = (unsigned char *) key->translate;
1334 register int *ignore = key->ignore;
1336 /* Find the lengths. */
1337 size_t lena = lima <= texta ? 0 : lima - texta;
1338 size_t lenb = limb <= textb ? 0 : limb - textb;
1340 if (key->skipeblanks)
1342 char *a_end = texta + lena;
1343 char *b_end = textb + lenb;
1344 trim_trailing_blanks (texta, &a_end);
1345 trim_trailing_blanks (textb, &b_end);
1346 lena = a_end - texta;
1347 lenb = b_end - textb;
1350 /* Actually compare the fields. */
1351 if (key->numeric | key->general_numeric)
1353 char savea = *lima, saveb = *limb;
1355 *lima = *limb = '\0';
1356 diff = ((key->numeric ? numcompare : general_numcompare)
1358 *lima = savea, *limb = saveb;
1360 else if (key->month)
1361 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1363 /* Sorting like this may become slow, so in a simple locale the user
1364 can select a faster sort that is similar to ascii sort */
1365 else if (hard_LC_COLLATE)
1367 if (ignore || translate)
1369 char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1370 char *copy_b = copy_a + lena + 1;
1371 size_t new_len_a, new_len_b, i;
1373 /* Ignore and/or translate chars before comparing. */
1374 for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1378 copy_a[new_len_a] = (translate
1379 ? translate[UCHAR (texta[i])]
1381 if (!ignore || !ignore[UCHAR (texta[i])])
1386 copy_b[new_len_b] = (translate
1387 ? translate[UCHAR (textb[i])]
1389 if (!ignore || !ignore[UCHAR (textb[i])])
1394 diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
1397 diff = - NONZERO (lenb);
1401 diff = memcoll (texta, lena, textb, lenb);
1406 #define CMP_WITH_IGNORE(A, B) \
1411 while (texta < lima && ignore[UCHAR (*texta)]) \
1413 while (textb < limb && ignore[UCHAR (*textb)]) \
1415 if (! (texta < lima && textb < limb)) \
1417 diff = UCHAR (A) - UCHAR (B); \
1424 diff = (lima - texta) - (limb - textb); \
1429 CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1430 translate[UCHAR (*textb)]);
1432 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1435 diff = - NONZERO (lenb);
1442 while (texta < lima && textb < limb)
1444 diff = (UCHAR (translate[UCHAR (*texta++)])
1445 - UCHAR (translate[UCHAR (*textb++)]));
1452 diff = memcmp (texta, textb, min (lena, lenb));
1456 diff = lena < lenb ? -1 : lena != lenb;
1466 /* Find the beginning and limit of the next field. */
1467 if (key->eword != -1)
1468 lima = limfield (a, key), limb = limfield (b, key);
1470 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1472 if (key->sword != -1)
1473 texta = begfield (a, key), textb = begfield (b, key);
1476 texta = a->text, textb = b->text;
1477 if (key->skipsblanks)
1479 while (texta < lima && blanks[UCHAR (*texta)])
1481 while (textb < limb && blanks[UCHAR (*textb)])
1492 return key->reverse ? -diff : diff;
1495 /* Compare two lines A and B, returning negative, zero, or positive
1496 depending on whether A compares less than, equal to, or greater than B. */
1499 compare (register const struct line *a, register const struct line *b)
1504 /* First try to compare on the specified keys (if any).
1505 The only two cases with no key at all are unadorned sort,
1506 and unadorned sort -r. */
1509 diff = keycompare (a, b);
1511 if (diff != 0 || unique || stable)
1515 /* If the keys all compare equal (or no keys were specified)
1516 fall through to the default comparison. */
1517 alen = a->length - 1, blen = b->length - 1;
1520 diff = - NONZERO (blen);
1522 diff = NONZERO (alen);
1524 else if (hard_LC_COLLATE)
1525 diff = memcoll (a->text, alen, b->text, blen);
1527 else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
1528 diff = alen < blen ? -1 : alen != blen;
1530 return reverse ? -diff : diff;
1533 /* Check that the lines read from the given FP come in order. Print a
1534 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1535 one if they are not in order. Otherwise, print no diagnostic
1539 checkfp (FILE *fp, char *file_name)
1541 struct buffer buf; /* Input buffer. */
1542 struct line temp; /* Copy of previous line. */
1544 uintmax_t line_number = 0;
1545 struct keyfield *key = keylist;
1546 int nonunique = 1 - unique;
1549 initbuf (&buf, sizeof (struct line),
1550 MAX (merge_buffer_size, sort_size));
1553 while (fillbuf (&buf, fp, file_name))
1555 struct line const *line = buffer_linelim (&buf);
1556 struct line const *linebase = line - buf.nlines;
1558 /* Make sure the line saved from the old buffer contents is
1559 less than or equal to the first line of the new buffer. */
1560 if (alloc && nonunique <= compare (&temp, line - 1))
1564 char hr_buf[LONGEST_HUMAN_READABLE + 1];
1565 struct line const *disorder_line = line - 1;
1566 uintmax_t disorder_line_number =
1567 buffer_linelim (&buf) - disorder_line + line_number;
1568 fprintf (stderr, _("%s: %s:%s: disorder: "),
1569 program_name, file_name,
1570 human_readable (disorder_line_number, hr_buf, 1, 1));
1571 write_bytes (disorder_line->text, disorder_line->length, stderr,
1572 _("standard error"));
1578 /* Compare each line in the buffer with its successor. */
1579 while (linebase < --line)
1580 if (nonunique <= compare (line, line - 1))
1581 goto found_disorder;
1583 line_number += buf.nlines;
1585 /* Save the last line of the buffer. */
1586 if (alloc < line->length)
1593 alloc = line->length;
1597 while (alloc < line->length);
1599 temp.text = xrealloc (temp.text, alloc);
1601 memcpy (temp.text, line->text, line->length);
1602 temp.length = line->length;
1605 temp.keybeg = temp.text + (line->keybeg - line->text);
1606 temp.keylim = temp.text + (line->keylim - line->text);
1610 xfclose (fp, file_name);
1617 /* Merge lines from FILES onto OFP. NFILES cannot be greater than
1618 NMERGE. Close input and output files before returning.
1619 OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1620 output file has not been opened yet. */
1623 mergefps (char **files, register int nfiles,
1624 FILE *ofp, const char *output_file)
1626 FILE *fps[NMERGE]; /* Input streams for each file. */
1627 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1628 struct line saved; /* Saved line storage for unique check. */
1629 struct line const *savedline = NULL;
1630 /* &saved if there is a saved line. */
1631 size_t savealloc = 0; /* Size allocated for the saved line. */
1632 struct line const *cur[NMERGE]; /* Current line in each line table. */
1633 struct line const *base[NMERGE]; /* Base of each line table. */
1634 int ord[NMERGE]; /* Table representing a permutation of fps,
1635 such that cur[ord[0]] is the smallest line
1636 and will be next output. */
1637 register int i, j, t;
1638 struct keyfield *key = keylist;
1641 /* Read initial lines from each input file. */
1642 for (i = 0; i < nfiles; ++i)
1644 fps[i] = xfopen (files[i], "r");
1645 initbuf (&buffer[i], sizeof (struct line),
1646 MAX (merge_buffer_size, sort_size / nfiles));
1647 /* If a file is empty, eliminate it from future consideration. */
1648 while (i < nfiles && !fillbuf (&buffer[i], fps[i], files[i]))
1650 xfclose (fps[i], files[i]);
1653 for (j = i; j < nfiles; ++j)
1654 files[j] = files[j + 1];
1657 free (buffer[i].buf);
1660 struct line const *linelim = buffer_linelim (&buffer[i]);
1661 cur[i] = linelim - 1;
1662 base[i] = linelim - buffer[i].nlines;
1667 ofp = xfopen (output_file, "w");
1669 /* Set up the ord table according to comparisons among input lines.
1670 Since this only reorders two items if one is strictly greater than
1671 the other, it is stable. */
1672 for (i = 0; i < nfiles; ++i)
1674 for (i = 1; i < nfiles; ++i)
1675 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1676 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1678 /* Repeatedly output the smallest line until no input remains. */
1681 struct line const *smallest = cur[ord[0]];
1683 /* If uniquified output is turned on, output only the first of
1684 an identical series of lines. */
1687 if (savedline && compare (savedline, smallest))
1690 write_bytes (saved.text, saved.length, ofp, output_file);
1695 if (savealloc < smallest->length)
1700 savealloc = smallest->length;
1703 while ((savealloc *= 2) < smallest->length);
1705 saved.text = xrealloc (saved.text, savealloc);
1707 saved.length = smallest->length;
1708 memcpy (saved.text, smallest->text, saved.length);
1712 saved.text + (smallest->keybeg - smallest->text);
1714 saved.text + (smallest->keylim - smallest->text);
1719 write_bytes (smallest->text, smallest->length, ofp, output_file);
1721 /* Check if we need to read more lines into core. */
1722 if (base[ord[0]] < smallest)
1723 cur[ord[0]] = smallest - 1;
1726 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1728 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1729 cur[ord[0]] = linelim - 1;
1730 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1734 /* We reached EOF on fps[ord[0]]. */
1735 for (i = 1; i < nfiles; ++i)
1736 if (ord[i] > ord[0])
1739 xfclose (fps[ord[0]], files[ord[0]]);
1740 zaptemp (files[ord[0]]);
1741 free (buffer[ord[0]].buf);
1742 for (i = ord[0]; i < nfiles; ++i)
1744 fps[i] = fps[i + 1];
1745 files[i] = files[i + 1];
1746 buffer[i] = buffer[i + 1];
1747 cur[i] = cur[i + 1];
1748 base[i] = base[i + 1];
1750 for (i = 0; i < nfiles; ++i)
1751 ord[i] = ord[i + 1];
1756 /* The new line just read in may be larger than other lines
1757 already in core; push it back in the queue until we encounter
1758 a line larger than it. */
1759 for (i = 1; i < nfiles; ++i)
1761 t = compare (cur[ord[0]], cur[ord[i]]);
1763 t = ord[0] - ord[i];
1768 for (j = 1; j < i; ++j)
1769 ord[j - 1] = ord[j];
1773 if (unique && savedline)
1775 write_bytes (saved.text, saved.length, ofp, output_file);
1779 xfclose (ofp, output_file);
1782 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1783 The input and output arrays are in reverse order, and LINES and
1784 TEMP point just past the end of their respective arrays. */
1787 sortlines (struct line *lines, size_t nlines, struct line *temp)
1789 register struct line *lo, *hi, *t;
1790 register size_t nlo, nhi;
1794 if (0 < compare (&lines[-1], &lines[-2]))
1796 struct line tmp = lines[-1];
1797 lines[-1] = lines[-2];
1809 sortlines (lo, nlo, temp);
1812 sortlines (hi, nhi, temp);
1817 if (compare (lo - 1, hi - 1) <= 0)
1818 *--t = *--lo, --nlo;
1820 *--t = *--hi, --nhi;
1824 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1828 /* Return the index of the first of NFILES FILES that is the same file
1829 as OUTFILE. If none can be the same, return NFILES. Consider an
1830 input pipe to be the same as OUTFILE, since the pipe might be the
1831 output of a command like "cat OUTFILE". */
1834 first_same_file (char **files, int nfiles, char const *outfile)
1837 int got_outstat = 0;
1838 struct stat instat, outstat;
1840 for (i = 0; i < nfiles; i++)
1842 int standard_input = STREQ (files[i], "-");
1844 if (STREQ (outfile, files[i]) && ! standard_input)
1850 if ((STREQ (outfile, "-")
1851 ? fstat (STDOUT_FILENO, &outstat)
1852 : stat (outfile, &outstat))
1857 if (((standard_input
1858 ? fstat (STDIN_FILENO, &instat)
1859 : stat (files[i], &instat))
1861 && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1868 /* Check that each of the NFILES FILES is ordered.
1869 Return a count of disordered files. */
1872 check (char **files, int nfiles)
1874 int i, disorders = 0;
1877 for (i = 0; i < nfiles; ++i)
1879 fp = xfopen (files[i], "r");
1880 disorders += checkfp (fp, files[i]);
1885 /* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most
1886 MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot
1890 merge (char **files, int nfiles, int max_merge, char const *output_file)
1892 while (max_merge < nfiles)
1897 for (i = 0; i < nfiles / NMERGE; ++i)
1899 temp = create_temp_file (&tfp);
1900 mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1903 temp = create_temp_file (&tfp);
1904 mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1911 mergefps (files, nfiles, NULL, output_file);
1914 /* Sort NFILES FILES onto OUTPUT_FILE. */
1917 sort (char **files, int nfiles, char const *output_file)
1920 int n_temp_files = 0;
1921 int output_file_created = 0;
1924 if (! size && ! (size = sort_size))
1925 size = default_sort_size ();
1931 char const *temp_output;
1932 char const *file = *files;
1933 FILE *fp = xfopen (file, "r");
1937 initbuf (&buf, 2 * sizeof (struct line),
1938 sort_buffer_size (&fp, 1, files, nfiles,
1939 2 * sizeof (struct line), size));
1944 while (fillbuf (&buf, fp, file))
1947 struct line *linebase;
1949 if (buf.eof && nfiles
1950 && (2 * sizeof (struct line) + 1
1951 < (buf.alloc - buf.used
1952 - 2 * sizeof (struct line) * buf.nlines)))
1954 /* End of file, but there is more input and buffer room.
1955 Concatenate the next input file; this is faster in
1957 buf.left = buf.used;
1961 line = buffer_linelim (&buf);
1962 linebase = line - buf.nlines;
1963 sortlines (line, buf.nlines, linebase);
1964 if (buf.eof && !nfiles && !n_temp_files && !buf.left)
1967 tfp = xfopen (output_file, "w");
1968 temp_output = output_file;
1969 output_file_created = 1;
1974 temp_output = create_temp_file (&tfp);
1980 write_bytes (line->text, line->length, tfp, temp_output);
1982 while (linebase < line && compare (line, line - 1) == 0)
1985 while (linebase < line);
1987 xfclose (tfp, temp_output);
1989 if (output_file_created)
1998 if (! output_file_created)
2000 int i = n_temp_files;
2001 struct tempnode *node;
2002 char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2003 for (node = temphead; i > 0; node = node->next)
2004 tempfiles[--i] = node->name;
2005 merge (tempfiles, n_temp_files, NMERGE, output_file);
2006 free ((char *) tempfiles);
2010 /* Insert key KEY at the end of the key list. */
2013 insertkey (struct keyfield *key)
2015 struct keyfield **p;
2017 for (p = &keylist; *p; p = &(*p)->next)
2023 /* Report a bad field specification SPEC, with extra info MSGID. */
2025 static void badfieldspec PARAMS ((char const *, char const *))
2028 badfieldspec (char const *spec, char const *msgid)
2030 error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2035 /* Parse the leading integer in STRING and store the resulting value
2036 (which must fit into size_t) into *VAL. Return the address of the
2037 suffix after the integer. If MSGID is NULL, return NULL after
2038 failure; otherwise, report MSGID and exit on failure. */
2041 parse_field_count (char const *string, size_t *val, char const *msgid)
2046 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2049 case LONGINT_INVALID_SUFFIX_CHAR:
2054 case LONGINT_OVERFLOW:
2056 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2057 _(msgid), (int) (suffix - string), string);
2060 case LONGINT_INVALID:
2062 error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2070 /* Handle interrupts and hangups. */
2073 sighandler (int sig)
2075 #ifndef SA_NOCLDSTOP
2076 signal (sig, SIG_IGN);
2083 struct sigaction sigact;
2085 sigact.sa_handler = SIG_DFL;
2086 sigemptyset (&sigact.sa_mask);
2087 sigact.sa_flags = 0;
2088 sigaction (sig, &sigact, NULL);
2091 signal (sig, SIG_DFL);
2094 kill (process_id, sig);
2097 /* Set the ordering options for KEY specified in S.
2098 Return the address of the first character in S that
2099 is not a valid ordering option.
2100 BLANKTYPE is the kind of blanks that 'b' should skip. */
2103 set_ordering (register const char *s, struct keyfield *key,
2104 enum blanktype blanktype)
2111 if (blanktype == bl_start || blanktype == bl_both)
2112 key->skipsblanks = 1;
2113 if (blanktype == bl_end || blanktype == bl_both)
2114 key->skipeblanks = 1;
2117 key->ignore = nondictionary;
2120 key->translate = fold_toupper;
2123 key->general_numeric = 1;
2126 key->ignore = nonprinting;
2145 static struct keyfield *
2148 struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key);
2154 main (int argc, char **argv)
2156 struct keyfield *key;
2157 struct keyfield gkey;
2161 int checkonly = 0, mergeonly = 0, nfiles = 0;
2162 int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
2163 char *minus = "-", **files;
2164 char const *outfile = minus;
2165 static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2166 int nsigs = sizeof sigs / sizeof *sigs;
2168 struct sigaction oldact, newact;
2171 program_name = argv[0];
2172 process_id = getpid ();
2173 setlocale (LC_ALL, "");
2174 bindtextdomain (PACKAGE, LOCALEDIR);
2175 textdomain (PACKAGE);
2179 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2180 # if HAVE_NL_LANGINFO
2181 hard_LC_TIME = hard_locale (LC_TIME);
2184 /* Let's get locale's representation of the decimal point */
2186 struct lconv *lconvp = localeconv ();
2188 /* If the locale doesn't define a decimal point, or if the decimal
2189 point is multibyte, use the C decimal point. We don't support
2190 multibyte decimal points yet. */
2191 decimal_point = *lconvp->decimal_point;
2192 if (! decimal_point || lconvp->decimal_point[1])
2193 decimal_point = C_DECIMAL_POINT;
2195 /* We don't support multibyte thousands separators yet. */
2196 th_sep = *lconvp->thousands_sep;
2197 if (! th_sep || lconvp->thousands_sep[1])
2198 th_sep = CHAR_MAX + 1;
2203 have_read_stdin = 0;
2206 /* Change the way xmalloc and xrealloc fail. */
2207 xalloc_exit_failure = SORT_FAILURE;
2208 xalloc_fail_func = cleanup;
2211 sigemptyset (&caught_signals);
2212 for (i = 0; i < nsigs; i++)
2213 sigaddset (&caught_signals, sigs[i]);
2214 newact.sa_handler = sighandler;
2215 newact.sa_mask = caught_signals;
2216 newact.sa_flags = 0;
2219 for (i = 0; i < nsigs; i++)
2223 sigaction (sig, NULL, &oldact);
2224 if (oldact.sa_handler != SIG_IGN)
2225 sigaction (sig, &newact, NULL);
2227 if (signal (sig, SIG_IGN) != SIG_IGN)
2228 signal (sig, sighandler);
2232 gkey.sword = gkey.eword = -1;
2234 gkey.translate = NULL;
2235 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2236 gkey.skipsblanks = gkey.skipeblanks = 0;
2238 files = (char **) xmalloc (sizeof (char *) * argc);
2242 /* Parse an operand as a file after "--" was seen; or if
2243 pedantic and a file was seen, unless -c was not seen and the
2244 operand is "-o FILE" or "-oFILE". POSIX 1003.1-200x d5
2245 removes the exception for the -o options; when it becomes
2246 official the code will need to be changed. */
2249 || (posix_pedantic && nfiles != 0
2252 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2253 && (argv[optind][2] || optind + 1 != argc)))
2254 || ((c = getopt_long (argc, argv,
2255 "-bcdfgik:mMno:rsS:t:T:uy::z",
2256 long_options, NULL))
2261 files[nfiles++] = argv[optind++];
2266 /* Treat +POS1 [-POS2] as a key if possible; but silently
2267 treat an operand as a file if it is not a valid +POS1.
2268 POSIX 1003.1-200x d5 does not allow support for +POS1, so
2269 when it becomes official this code will need to be
2272 if (optarg[0] == '+')
2275 s = parse_field_count (optarg + 1, &key->sword, NULL);
2277 s = parse_field_count (s + 1, &key->schar, NULL);
2278 if (! (key->sword | key->schar))
2280 if (! s || *set_ordering (s, key, bl_start))
2287 if (optind != argc && argv[optind][0] == '-'
2288 && ISDIGIT (argv[optind][1]))
2290 char const *optarg1 = argv[optind++];
2291 s = parse_field_count (optarg1 + 1, &key->eword,
2292 N_("invalid number after `-'"));
2294 s = parse_field_count (s + 1, &key->echar,
2295 N_("invalid number after `.'"));
2296 if (*set_ordering (s, key, bl_end))
2297 badfieldspec (optarg1,
2298 N_("stray character in field spec"));
2304 files[nfiles++] = optarg;
2319 set_ordering (str, &gkey, bl_both);
2331 s = parse_field_count (optarg, &key->sword,
2332 N_("invalid number at field start"));
2335 /* Provoke with `sort -k0' */
2336 badfieldspec (optarg, N_("field number is zero"));
2340 s = parse_field_count (s + 1, &key->schar,
2341 N_("invalid number after `.'"));
2344 /* Provoke with `sort -k1.0' */
2345 badfieldspec (optarg, N_("character offset is zero"));
2348 if (! (key->sword | key->schar))
2350 s = set_ordering (s, key, bl_start);
2359 s = parse_field_count (s + 1, &key->eword,
2360 N_("invalid number after `,'"));
2363 /* Provoke with `sort -k1,0' */
2364 badfieldspec (optarg, N_("field number is zero"));
2367 s = parse_field_count (s + 1, &key->echar,
2368 N_("invalid number after `.'"));
2371 /* `-k 2,3' is equivalent to `+1 -3'. */
2374 s = set_ordering (s, key, bl_end);
2377 badfieldspec (optarg, N_("stray character in field spec"));
2394 specify_sort_size (optarg);
2399 if (tab && optarg[1])
2401 /* Provoke with `sort -txx'. Complain about
2402 "multi-character tab" instead of "multibyte tab", so
2403 that the diagnostic's wording does not need to be
2404 changed once multibyte characters are supported. */
2405 error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg);
2410 add_temp_dir (optarg);
2418 /* Accept and ignore e.g. -y0 for compatibility with Solaris
2419 2.x through Solaris 7. -y is marked as obsolete starting
2427 case_GETOPT_HELP_CHAR;
2429 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2432 usage (SORT_FAILURE);
2436 /* Inheritance of global options to individual keys. */
2437 for (key = keylist; key; key = key->next)
2438 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2439 && !key->skipeblanks && !key->month && !key->numeric
2440 && !key->general_numeric)
2442 key->ignore = gkey.ignore;
2443 key->translate = gkey.translate;
2444 key->skipsblanks = gkey.skipsblanks;
2445 key->skipeblanks = gkey.skipeblanks;
2446 key->month = gkey.month;
2447 key->numeric = gkey.numeric;
2448 key->general_numeric = gkey.general_numeric;
2449 key->reverse = gkey.reverse;
2452 if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks
2453 || gkey.skipeblanks || gkey.month || gkey.numeric
2454 || gkey.general_numeric))
2456 reverse = gkey.reverse;
2458 if (temp_dir_count == 0)
2460 char const *tmp_dir = getenv ("TMPDIR");
2461 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2473 error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2476 /* POSIX requires that sort return 1 IFF invoked with -c and the
2477 input is not properly sorted. */
2478 exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2483 int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2484 merge (files, nfiles, max_merge, outfile);
2487 sort (files, nfiles, outfile);
2489 if (have_read_stdin && fclose (stdin) == EOF)
2490 die (_("close failed"), "-");
2493 exit (EXIT_SUCCESS);