1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
32 #include "long-options.h"
34 #include "hard-locale.h"
38 #include "stdio-safer.h"
42 #if HAVE_SYS_RESOURCE_H
43 # include <sys/resource.h>
46 struct rlimit { size_t rlim_cur; };
47 # define getrlimit(Resource, Rlp) (-1)
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "sort"
53 #define AUTHORS N_ ("Mike Haertel and Paul Eggert")
55 #if defined ENABLE_NLS && HAVE_LANGINFO_H
56 # include <langinfo.h>
60 # define sigprocmask(How, Set, Oset) /* empty */
68 /* Undefine, to avoid warning about redefinition on some systems. */
69 /* FIXME: Remove these: use MIN/MAX from sys2.h. */
71 #define min(a, b) ((a) < (b) ? (a) : (b))
73 #define max(a, b) ((a) > (b) ? (a) : (b))
75 #define UCHAR_LIM (UCHAR_MAX + 1)
76 #define UCHAR(c) ((unsigned char) (c))
78 #ifndef DEFAULT_TMPDIR
79 # define DEFAULT_TMPDIR "/tmp"
82 /* Use this as exit status in case of error, not EXIT_FAILURE. This
83 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
84 that sort exit with status 1 IFF invoked with -c and the input is
85 not properly sorted. Any other irregular exit must exit with a
86 status code greater than 1. */
87 #define SORT_FAILURE 2
88 #define SORT_OUT_OF_ORDER 1
90 #define C_DECIMAL_POINT '.'
91 #define NEGATION_SIGN '-'
92 #define NUMERIC_ZERO '0'
96 static char decimal_point;
97 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
99 /* Nonzero if the corresponding locales are hard. */
100 static int hard_LC_COLLATE;
101 # if HAVE_NL_LANGINFO
102 static int hard_LC_TIME;
105 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
109 # define decimal_point C_DECIMAL_POINT
110 # define IS_THOUSANDS_SEP(x) 0
114 #define NONZERO(x) (x != 0)
116 /* The kind of blanks for '-b' to skip in various options. */
117 enum blanktype { bl_start, bl_end, bl_both };
119 /* The character marking end of line. Default to \n. */
120 static int eolchar = '\n';
122 /* Lines are held in core as counted strings. */
125 char *text; /* Text of the line. */
126 size_t length; /* Length including final newline. */
127 char *keybeg; /* Start of first key. */
128 char *keylim; /* Limit of first key. */
134 char *buf; /* Dynamically allocated buffer,
135 partitioned into 3 regions:
138 - an array of lines, in reverse order. */
139 size_t used; /* Number of bytes used for input data. */
140 size_t nlines; /* Number of lines in the line array. */
141 size_t alloc; /* Number of bytes allocated. */
142 size_t left; /* Number of bytes left from previous reads. */
143 size_t line_bytes; /* Number of bytes to reserve for each line. */
144 int eof; /* An EOF has been read. */
149 size_t sword; /* Zero-origin 'word' to start at. */
150 size_t schar; /* Additional characters to skip. */
151 int skipsblanks; /* Skip leading white space at start. */
152 size_t eword; /* Zero-origin first word after field. */
153 size_t echar; /* Additional characters in field. */
154 int skipeblanks; /* Skip trailing white space at finish. */
155 int *ignore; /* Boolean array of characters to ignore. */
156 char *translate; /* Translation applied to characters. */
157 int numeric; /* Flag for numeric comparison. Handle
158 strings of digits with optional decimal
159 point, but no exponential notation. */
160 int general_numeric; /* Flag for general, numeric comparison.
161 Handle numbers in exponential notation. */
162 int month; /* Flag for comparison by month name. */
163 int reverse; /* Reverse the sense of comparison. */
164 struct keyfield *next; /* Next keyfield to try. */
173 /* The name this program was run with. */
176 /* FIXME: None of these tables work with multibyte character sets.
177 Also, there are many other bugs when handling multibyte characters,
178 or even unibyte encodings where line boundaries are not in the
179 initial shift state. One way to fix this is to rewrite `sort' to
180 use wide characters internally, but doing this with good
181 performance is a bit tricky. */
183 /* Table of white space. */
184 static int blanks[UCHAR_LIM];
186 /* Table of non-printing characters. */
187 static int nonprinting[UCHAR_LIM];
189 /* Table of non-dictionary characters (not letters, digits, or blanks). */
190 static int nondictionary[UCHAR_LIM];
192 /* Translation table folding lower case to upper. */
193 static char fold_toupper[UCHAR_LIM];
195 #define MONTHS_PER_YEAR 12
197 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
198 # define MONTHTAB_CONST /* empty */
200 # define MONTHTAB_CONST const
203 /* Table mapping month names to integers.
204 Alphabetic order allows binary search. */
205 static MONTHTAB_CONST struct month monthtab[] =
221 /* During the merge phase, the number of files to merge at once. */
224 /* Minimum size for a merge or check buffer. */
225 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
227 /* Minimum sort size; the code might not work with smaller sizes. */
228 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
230 /* The number of bytes needed for a merge or check buffer, which can
231 function relatively efficiently even if it holds only one line. If
232 a longer line is seen, this value is increased. */
233 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
235 /* The approximate maximum number of bytes of main memory to use, as
236 specified by the user. Zero if the user has not specified a size. */
237 static size_t sort_size;
239 /* The guessed size for non-regular files. */
240 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
242 /* Array of directory names in which any temporary files are to be created. */
243 static char const **temp_dirs;
245 /* Number of temporary directory names used. */
246 static size_t temp_dir_count;
248 /* Number of allocated slots in temp_dirs. */
249 static size_t temp_dir_alloc;
251 /* Our process ID. */
252 static pid_t process_id;
254 /* Flag to reverse the order of all comparisons. */
257 /* Flag for stable sort. This turns off the last ditch bytewise
258 comparison of lines, and instead leaves lines in the same order
259 they were read if all keys compare equal. */
262 /* Tab character separating fields. If NUL, then fields are separated
263 by the empty string between a non-whitespace character and a whitespace
267 /* Flag to remove consecutive duplicate lines from the output.
268 Only the last of a sequence of equal lines will be output. */
271 /* Nonzero if any of the input files are the standard input. */
272 static int have_read_stdin;
274 /* List of key field comparisons to be tried. */
275 static struct keyfield *keylist;
281 fprintf (stderr, _("Try `%s --help' for more information.\n"),
286 Usage: %s [OPTION]... [FILE]...\n\
290 Write sorted concatenation of all FILE(s) to standard output.\n\
294 -b, --ignore-leading-blanks ignore leading blanks\n\
295 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
296 -f, --ignore-case fold lower case to upper case characters\n\
297 -g, --general-numeric-sort compare according to general numerical value\n\
298 -i, --ignore-nonprinting consider only printable characters\n\
299 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
300 -n, --numeric-sort compare according to string numerical value\n\
301 -r, --reverse reverse the result of comparisons\n\
308 -c, --check check whether input is sorted; do not sort\n\
309 -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\
310 -m, --merge merge already sorted files; do not sort\n\
311 -o, --output=FILE write result to FILE instead of standard output\n\
312 -s, --stable stabilize sort by disabling last-resort comparison\n\
313 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
314 -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\
315 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s\n\
316 multiple options specify multiple directories\n\
317 -u, --unique with -c: check for strict ordering\n\
318 otherwise: output only the first of an equal run\n\
319 -z, --zero-terminated end lines with 0 byte, not newline\n\
320 +POS1 [-POS2] start a key at POS1, end it before POS2 (origin 0)\n\
321 Warning: this option is obsolescent\n\
322 --help display this help and exit\n\
323 --version output version information and exit\n\
328 POS is F[.C][OPTS], where F is the field number and C the character position\n\
329 in the field, both counted from one with -k, from zero with the obsolescent\n\
330 form. OPTS is made up of one or more single-letter ordering options, which\n\
331 override global ordering options for that key. If no key is given, use the\n\
332 entire line as the key.\n\
334 SIZE may be followed by the following multiplicative suffixes:\n\
335 %% 1%% of memory, b 1, k 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
337 With no FILE, or when FILE is -, read standard input.\n\
340 The locale specified by the environment affects sort order.\n\
341 Set LC_COLLATE=C to get the traditional sort order that uses\n\
342 native byte values.\n\
345 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
347 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
348 POSIX requires that sort return 1 IFF invoked with -c and
349 the input is not properly sorted. */
350 assert (status == 0 || status == SORT_FAILURE);
354 static struct option const long_options[] =
356 {"ignore-leading-blanks", no_argument, NULL, 'b'},
357 {"check", no_argument, NULL, 'c'},
358 {"dictionary-order", no_argument, NULL, 'd'},
359 {"ignore-case", no_argument, NULL, 'f'},
360 {"general-numeric-sort", no_argument, NULL, 'g'},
361 {"ignore-nonprinting", no_argument, NULL, 'i'},
362 {"key", required_argument, NULL, 'k'},
363 {"merge", no_argument, NULL, 'm'},
364 {"month-sort", no_argument, NULL, 'M'},
365 {"numeric-sort", no_argument, NULL, 'n'},
366 {"output", required_argument, NULL, 'o'},
367 {"reverse", no_argument, NULL, 'r'},
368 {"stable", no_argument, NULL, 's'},
369 {"buffer-size", required_argument, NULL, 'S'},
370 {"field-separator", required_argument, NULL, 't'},
371 {"temporary-directory", required_argument, NULL, 'T'},
372 {"unique", no_argument, NULL, 'u'},
373 {"zero-terminated", no_argument, NULL, 'z'},
374 {GETOPT_HELP_OPTION_DECL},
375 {GETOPT_VERSION_OPTION_DECL},
379 /* The set of signals that are caught. */
380 static sigset_t caught_signals;
382 /* The list of temporary files. */
385 struct tempnode *volatile next;
386 char name[1]; /* Actual size is 1 + file name length. */
388 static struct tempnode *volatile temphead;
390 /* Clean up any remaining temporary files. */
395 struct tempnode *node;
397 for (node = temphead; node; node = node->next)
401 /* Report MESSAGE for FILE, then clean up and exit. */
403 static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN;
405 die (char const *message, char const *file)
407 error (0, errno, "%s: %s", message, file);
412 /* Create a new temporary file, returning its newly allocated name.
413 Store into *PFP a stream open for writing. */
416 create_temp_file (FILE **pfp)
418 static char const slashbase[] = "/sortXXXXXX";
419 static size_t temp_dir_index;
423 char const *temp_dir = temp_dirs[temp_dir_index];
424 size_t len = strlen (temp_dir);
425 struct tempnode *node =
426 (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase);
427 char *file = node->name;
429 memcpy (file, temp_dir, len);
430 memcpy (file + len, slashbase, sizeof slashbase);
431 node->next = temphead;
432 if (++temp_dir_index == temp_dir_count)
435 /* Create the temporary file in a critical section, to avoid races. */
436 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
441 sigprocmask (SIG_SETMASK, &oldset, NULL);
444 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
445 die (_("cannot create temporary file"), file);
451 xfopen (const char *file, const char *how)
455 if (STREQ (file, "-"))
467 if ((fp = fopen_safer (file, how)) == NULL)
468 die (_("open failed"), file);
474 /* Close FP, whose name is FILE, and report any errors. */
477 xfclose (FILE *fp, char const *file)
481 /* Allow reading stdin from tty more than once. */
487 if (fclose (fp) != 0)
488 die (_("close failed"), file);
493 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
495 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
496 die (_("write failed"), output_file);
499 /* Append DIR to the array of temporary directory names. */
501 add_temp_dir (char const *dir)
503 if (temp_dir_count == temp_dir_alloc)
505 temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
506 temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
509 temp_dirs[temp_dir_count++] = dir;
512 /* Search through the list of temporary files for NAME;
513 remove it if it is found on the list. */
516 zaptemp (const char *name)
518 struct tempnode *volatile *pnode;
519 struct tempnode *node;
521 for (pnode = &temphead; (node = *pnode); pnode = &node->next)
522 if (node->name == name)
526 free ((char *) node);
534 struct_month_cmp (const void *m1, const void *m2)
536 return strcmp (((const struct month *) m1)->name,
537 ((const struct month *) m2)->name);
542 /* Initialize the character class tables. */
549 for (i = 0; i < UCHAR_LIM; ++i)
555 if (!ISALNUM (i) && !ISBLANK (i))
556 nondictionary[i] = 1;
558 fold_toupper[i] = toupper (i);
563 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
564 /* If we're not in the "C" locale, read different names for months. */
567 for (i = 0; i < MONTHS_PER_YEAR; i++)
574 s = (char *) nl_langinfo (ABMON_1 + i);
576 monthtab[i].name = name = (char *) xmalloc (s_len + 1);
577 monthtab[i].val = i + 1;
579 for (j = 0; j < s_len; j++)
580 name[j] = fold_toupper[UCHAR (s[j])];
583 qsort ((void *) monthtab, MONTHS_PER_YEAR,
584 sizeof (struct month), struct_month_cmp);
589 /* Specify the amount of main memory to use when sorting. */
591 specify_sort_size (char const *s)
595 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkmMPtTYZ");
597 /* The default unit is kB. */
598 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
600 if (n <= UINTMAX_MAX / 1024)
603 e = LONGINT_OVERFLOW;
606 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
607 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
616 double mem = physmem_total () * n / 100;
618 /* Use "<", not "<=", to avoid problems with rounding. */
619 if (mem < UINTMAX_MAX)
625 e = LONGINT_OVERFLOW;
635 sort_size = MAX (sort_size, MIN_SORT_SIZE);
639 e = LONGINT_OVERFLOW;
642 STRTOL_FATAL_ERROR (s, _("sort size"), e);
645 /* Return the default sort size. */
647 default_sort_size (void)
649 /* Let MEM be available memory or 1/8 of total memory, whichever
651 double avail = physmem_available ();
652 double total = physmem_total ();
653 double mem = MAX (avail, total / 8);
654 struct rlimit rlimit;
656 /* Let SIZE be MEM, but no more than the maximum object size or
657 system resource limits. Avoid the MIN macro here, as it is not
658 quite right when only one argument is floating point. Don't
659 bother to check for values like RLIM_INFINITY since in practice
660 they are not much less than SIZE_MAX. */
661 size_t size = SIZE_MAX;
664 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
665 size = rlimit.rlim_cur;
667 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
668 size = rlimit.rlim_cur;
671 /* Leave a large safety margin for the above limits, as failure can
672 occur when they are exceeded. */
676 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
677 Exceeding RSS is not fatal, but can be quite slow. */
678 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
679 size = rlimit.rlim_cur / 16 * 15;
682 /* Use no less than the minimum. */
683 return MAX (size, MIN_SORT_SIZE);
686 /* Return the sort buffer size to use with the input files identified
687 by FPS and FILES, which are alternate paths to the same files.
688 NFILES gives the number of input files; NFPS may be less. Assume
689 that each input line requires LINE_BYTES extra bytes' worth of line
690 information. Return at most SIZE_BOUND. */
693 sort_buffer_size (FILE *const *fps, int nfps,
694 char *const *files, int nfiles,
695 size_t line_bytes, size_t size_bound)
697 /* In the worst case, each input byte is a newline. */
698 size_t worst_case_per_input_byte = line_bytes + 1;
700 /* Keep enough room for one extra input line and an extra byte.
701 This extra room might be needed when preparing to read EOF. */
702 size_t size = worst_case_per_input_byte + 1;
706 for (i = 0; i < nfiles; i++)
712 if ((i < nfps ? fstat (fileno (fps[i]), &st)
713 : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
714 : stat (files[i], &st))
716 die (_("stat failed"), files[i]);
718 file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;
720 /* Add the amount of memory needed to represent the worst case
721 where the input consists entirely of newlines followed by a
722 single non-newline. Check for overflow. */
723 worst_case = file_size * worst_case_per_input_byte + 1;
724 if (file_size != worst_case / worst_case_per_input_byte
725 || size_bound - size <= worst_case)
733 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
734 must be at least sizeof (struct line). Allocate ALLOC bytes
738 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
740 /* Ensure that the line array is properly aligned. If the desired
741 size cannot be allocated, repeatedly halve it until allocation
742 succeeds. The smaller allocation may hurt overall performance,
743 but that's better than failing. */
746 alloc += sizeof (struct line) - alloc % sizeof (struct line);
747 buf->buf = malloc (alloc);
751 if (alloc <= line_bytes + 1)
755 buf->line_bytes = line_bytes;
757 buf->used = buf->left = buf->nlines = 0;
761 /* Return one past the limit of the line array. */
763 static inline struct line *
764 buffer_linelim (struct buffer const *buf)
766 return (struct line *) (buf->buf + buf->alloc);
769 /* Return a pointer to the first character of the field specified
773 begfield (const struct line *line, const struct keyfield *key)
775 register char *ptr = line->text, *lim = ptr + line->length - 1;
776 register size_t sword = key->sword;
777 register size_t schar = key->schar;
780 while (ptr < lim && sword--)
782 while (ptr < lim && *ptr != tab)
788 while (ptr < lim && sword--)
790 while (ptr < lim && blanks[UCHAR (*ptr)])
792 while (ptr < lim && !blanks[UCHAR (*ptr)])
796 if (key->skipsblanks)
797 while (ptr < lim && blanks[UCHAR (*ptr)])
800 if (schar < lim - ptr)
808 /* Return the limit of (a pointer to the first character after) the field
809 in LINE specified by KEY. */
812 limfield (const struct line *line, const struct keyfield *key)
814 register char *ptr = line->text, *lim = ptr + line->length - 1;
815 register size_t eword = key->eword, echar = key->echar;
817 /* Note: from the POSIX spec:
818 The leading field separator itself is included in
819 a field when -t is not used. FIXME: move this comment up... */
821 /* Move PTR past EWORD fields or to one past the last byte on LINE,
822 whichever comes first. If there are more than EWORD fields, leave
823 PTR pointing at the beginning of the field having zero-based index,
824 EWORD. If a delimiter character was specified (via -t), then that
825 `beginning' is the first character following the delimiting TAB.
826 Otherwise, leave PTR pointing at the first `blank' character after
827 the preceding field. */
829 while (ptr < lim && eword--)
831 while (ptr < lim && *ptr != tab)
833 if (ptr < lim && (eword | echar))
837 while (ptr < lim && eword--)
839 while (ptr < lim && blanks[UCHAR (*ptr)])
841 while (ptr < lim && !blanks[UCHAR (*ptr)])
845 #ifdef POSIX_UNSPECIFIED
846 /* The following block of code makes GNU sort incompatible with
847 standard Unix sort, so it's ifdef'd out for now.
848 The POSIX spec isn't clear on how to interpret this.
849 FIXME: request clarification.
851 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
852 Date: Thu, 30 May 96 12:20:41 -0400
854 [...]I believe I've found another bug in `sort'.
859 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
862 $ /bin/sort +0.6 -0.7 </tmp/sort.in
866 Unix sort produced the answer I expected: sort on the single character
867 in column 6. GNU sort produced different results, because it disagrees
868 on the interpretation of the key-end spec "-M.N". Unix sort reads this
869 as "skip M fields, then N characters"; but GNU sort wants it to mean
870 "skip M fields, then either N characters or the rest of the current
871 field, whichever comes first". This extra clause applies only to
872 key-ends, not key-starts.
875 /* Make LIM point to the end of (one byte past) the current field. */
879 newlim = memchr (ptr, tab, lim - ptr);
887 while (newlim < lim && blanks[UCHAR (*newlim)])
889 while (newlim < lim && !blanks[UCHAR (*newlim)])
895 /* If we're skipping leading blanks, don't start counting characters
896 until after skipping past any leading blanks. */
897 if (key->skipsblanks)
898 while (ptr < lim && blanks[UCHAR (*ptr)])
901 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
902 if (echar < lim - ptr)
913 trim_trailing_blanks (const char *a_start, char **a_end)
915 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
919 /* Fill BUF reading from FP, moving buf->left bytes from the end
920 of buf->buf to the beginning first. If EOF is reached and the
921 file wasn't terminated by a newline, supply one. Set up BUF's line
922 table too. FILE is the name of the file corresponding to FP.
923 Return nonzero if some input was read. */
926 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
928 struct keyfield const *key = keylist;
930 size_t line_bytes = buf->line_bytes;
931 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
936 if (buf->used != buf->left)
938 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
939 buf->used = buf->left;
945 char *ptr = buf->buf + buf->used;
946 struct line *linelim = buffer_linelim (buf);
947 struct line *line = linelim - buf->nlines;
948 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
949 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
951 while (line_bytes + 1 < avail)
953 /* Read as many bytes as possible, but do not read so many
954 bytes that there might not be enough room for the
955 corresponding line array. The worst case is when the
956 rest of the input file consists entirely of newlines,
957 except that the last byte is not a newline. */
958 size_t readsize = (avail - 1) / (line_bytes + 1);
959 size_t bytes_read = fread (ptr, 1, readsize, fp);
960 char *ptrlim = ptr + bytes_read;
964 if (bytes_read != readsize)
967 die (_("read failed"), file);
971 if (buf->buf == ptrlim)
973 if (ptrlim[-1] != eol)
978 /* Find and record each line in the just-read input. */
979 while ((p = memchr (ptr, eol, ptrlim - ptr)))
983 line->text = line_start;
984 line->length = ptr - line_start;
985 mergesize = MAX (mergesize, line->length);
990 /* Precompute the position of the first key for
992 line->keylim = key->eword == -1 ? p : limfield (line, key);
994 if (key->sword != -1)
995 line->keybeg = begfield (line, key);
998 if (key->skipsblanks)
999 while (blanks[UCHAR (*line_start)])
1001 line->keybeg = line_start;
1003 if (key->skipeblanks)
1004 trim_trailing_blanks (line->keybeg, &line->keylim);
1015 buf->used = ptr - buf->buf;
1016 buf->nlines = buffer_linelim (buf) - line;
1017 if (buf->nlines != 0)
1019 buf->left = ptr - line_start;
1020 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1024 /* The current input line is too long to fit in the buffer.
1025 Double the buffer size and try again. */
1026 if (2 * buf->alloc < buf->alloc)
1029 buf->buf = xrealloc (buf->buf, buf->alloc);
1033 /* Compare strings A and B containing decimal fractions < 1. Each string
1034 should begin with a decimal point followed immediately by the digits
1035 of the fraction. Strings not of this form are considered to be zero. */
1037 /* The goal here, is to take two numbers a and b... compare these
1038 in parallel. Instead of converting each, and then comparing the
1039 outcome. Most likely stopping the comparison before the conversion
1040 is complete. The algorithm used, in the old sort:
1042 Algorithm: fraccompare
1043 Action : compare two decimal fractions
1044 accepts : char *a, char *b
1045 returns : -1 if a<b, 0 if a=b, 1 if a>b.
1048 if *a == decimal_point AND *b == decimal_point
1049 find first character different in a and b.
1050 if both are digits, return the difference *a - *b.
1053 if digit return 1, else 0
1056 if digit return -1, else 0
1057 if *a is a decimal_point
1058 skip past decimal_point and zeros
1059 if digit return 1, else 0
1060 if *b is a decimal_point
1061 skip past decimal_point and zeros
1062 if digit return -1, else 0
1066 fraccompare (register const char *a, register const char *b)
1068 if (*a == decimal_point && *b == decimal_point)
1070 while (*++a == *++b)
1073 if (ISDIGIT (*a) && ISDIGIT (*b))
1076 goto a_trailing_nonzero;
1078 goto b_trailing_nonzero;
1081 else if (*a++ == decimal_point)
1084 while (*a == NUMERIC_ZERO)
1086 return ISDIGIT (*a);
1088 else if (*b++ == decimal_point)
1091 while (*b == NUMERIC_ZERO)
1093 return - ISDIGIT (*b);
1098 /* Compare strings A and B as numbers without explicitly converting them to
1099 machine numbers. Comparatively slow for short strings, but asymptotically
1103 numcompare (register const char *a, register const char *b)
1105 register int tmpa, tmpb, tmp;
1106 register size_t loga, logb;
1111 while (blanks[UCHAR (tmpa)])
1113 while (blanks[UCHAR (tmpb)])
1116 if (tmpa == NEGATION_SIGN)
1120 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1121 if (tmpb != NEGATION_SIGN)
1123 if (tmpa == decimal_point)
1126 while (tmpa == NUMERIC_ZERO);
1129 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1131 if (tmpb == decimal_point)
1134 while (tmpb == NUMERIC_ZERO);
1141 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1143 while (tmpa == tmpb && ISDIGIT (tmpa))
1147 while (IS_THOUSANDS_SEP (tmpa));
1150 while (IS_THOUSANDS_SEP (tmpb));
1153 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1154 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1155 return -fraccompare (a, b);
1159 for (loga = 0; ISDIGIT (tmpa); ++loga)
1162 while (IS_THOUSANDS_SEP (tmpa));
1164 for (logb = 0; ISDIGIT (tmpb); ++logb)
1167 while (IS_THOUSANDS_SEP (tmpb));
1170 return loga < logb ? 1 : -1;
1177 else if (tmpb == NEGATION_SIGN)
1181 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1182 if (tmpb == decimal_point)
1185 while (tmpb == NUMERIC_ZERO);
1188 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1190 if (tmpa == decimal_point)
1193 while (tmpa == NUMERIC_ZERO);
1200 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1202 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1205 while (tmpa == tmpb && ISDIGIT (tmpa))
1209 while (IS_THOUSANDS_SEP (tmpa));
1212 while (IS_THOUSANDS_SEP (tmpb));
1215 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1216 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1217 return fraccompare (a, b);
1221 for (loga = 0; ISDIGIT (tmpa); ++loga)
1224 while (IS_THOUSANDS_SEP (tmpa));
1226 for (logb = 0; ISDIGIT (tmpb); ++logb)
1229 while (IS_THOUSANDS_SEP (tmpb));
1232 return loga < logb ? -1 : 1;
1242 general_numcompare (const char *sa, const char *sb)
1244 /* FIXME: add option to warn about failed conversions. */
1245 /* FIXME: maybe add option to try expensive FP conversion
1246 only if A and B can't be compared more cheaply/accurately. */
1250 double a = strtod (sa, &ea);
1251 double b = strtod (sb, &eb);
1253 /* Put conversion errors at the start of the collating sequence. */
1255 return sb == eb ? 0 : -1;
1259 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1260 conversion errors but before numbers; sort them by internal
1261 bit-pattern, for lack of a more portable alternative. */
1267 : memcmp ((char *) &a, (char *) &b, sizeof a));
1270 /* Return an integer in 1..12 of the month name S with length LEN.
1271 Return 0 if the name in S is not recognized. */
1274 getmonth (const char *s, size_t len)
1278 register int lo = 0, hi = MONTHS_PER_YEAR, result;
1280 while (len > 0 && blanks[UCHAR (*s)])
1289 month = (char *) alloca (len + 1);
1290 for (i = 0; i < len; ++i)
1291 month[i] = fold_toupper[UCHAR (s[i])];
1292 while (blanks[UCHAR (month[i - 1])])
1298 int ix = (lo + hi) / 2;
1300 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1305 while (hi - lo > 1);
1307 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1308 ? monthtab[lo].val : 0);
1313 /* Compare two lines A and B trying every key in sequence until there
1314 are no more keys or a difference is found. */
1317 keycompare (const struct line *a, const struct line *b)
1319 struct keyfield *key = keylist;
1321 /* For the first iteration only, the key positions have been
1322 precomputed for us. */
1323 register char *texta = a->keybeg;
1324 register char *textb = b->keybeg;
1325 register char *lima = a->keylim;
1326 register char *limb = b->keylim;
1332 register unsigned char *translate = (unsigned char *) key->translate;
1333 register int *ignore = key->ignore;
1335 /* Find the lengths. */
1336 size_t lena = lima <= texta ? 0 : lima - texta;
1337 size_t lenb = limb <= textb ? 0 : limb - textb;
1339 if (key->skipeblanks)
1341 char *a_end = texta + lena;
1342 char *b_end = textb + lenb;
1343 trim_trailing_blanks (texta, &a_end);
1344 trim_trailing_blanks (textb, &b_end);
1345 lena = a_end - texta;
1346 lenb = b_end - textb;
1349 /* Actually compare the fields. */
1350 if (key->numeric | key->general_numeric)
1352 char savea = *lima, saveb = *limb;
1354 *lima = *limb = '\0';
1355 diff = ((key->numeric ? numcompare : general_numcompare)
1357 *lima = savea, *limb = saveb;
1359 else if (key->month)
1360 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1362 /* Sorting like this may become slow, so in a simple locale the user
1363 can select a faster sort that is similar to ascii sort */
1364 else if (hard_LC_COLLATE)
1366 if (ignore || translate)
1368 char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1369 char *copy_b = copy_a + lena + 1;
1370 size_t new_len_a, new_len_b, i;
1372 /* Ignore and/or translate chars before comparing. */
1373 for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1377 copy_a[new_len_a] = (translate
1378 ? translate[UCHAR (texta[i])]
1380 if (!ignore || !ignore[UCHAR (texta[i])])
1385 copy_b[new_len_b] = (translate
1386 ? translate[UCHAR (textb[i])]
1388 if (!ignore || !ignore[UCHAR (textb[i])])
1393 diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
1396 diff = - NONZERO (lenb);
1400 diff = memcoll (texta, lena, textb, lenb);
1405 #define CMP_WITH_IGNORE(A, B) \
1410 while (texta < lima && ignore[UCHAR (*texta)]) \
1412 while (textb < limb && ignore[UCHAR (*textb)]) \
1414 if (! (texta < lima && textb < limb)) \
1416 diff = UCHAR (A) - UCHAR (B); \
1423 diff = (lima - texta) - (limb - textb); \
1428 CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1429 translate[UCHAR (*textb)]);
1431 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1434 diff = - NONZERO (lenb);
1441 while (texta < lima && textb < limb)
1443 diff = (UCHAR (translate[UCHAR (*texta++)])
1444 - UCHAR (translate[UCHAR (*textb++)]));
1451 diff = memcmp (texta, textb, min (lena, lenb));
1455 diff = lena < lenb ? -1 : lena != lenb;
1465 /* Find the beginning and limit of the next field. */
1466 if (key->eword != -1)
1467 lima = limfield (a, key), limb = limfield (b, key);
1469 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1471 if (key->sword != -1)
1472 texta = begfield (a, key), textb = begfield (b, key);
1475 texta = a->text, textb = b->text;
1476 if (key->skipsblanks)
1478 while (texta < lima && blanks[UCHAR (*texta)])
1480 while (textb < limb && blanks[UCHAR (*textb)])
1491 return key->reverse ? -diff : diff;
1494 /* Compare two lines A and B, returning negative, zero, or positive
1495 depending on whether A compares less than, equal to, or greater than B. */
1498 compare (register const struct line *a, register const struct line *b)
1503 /* First try to compare on the specified keys (if any).
1504 The only two cases with no key at all are unadorned sort,
1505 and unadorned sort -r. */
1508 diff = keycompare (a, b);
1510 if (diff != 0 || unique || stable)
1514 /* If the keys all compare equal (or no keys were specified)
1515 fall through to the default comparison. */
1516 alen = a->length - 1, blen = b->length - 1;
1519 diff = - NONZERO (blen);
1521 diff = NONZERO (alen);
1523 else if (hard_LC_COLLATE)
1524 diff = memcoll (a->text, alen, b->text, blen);
1526 else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
1527 diff = alen < blen ? -1 : alen != blen;
1529 return reverse ? -diff : diff;
1532 /* Check that the lines read from the given FP come in order. Print a
1533 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1534 one if they are not in order. Otherwise, print no diagnostic
1538 checkfp (FILE *fp, char *file_name)
1540 struct buffer buf; /* Input buffer. */
1541 struct line temp; /* Copy of previous line. */
1543 uintmax_t line_number = 0;
1544 struct keyfield *key = keylist;
1545 int nonunique = 1 - unique;
1548 initbuf (&buf, sizeof (struct line),
1549 MAX (merge_buffer_size, sort_size));
1552 while (fillbuf (&buf, fp, file_name))
1554 struct line const *line = buffer_linelim (&buf);
1555 struct line const *linebase = line - buf.nlines;
1557 /* Make sure the line saved from the old buffer contents is
1558 less than or equal to the first line of the new buffer. */
1559 if (alloc && nonunique <= compare (&temp, line - 1))
1563 char hr_buf[LONGEST_HUMAN_READABLE + 1];
1564 struct line const *disorder_line = line - 1;
1565 uintmax_t disorder_line_number =
1566 buffer_linelim (&buf) - disorder_line + line_number;
1567 fprintf (stderr, _("%s: %s:%s: disorder: "),
1568 program_name, file_name,
1569 human_readable (disorder_line_number, hr_buf, 1, 1));
1570 write_bytes (disorder_line->text, disorder_line->length, stderr,
1571 _("standard error"));
1577 /* Compare each line in the buffer with its successor. */
1578 while (linebase < --line)
1579 if (nonunique <= compare (line, line - 1))
1580 goto found_disorder;
1582 line_number += buf.nlines;
1584 /* Save the last line of the buffer. */
1585 if (alloc < line->length)
1592 alloc = line->length;
1596 while (alloc < line->length);
1598 temp.text = xrealloc (temp.text, alloc);
1600 memcpy (temp.text, line->text, line->length);
1601 temp.length = line->length;
1604 temp.keybeg = temp.text + (line->keybeg - line->text);
1605 temp.keylim = temp.text + (line->keylim - line->text);
1609 xfclose (fp, file_name);
1616 /* Merge lines from FILES onto OFP. NFILES cannot be greater than
1617 NMERGE. Close input and output files before returning.
1618 OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1619 output file has not been opened yet. */
1622 mergefps (char **files, register int nfiles,
1623 FILE *ofp, const char *output_file)
1625 FILE *fps[NMERGE]; /* Input streams for each file. */
1626 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1627 struct line saved; /* Saved line storage for unique check. */
1628 struct line const *savedline = NULL;
1629 /* &saved if there is a saved line. */
1630 size_t savealloc = 0; /* Size allocated for the saved line. */
1631 struct line const *cur[NMERGE]; /* Current line in each line table. */
1632 struct line const *base[NMERGE]; /* Base of each line table. */
1633 int ord[NMERGE]; /* Table representing a permutation of fps,
1634 such that cur[ord[0]] is the smallest line
1635 and will be next output. */
1636 register int i, j, t;
1637 struct keyfield *key = keylist;
1640 /* Read initial lines from each input file. */
1641 for (i = 0; i < nfiles; ++i)
1643 fps[i] = xfopen (files[i], "r");
1644 initbuf (&buffer[i], sizeof (struct line),
1645 MAX (merge_buffer_size, sort_size / nfiles));
1646 /* If a file is empty, eliminate it from future consideration. */
1647 while (i < nfiles && !fillbuf (&buffer[i], fps[i], files[i]))
1649 xfclose (fps[i], files[i]);
1652 for (j = i; j < nfiles; ++j)
1653 files[j] = files[j + 1];
1656 free (buffer[i].buf);
1659 struct line const *linelim = buffer_linelim (&buffer[i]);
1660 cur[i] = linelim - 1;
1661 base[i] = linelim - buffer[i].nlines;
1666 ofp = xfopen (output_file, "w");
1668 /* Set up the ord table according to comparisons among input lines.
1669 Since this only reorders two items if one is strictly greater than
1670 the other, it is stable. */
1671 for (i = 0; i < nfiles; ++i)
1673 for (i = 1; i < nfiles; ++i)
1674 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1675 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1677 /* Repeatedly output the smallest line until no input remains. */
1680 struct line const *smallest = cur[ord[0]];
1682 /* If uniquified output is turned on, output only the first of
1683 an identical series of lines. */
1686 if (savedline && compare (savedline, smallest))
1689 write_bytes (saved.text, saved.length, ofp, output_file);
1694 if (savealloc < smallest->length)
1699 savealloc = smallest->length;
1702 while ((savealloc *= 2) < smallest->length);
1704 saved.text = xrealloc (saved.text, savealloc);
1706 saved.length = smallest->length;
1707 memcpy (saved.text, smallest->text, saved.length);
1711 saved.text + (smallest->keybeg - smallest->text);
1713 saved.text + (smallest->keylim - smallest->text);
1718 write_bytes (smallest->text, smallest->length, ofp, output_file);
1720 /* Check if we need to read more lines into core. */
1721 if (base[ord[0]] < smallest)
1722 cur[ord[0]] = smallest - 1;
1725 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1727 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1728 cur[ord[0]] = linelim - 1;
1729 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1733 /* We reached EOF on fps[ord[0]]. */
1734 for (i = 1; i < nfiles; ++i)
1735 if (ord[i] > ord[0])
1738 xfclose (fps[ord[0]], files[ord[0]]);
1739 zaptemp (files[ord[0]]);
1740 free (buffer[ord[0]].buf);
1741 for (i = ord[0]; i < nfiles; ++i)
1743 fps[i] = fps[i + 1];
1744 files[i] = files[i + 1];
1745 buffer[i] = buffer[i + 1];
1746 cur[i] = cur[i + 1];
1747 base[i] = base[i + 1];
1749 for (i = 0; i < nfiles; ++i)
1750 ord[i] = ord[i + 1];
1755 /* The new line just read in may be larger than other lines
1756 already in core; push it back in the queue until we encounter
1757 a line larger than it. */
1758 for (i = 1; i < nfiles; ++i)
1760 t = compare (cur[ord[0]], cur[ord[i]]);
1762 t = ord[0] - ord[i];
1767 for (j = 1; j < i; ++j)
1768 ord[j - 1] = ord[j];
1772 if (unique && savedline)
1774 write_bytes (saved.text, saved.length, ofp, output_file);
1778 xfclose (ofp, output_file);
1781 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1782 The input and output arrays are in reverse order, and LINES and
1783 TEMP point just past the end of their respective arrays. */
1786 sortlines (struct line *lines, size_t nlines, struct line *temp)
1788 register struct line *lo, *hi, *t;
1789 register size_t nlo, nhi;
1793 if (0 < compare (&lines[-1], &lines[-2]))
1795 struct line tmp = lines[-1];
1796 lines[-1] = lines[-2];
1808 sortlines (lo, nlo, temp);
1811 sortlines (hi, nhi, temp);
1816 if (compare (lo - 1, hi - 1) <= 0)
1817 *--t = *--lo, --nlo;
1819 *--t = *--hi, --nhi;
1823 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1827 /* Return the index of the first of NFILES FILES that is the same file
1828 as OUTFILE. If none can be the same, return NFILES. Consider an
1829 input pipe to be the same as OUTFILE, since the pipe might be the
1830 output of a command like "cat OUTFILE". */
1833 first_same_file (char **files, int nfiles, char const *outfile)
1836 int got_outstat = 0;
1837 struct stat instat, outstat;
1839 for (i = 0; i < nfiles; i++)
1841 int standard_input = STREQ (files[i], "-");
1843 if (STREQ (outfile, files[i]) && ! standard_input)
1849 if ((STREQ (outfile, "-")
1850 ? fstat (STDOUT_FILENO, &outstat)
1851 : stat (outfile, &outstat))
1856 if (((standard_input
1857 ? fstat (STDIN_FILENO, &instat)
1858 : stat (files[i], &instat))
1860 && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1867 /* Check that each of the NFILES FILES is ordered.
1868 Return a count of disordered files. */
1871 check (char **files, int nfiles)
1873 int i, disorders = 0;
1876 for (i = 0; i < nfiles; ++i)
1878 fp = xfopen (files[i], "r");
1879 disorders += checkfp (fp, files[i]);
1884 /* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most
1885 MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot
1889 merge (char **files, int nfiles, int max_merge, char const *output_file)
1891 while (max_merge < nfiles)
1896 for (i = 0; i < nfiles / NMERGE; ++i)
1898 temp = create_temp_file (&tfp);
1899 mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1902 temp = create_temp_file (&tfp);
1903 mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1910 mergefps (files, nfiles, NULL, output_file);
1913 /* Sort NFILES FILES onto OUTPUT_FILE. */
1916 sort (char **files, int nfiles, char const *output_file)
1919 int n_temp_files = 0;
1920 int output_file_created = 0;
1923 if (! size && ! (size = sort_size))
1924 size = default_sort_size ();
1930 char const *temp_output;
1931 char const *file = *files;
1932 FILE *fp = xfopen (file, "r");
1936 initbuf (&buf, 2 * sizeof (struct line),
1937 sort_buffer_size (&fp, 1, files, nfiles,
1938 2 * sizeof (struct line), size));
1943 while (fillbuf (&buf, fp, file))
1946 struct line *linebase;
1948 if (buf.eof && nfiles
1949 && (2 * sizeof (struct line) + 1
1950 < (buf.alloc - buf.used
1951 - 2 * sizeof (struct line) * buf.nlines)))
1953 /* End of file, but there is more input and buffer room.
1954 Concatenate the next input file; this is faster in
1956 buf.left = buf.used;
1960 line = buffer_linelim (&buf);
1961 linebase = line - buf.nlines;
1962 sortlines (line, buf.nlines, linebase);
1963 if (buf.eof && !nfiles && !n_temp_files && !buf.left)
1966 tfp = xfopen (output_file, "w");
1967 temp_output = output_file;
1968 output_file_created = 1;
1973 temp_output = create_temp_file (&tfp);
1979 write_bytes (line->text, line->length, tfp, temp_output);
1981 while (linebase < line && compare (line, line - 1) == 0)
1984 while (linebase < line);
1986 xfclose (tfp, temp_output);
1988 if (output_file_created)
1997 if (! output_file_created)
1999 int i = n_temp_files;
2000 struct tempnode *node;
2001 char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2002 for (node = temphead; i > 0; node = node->next)
2003 tempfiles[--i] = node->name;
2004 merge (tempfiles, n_temp_files, NMERGE, output_file);
2005 free ((char *) tempfiles);
2009 /* Insert key KEY at the end of the key list. */
2012 insertkey (struct keyfield *key)
2014 struct keyfield **p;
2016 for (p = &keylist; *p; p = &(*p)->next)
2022 /* Report a bad field specification SPEC, with extra info MSGID. */
2024 static void badfieldspec PARAMS ((char const *, char const *))
2027 badfieldspec (char const *spec, char const *msgid)
2029 error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2034 /* Parse the leading integer in STRING and store the resulting value
2035 (which must fit into size_t) into *VAL. Return the address of the
2036 suffix after the integer. If MSGID is NULL, return NULL after
2037 failure; otherwise, report MSGID and exit on failure. */
2040 parse_field_count (char const *string, size_t *val, char const *msgid)
2045 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2048 case LONGINT_INVALID_SUFFIX_CHAR:
2053 case LONGINT_OVERFLOW:
2055 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2056 _(msgid), (int) (suffix - string), string);
2059 case LONGINT_INVALID:
2061 error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2069 /* Handle interrupts and hangups. */
2072 sighandler (int sig)
2074 #ifndef SA_NOCLDSTOP
2075 signal (sig, SIG_IGN);
2082 struct sigaction sigact;
2084 sigact.sa_handler = SIG_DFL;
2085 sigemptyset (&sigact.sa_mask);
2086 sigact.sa_flags = 0;
2087 sigaction (sig, &sigact, NULL);
2090 signal (sig, SIG_DFL);
2093 kill (process_id, sig);
2096 /* Set the ordering options for KEY specified in S.
2097 Return the address of the first character in S that
2098 is not a valid ordering option.
2099 BLANKTYPE is the kind of blanks that 'b' should skip. */
2102 set_ordering (register const char *s, struct keyfield *key,
2103 enum blanktype blanktype)
2110 if (blanktype == bl_start || blanktype == bl_both)
2111 key->skipsblanks = 1;
2112 if (blanktype == bl_end || blanktype == bl_both)
2113 key->skipeblanks = 1;
2116 key->ignore = nondictionary;
2119 key->translate = fold_toupper;
2122 key->general_numeric = 1;
2125 key->ignore = nonprinting;
2144 static struct keyfield *
2147 struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key);
2153 main (int argc, char **argv)
2155 struct keyfield *key;
2156 struct keyfield gkey;
2160 int checkonly = 0, mergeonly = 0, nfiles = 0;
2161 int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
2162 char *minus = "-", **files;
2163 char const *outfile = minus;
2164 static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2165 int nsigs = sizeof sigs / sizeof *sigs;
2167 struct sigaction oldact, newact;
2170 program_name = argv[0];
2171 process_id = getpid ();
2172 setlocale (LC_ALL, "");
2173 bindtextdomain (PACKAGE, LOCALEDIR);
2174 textdomain (PACKAGE);
2178 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2179 # if HAVE_NL_LANGINFO
2180 hard_LC_TIME = hard_locale (LC_TIME);
2183 /* Let's get locale's representation of the decimal point */
2185 struct lconv *lconvp = localeconv ();
2187 /* If the locale doesn't define a decimal point, or if the decimal
2188 point is multibyte, use the C decimal point. We don't support
2189 multibyte decimal points yet. */
2190 decimal_point = *lconvp->decimal_point;
2191 if (! decimal_point || lconvp->decimal_point[1])
2192 decimal_point = C_DECIMAL_POINT;
2194 /* We don't support multibyte thousands separators yet. */
2195 th_sep = *lconvp->thousands_sep;
2196 if (! th_sep || lconvp->thousands_sep[1])
2197 th_sep = CHAR_MAX + 1;
2202 have_read_stdin = 0;
2205 /* Change the way xmalloc and xrealloc fail. */
2206 xalloc_exit_failure = SORT_FAILURE;
2207 xalloc_fail_func = cleanup;
2210 sigemptyset (&caught_signals);
2211 for (i = 0; i < nsigs; i++)
2212 sigaddset (&caught_signals, sigs[i]);
2213 newact.sa_handler = sighandler;
2214 newact.sa_mask = caught_signals;
2215 newact.sa_flags = 0;
2218 for (i = 0; i < nsigs; i++)
2222 sigaction (sig, NULL, &oldact);
2223 if (oldact.sa_handler != SIG_IGN)
2224 sigaction (sig, &newact, NULL);
2226 if (signal (sig, SIG_IGN) != SIG_IGN)
2227 signal (sig, sighandler);
2231 gkey.sword = gkey.eword = -1;
2233 gkey.translate = NULL;
2234 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2235 gkey.skipsblanks = gkey.skipeblanks = 0;
2237 files = (char **) xmalloc (sizeof (char *) * argc);
2241 /* Parse an operand as a file after "--" was seen; or if
2242 pedantic and a file was seen, unless -c was not seen and the
2243 operand is "-o FILE" or "-oFILE". POSIX 1003.1-200x d5
2244 removes the exception for the -o options; when it becomes
2245 official the code will need to be changed. */
2248 || (posix_pedantic && nfiles != 0
2251 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2252 && (argv[optind][2] || optind + 1 != argc)))
2253 || ((c = getopt_long (argc, argv,
2254 "-bcdfgik:mMno:rsS:t:T:uy::z",
2255 long_options, NULL))
2260 files[nfiles++] = argv[optind++];
2265 /* Treat +POS1 [-POS2] as a key if possible; but silently
2266 treat an operand as a file if it is not a valid +POS1.
2267 POSIX 1003.1-200x d5 does not allow support for +POS1, so
2268 when it becomes official this code will need to be
2271 if (optarg[0] == '+')
2274 s = parse_field_count (optarg + 1, &key->sword, NULL);
2276 s = parse_field_count (s + 1, &key->schar, NULL);
2277 if (! (key->sword | key->schar))
2279 if (! s || *set_ordering (s, key, bl_start))
2286 if (optind != argc && argv[optind][0] == '-'
2287 && ISDIGIT (argv[optind][1]))
2289 char const *optarg1 = argv[optind++];
2290 s = parse_field_count (optarg1 + 1, &key->eword,
2291 N_("invalid number after `-'"));
2293 s = parse_field_count (s + 1, &key->echar,
2294 N_("invalid number after `.'"));
2295 if (*set_ordering (s, key, bl_end))
2296 badfieldspec (optarg1,
2297 N_("stray character in field spec"));
2303 files[nfiles++] = optarg;
2318 set_ordering (str, &gkey, bl_both);
2330 s = parse_field_count (optarg, &key->sword,
2331 N_("invalid number at field start"));
2334 /* Provoke with `sort -k0' */
2335 badfieldspec (optarg, N_("field number is zero"));
2339 s = parse_field_count (s + 1, &key->schar,
2340 N_("invalid number after `.'"));
2343 /* Provoke with `sort -k1.0' */
2344 badfieldspec (optarg, N_("character offset is zero"));
2347 if (! (key->sword | key->schar))
2349 s = set_ordering (s, key, bl_start);
2358 s = parse_field_count (s + 1, &key->eword,
2359 N_("invalid number after `,'"));
2362 /* Provoke with `sort -k1,0' */
2363 badfieldspec (optarg, N_("field number is zero"));
2366 s = parse_field_count (s + 1, &key->echar,
2367 N_("invalid number after `.'"));
2370 /* `-k 2,3' is equivalent to `+1 -3'. */
2373 s = set_ordering (s, key, bl_end);
2376 badfieldspec (optarg, N_("stray character in field spec"));
2393 specify_sort_size (optarg);
2398 if (tab && optarg[1])
2400 /* Provoke with `sort -txx'. Complain about
2401 "multi-character tab" instead of "multibyte tab", so
2402 that the diagnostic's wording does not need to be
2403 changed once multibyte characters are supported. */
2404 error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg);
2409 add_temp_dir (optarg);
2417 /* Accept and ignore e.g. -y0 for compatibility with Solaris
2418 2.x through Solaris 7. -y is marked as obsolete starting
2426 case_GETOPT_HELP_CHAR;
2428 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2431 usage (SORT_FAILURE);
2435 /* Inheritance of global options to individual keys. */
2436 for (key = keylist; key; key = key->next)
2437 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2438 && !key->skipeblanks && !key->month && !key->numeric
2439 && !key->general_numeric)
2441 key->ignore = gkey.ignore;
2442 key->translate = gkey.translate;
2443 key->skipsblanks = gkey.skipsblanks;
2444 key->skipeblanks = gkey.skipeblanks;
2445 key->month = gkey.month;
2446 key->numeric = gkey.numeric;
2447 key->general_numeric = gkey.general_numeric;
2448 key->reverse = gkey.reverse;
2451 if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks
2452 || gkey.skipeblanks || gkey.month || gkey.numeric
2453 || gkey.general_numeric))
2455 reverse = gkey.reverse;
2457 if (temp_dir_count == 0)
2459 char const *tmp_dir = getenv ("TMPDIR");
2460 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2472 error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2475 /* POSIX requires that sort return 1 IFF invoked with -c and the
2476 input is not properly sorted. */
2477 exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2482 int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2483 merge (files, nfiles, max_merge, outfile);
2486 sort (files, nfiles, outfile);
2488 if (have_read_stdin && fclose (stdin) == EOF)
2489 die (_("close failed"), "-");
2492 exit (EXIT_SUCCESS);