1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
24 /* Get isblank from GNU libc. */
27 #include <sys/types.h>
31 #include "long-options.h"
48 /* Undefine, to avoid warning about redefinition on some systems. */
50 #define min(a, b) ((a) < (b) ? (a) : (b))
52 #define UCHAR_LIM (UCHAR_MAX + 1)
53 #define UCHAR(c) ((unsigned char) (c))
55 #ifndef DEFAULT_TMPDIR
56 #define DEFAULT_TMPDIR "/tmp"
59 /* The kind of blanks for '-b' to skip in various options. */
60 enum blanktype { bl_start, bl_end, bl_both };
62 /* The character marking end of line. Default to \n. */
65 /* Lines are held in core as counted strings. */
68 char *text; /* Text of the line. */
69 int length; /* Length not including final newline. */
70 char *keybeg; /* Start of first key. */
71 char *keylim; /* Limit of first key. */
74 /* Arrays of lines. */
77 struct line *lines; /* Dynamically allocated array of lines. */
78 int used; /* Number of slots used. */
79 int alloc; /* Number of slots allocated. */
80 int limit; /* Max number of slots to allocate. */
86 char *buf; /* Dynamically allocated buffer. */
87 int used; /* Number of bytes used. */
88 int alloc; /* Number of bytes allocated. */
89 int left; /* Number of bytes left after line parsing. */
94 int sword; /* Zero-origin 'word' to start at. */
95 int schar; /* Additional characters to skip. */
96 int skipsblanks; /* Skip leading white space at start. */
97 int eword; /* Zero-origin first word after field. */
98 int echar; /* Additional characters in field. */
99 int skipeblanks; /* Skip trailing white space at finish. */
100 int *ignore; /* Boolean array of characters to ignore. */
101 char *translate; /* Translation applied to characters. */
102 int numeric; /* Flag for numeric comparison. Handle
103 strings of digits with optional decimal
104 point, but no exponential notation. */
105 int general_numeric; /* Flag for general, numeric comparison.
106 Handle numbers in exponential notation. */
107 int month; /* Flag for comparison by month name. */
108 int reverse; /* Reverse the sense of comparison. */
109 struct keyfield *next; /* Next keyfield to try. */
118 /* The name this program was run with. */
121 /* Table of digits. */
122 static int digits[UCHAR_LIM];
124 /* Table of white space. */
125 static int blanks[UCHAR_LIM];
127 /* Table of non-printing characters. */
128 static int nonprinting[UCHAR_LIM];
130 /* Table of non-dictionary characters (not letters, digits, or blanks). */
131 static int nondictionary[UCHAR_LIM];
133 /* Translation table folding lower case to upper. */
134 static char fold_toupper[UCHAR_LIM];
136 /* Table mapping 3-letter month names to integers.
137 Alphabetic order allows binary search. */
138 static struct month const monthtab[] =
154 /* During the merge phase, the number of files to merge at once. */
157 /* Initial buffer size for in core sorting. Will not grow unless a
158 line longer than this is seen. */
159 static int sortalloc = 512 * 1024;
161 /* Initial buffer size for in core merge buffers. Bear in mind that
162 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
163 static int mergealloc = 16 * 1024;
165 /* Guess of average line length. */
166 static int linelength = 30;
168 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
169 #define LINEALLOC (256 * 1024)
171 /* Prefix for temporary file names. */
172 static char *temp_file_prefix;
174 /* Flag to reverse the order of all comparisons. */
177 /* Flag for stable sort. This turns off the last ditch bytewise
178 comparison of lines, and instead leaves lines in the same order
179 they were read if all keys compare equal. */
182 /* Tab character separating fields. If NUL, then fields are separated
183 by the empty string between a non-whitespace character and a whitespace
187 /* Flag to remove consecutive duplicate lines from the output.
188 Only the last of a sequence of equal lines will be output. */
191 /* Nonzero if any of the input files are the standard input. */
192 static int have_read_stdin;
194 /* Lists of key field comparisons to be tried. */
195 static struct keyfield keyhead;
201 fprintf (stderr, _("Try `%s --help' for more information.\n"),
206 Usage: %s [OPTION]... [FILE]...\n\
210 Write sorted concatenation of all FILE(s) to standard output.\n\
212 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
213 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
214 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
215 -b ignore leading blanks in sort fields or keys\n\
216 -c check if given files already sorted, do not sort\n\
217 -d consider only [a-zA-Z0-9 ] characters in keys\n\
218 -f fold lower case to upper case characters in keys\n\
219 -g compare according to general numerical value, imply -b\n\
220 -i consider only [\\040-\\0176] characters in keys\n\
221 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
222 -m merge already sorted files, do not sort\n\
223 -n compare according to string numerical value, imply -b\n\
224 -o FILE write result on FILE instead of standard output\n\
225 -r reverse the result of comparisons\n\
226 -s stabilize sort by disabling last resort comparison\n\
227 -t SEP use SEParator instead of non- to whitespace transition\n\
228 -u with -c, check for strict ordering\n\
229 -u with -m, only output the first of an equal sequence\n\
230 -z end lines with 0 byte, not newline, for find -print0\n\
231 --help display this help and exit\n\
232 --version output version information and exit\n\
234 POS is F[.C][OPTS], where F is the field number and C the character\n\
235 position in the field, both counted from zero. OPTS is made up of one\n\
236 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
237 for that key. If no key given, use the entire line as key. With no\n\
238 FILE, or when FILE is -, read standard input.\n\
245 /* The list of temporary files. */
246 static struct tempnode
249 struct tempnode *next;
252 /* Clean up any remaining temporary files. */
257 struct tempnode *node;
259 for (node = temphead.next; node; node = node->next)
263 /* Allocate N bytes of memory dynamically, with error checking. */
266 xmalloc (unsigned int n)
273 error (0, 0, _("virtual memory exhausted"));
280 /* Change the size of an allocated block of memory P to N bytes,
282 If P is NULL, run xmalloc.
283 If N is 0, run free and return NULL. */
286 xrealloc (char *p, unsigned int n)
298 error (0, 0, _("virtual memory exhausted"));
306 xtmpfopen (const char *file)
311 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
312 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
314 error (0, errno, "%s", file);
323 xfopen (const char *file, const char *how)
327 if (strcmp (file, "-") == 0)
333 if ((fp = fopen (file, how)) == NULL)
335 error (0, errno, "%s", file);
351 /* Allow reading stdin from tty more than once. */
355 else if (fp == stdout)
357 if (fflush (fp) != 0)
359 error (0, errno, _("flushing file"));
366 if (fclose (fp) != 0)
368 error (0, errno, _("error closing file"));
376 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
378 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
380 error (0, errno, _("write error"));
386 /* Return a name for a temporary file. */
391 static unsigned int seq;
392 int len = strlen (temp_file_prefix);
393 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
394 struct tempnode *node;
396 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
398 "%s%ssort%5.5d%5.5d",
400 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
401 (unsigned int) getpid () & 0xffff, seq);
403 /* Make sure that SEQ's value fits in 5 digits. */
409 node->next = temphead.next;
410 temphead.next = node;
414 /* Search through the list of temporary files for NAME;
415 remove it if it is found on the list. */
420 struct tempnode *node, *temp;
422 for (node = &temphead; node->next; node = node->next)
423 if (!strcmp (name, node->next->name))
430 node->next = temp->next;
431 free ((char *) temp);
435 /* Initialize the character class tables. */
442 for (i = 0; i < UCHAR_LIM; ++i)
450 if (!ISALNUM (i) && !ISBLANK (i))
451 nondictionary[i] = 1;
453 fold_toupper[i] = toupper (i);
459 /* Initialize BUF, allocating ALLOC bytes initially. */
462 initbuf (struct buffer *buf, int alloc)
465 buf->buf = xmalloc (buf->alloc);
466 buf->used = buf->left = 0;
469 /* Fill BUF reading from FP, moving buf->left bytes from the end
470 of buf->buf to the beginning first. If EOF is reached and the
471 file wasn't terminated by a newline, supply one. Return a count
472 of bytes buffered. */
475 fillbuf (struct buffer *buf, FILE *fp)
479 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
480 buf->used = buf->left;
482 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
484 if (buf->used == buf->alloc)
487 buf->buf = xrealloc (buf->buf, buf->alloc);
489 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
492 error (0, errno, _("read error"));
499 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
501 if (buf->used == buf->alloc)
504 buf->buf = xrealloc (buf->buf, buf->alloc);
506 buf->buf[buf->used++] = eolchar;
512 /* Initialize LINES, allocating space for ALLOC lines initially.
513 LIMIT is the maximum possible number of lines to allocate space
517 initlines (struct lines *lines, int alloc, int limit)
519 lines->alloc = alloc;
520 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
522 lines->limit = limit;
525 /* Return a pointer to the first character of the field specified
529 begfield (const struct line *line, const struct keyfield *key)
531 register char *ptr = line->text, *lim = ptr + line->length;
532 register int sword = key->sword, schar = key->schar;
535 while (ptr < lim && sword--)
537 while (ptr < lim && *ptr != tab)
543 while (ptr < lim && sword--)
545 while (ptr < lim && blanks[UCHAR (*ptr)])
547 while (ptr < lim && !blanks[UCHAR (*ptr)])
551 if (key->skipsblanks)
552 while (ptr < lim && blanks[UCHAR (*ptr)])
555 if (ptr + schar <= lim)
563 /* Return the limit of (a pointer to the first character after) the field
564 in LINE specified by KEY. */
567 limfield (const struct line *line, const struct keyfield *key)
569 register char *ptr = line->text, *lim = ptr + line->length;
570 register int eword = key->eword, echar = key->echar;
572 /* Note: from the POSIX spec:
573 The leading field separator itself is included in
574 a field when -t is not used. FIXME: move this comment up... */
576 /* Move PTR past EWORD fields or to one past the last byte on LINE,
577 whichever comes first. If there are more than EWORD fields, leave
578 PTR pointing at the beginning of the field having zero-based index,
579 EWORD. If a delimiter character was specified (via -t), then that
580 `beginning' is the first character following the delimiting TAB.
581 Otherwise, leave PTR pointing at the first `blank' character after
582 the preceding field. */
584 while (ptr < lim && eword--)
586 while (ptr < lim && *ptr != tab)
588 if (ptr < lim && (eword || echar > 0))
592 while (ptr < lim && eword--)
594 while (ptr < lim && blanks[UCHAR (*ptr)])
596 while (ptr < lim && !blanks[UCHAR (*ptr)])
600 /* Make LIM point to the end of (one byte past) the current field. */
604 newlim = memchr (ptr, tab, lim - ptr);
612 while (newlim < lim && blanks[UCHAR (*newlim)])
614 while (newlim < lim && !blanks[UCHAR (*newlim)])
619 /* If we're skipping leading blanks, don't start counting characters
620 until after skipping past any leading blanks. */
621 if (key->skipsblanks)
622 while (ptr < lim && blanks[UCHAR (*ptr)])
625 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
626 if (ptr + echar <= lim)
637 trim_trailing_blanks (const char *a_start, char **a_end)
639 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
643 /* Find the lines in BUF, storing pointers and lengths in LINES.
644 Also replace newlines in BUF with NULs. */
647 findlines (struct buffer *buf, struct lines *lines)
649 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
650 struct keyfield *key = keyhead.next;
654 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
655 && lines->used < lines->limit)
657 /* There are various places in the code that rely on a NUL
658 being at the end of in-core lines; NULs inside the lines
659 will not cause trouble, though. */
662 if (lines->used == lines->alloc)
665 lines->lines = (struct line *)
666 xrealloc ((char *) lines->lines,
667 lines->alloc * sizeof (struct line));
670 lines->lines[lines->used].text = beg;
671 lines->lines[lines->used].length = ptr - beg;
673 /* Precompute the position of the first key for efficiency. */
677 lines->lines[lines->used].keylim =
678 limfield (&lines->lines[lines->used], key);
680 lines->lines[lines->used].keylim = ptr;
683 lines->lines[lines->used].keybeg =
684 begfield (&lines->lines[lines->used], key);
687 if (key->skipsblanks)
688 while (blanks[UCHAR (*beg)])
690 lines->lines[lines->used].keybeg = beg;
692 if (key->skipeblanks)
694 trim_trailing_blanks (lines->lines[lines->used].keybeg,
695 &lines->lines[lines->used].keylim);
700 lines->lines[lines->used].keybeg = 0;
701 lines->lines[lines->used].keylim = 0;
708 buf->left = lim - beg;
711 /* Compare strings A and B containing decimal fractions < 1. Each string
712 should begin with a decimal point followed immediately by the digits
713 of the fraction. Strings not of this form are considered to be zero. */
716 fraccompare (register const char *a, register const char *b)
718 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
720 if (tmpa == '.' && tmpb == '.')
723 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
724 while (tmpa == tmpb && digits[tmpa]);
725 if (digits[tmpa] && digits[tmpb])
745 else if (tmpa == '.')
754 else if (tmpb == '.')
766 /* Compare strings A and B as numbers without explicitly converting them to
767 machine numbers. Comparatively slow for short strings, but asymptotically
771 numcompare (register const char *a, register const char *b)
773 register int tmpa, tmpb, loga, logb, tmp;
810 while (tmpa == tmpb && digits[tmpa])
811 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
813 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
814 return -fraccompare (a, b);
817 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
823 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
828 if ((tmp = logb - loga) != 0)
836 else if (tmpb == '-')
864 while (tmpa == tmpb && digits[tmpa])
865 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
867 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
868 return fraccompare (a, b);
871 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
877 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
882 if ((tmp = loga - logb) != 0)
893 general_numcompare (const char *sa, const char *sb)
896 /* FIXME: add option to warn about failed conversions. */
897 /* FIXME: maybe add option to try expensive FP conversion
898 only if A and B can't be compared more cheaply/accurately. */
899 if (xstrtod (sa, NULL, &a))
903 if (xstrtod (sb, NULL, &b))
907 return a == b ? 0 : a < b ? -1 : 1;
910 /* Return an integer <= 12 associated with month name S with length LEN,
911 0 if the name in S is not recognized. */
914 getmonth (const char *s, int len)
917 register int i, lo = 0, hi = 12;
919 while (len > 0 && blanks[UCHAR(*s)])
925 for (i = 0; i < 3; ++i)
926 month[i] = fold_toupper[UCHAR (s[i])];
930 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
934 if (!strcmp (month, monthtab[lo].name))
935 return monthtab[lo].val;
939 /* Compare two lines A and B trying every key in sequence until there
940 are no more keys or a difference is found. */
943 keycompare (const struct line *a, const struct line *b)
945 register char *texta, *textb, *lima, *limb, *translate;
946 register int *ignore;
947 struct keyfield *key;
948 int diff = 0, iter = 0, lena, lenb;
950 for (key = keyhead.next; key; key = key->next, ++iter)
952 ignore = key->ignore;
953 translate = key->translate;
955 /* Find the beginning and limit of each field. */
956 if (iter || a->keybeg == NULL || b->keybeg == NULL)
959 lima = limfield (a, key), limb = limfield (b, key);
961 lima = a->text + a->length, limb = b->text + b->length;
964 texta = begfield (a, key), textb = begfield (b, key);
967 texta = a->text, textb = b->text;
968 if (key->skipsblanks)
970 while (texta < lima && blanks[UCHAR (*texta)])
972 while (textb < limb && blanks[UCHAR (*textb)])
979 /* For the first iteration only, the key positions have
980 been precomputed for us. */
981 texta = a->keybeg, lima = a->keylim;
982 textb = b->keybeg, limb = b->keylim;
985 /* Find the lengths. */
986 lena = lima - texta, lenb = limb - textb;
992 if (key->skipeblanks)
994 char *a_end = texta + lena;
995 char *b_end = textb + lenb;
996 trim_trailing_blanks (texta, &a_end);
997 trim_trailing_blanks (textb, &b_end);
998 lena = a_end - texta;
999 lenb = b_end - textb;
1002 /* Actually compare the fields. */
1007 char savea = *lima, saveb = *limb;
1009 *lima = *limb = '\0';
1010 diff = numcompare (texta, textb);
1011 *lima = savea, *limb = saveb;
1014 diff = numcompare (texta, textb);
1017 return key->reverse ? -diff : diff;
1020 else if (key->general_numeric)
1024 char savea = *lima, saveb = *limb;
1026 *lima = *limb = '\0';
1027 diff = general_numcompare (texta, textb);
1028 *lima = savea, *limb = saveb;
1031 diff = general_numcompare (texta, textb);
1034 return key->reverse ? -diff : diff;
1037 else if (key->month)
1039 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1041 return key->reverse ? -diff : diff;
1044 else if (ignore && translate)
1046 #define CMP_WITH_IGNORE(A, B) \
1049 while (texta < lima && textb < limb) \
1051 while (texta < lima && ignore[UCHAR (*texta)]) \
1053 while (textb < limb && ignore[UCHAR (*textb)]) \
1055 if (texta < lima && textb < limb) \
1066 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1068 else if (texta < lima && textb == limb \
1069 && !ignore[UCHAR (*texta)]) \
1075 while (texta < lima && ignore[UCHAR (*texta)]) \
1077 while (textb < limb && ignore[UCHAR (*textb)]) \
1080 if (texta == lima && textb < limb) \
1082 else if (texta < lima && textb == limb) \
1085 /* Relative lengths are meaningless if characters were ignored. \
1086 Handling this case here avoids what might be an invalid length \
1087 comparison below. */ \
1088 if (diff == 0 && texta == lima && textb == limb) \
1093 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1095 CMP_WITH_IGNORE (*texta, *textb);
1097 while (texta < lima && textb < limb)
1099 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1101 diff = (translate[UCHAR (*--texta)]
1102 - translate[UCHAR (*--textb)]);
1107 diff = memcmp (texta, textb, min (lena, lenb));
1110 return key->reverse ? -diff : diff;
1111 if ((diff = lena - lenb) != 0)
1112 return key->reverse ? -diff : diff;
1118 /* Compare two lines A and B, returning negative, zero, or positive
1119 depending on whether A compares less than, equal to, or greater than B. */
1122 compare (register const struct line *a, register const struct line *b)
1124 int diff, tmpa, tmpb, mini;
1126 /* First try to compare on the specified keys (if any).
1127 The only two cases with no key at all are unadorned sort,
1128 and unadorned sort -r. */
1131 diff = keycompare (a, b);
1134 if (unique || stable)
1138 /* If the keys all compare equal (or no keys were specified)
1139 fall through to the default byte-by-byte comparison. */
1140 tmpa = a->length, tmpb = b->length;
1141 mini = min (tmpa, tmpb);
1146 char *ap = a->text, *bp = b->text;
1148 diff = UCHAR (*ap) - UCHAR (*bp);
1151 diff = memcmp (ap, bp, mini);
1157 return reverse ? -diff : diff;
1160 /* Check that the lines read from the given FP come in order. Return
1161 1 if they do and 0 if there is a disorder.
1162 FIXME: return number of first out-of-order line if not sorted. */
1167 struct buffer buf; /* Input buffer. */
1168 struct lines lines; /* Lines scanned from the buffer. */
1169 struct line temp; /* Copy of previous line. */
1170 int cc; /* Character count. */
1171 int alloc, sorted = 1;
1173 initbuf (&buf, mergealloc);
1174 initlines (&lines, mergealloc / linelength + 1,
1175 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1177 temp.text = xmalloc (alloc);
1179 cc = fillbuf (&buf, fp);
1183 findlines (&buf, &lines);
1187 struct line *prev_line; /* Pointer to previous line. */
1188 int cmp; /* Result of calling compare. */
1191 /* Compare each line in the buffer with its successor. */
1192 for (i = 0; i < lines.used - 1; ++i)
1194 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1195 if ((unique && cmp >= 0) || (cmp > 0))
1202 /* Save the last line of the buffer and refill the buffer. */
1203 prev_line = lines.lines + (lines.used - 1);
1204 if (prev_line->length > alloc)
1206 while (prev_line->length + 1 > alloc)
1208 temp.text = xrealloc (temp.text, alloc);
1210 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1211 temp.length = prev_line->length;
1212 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1213 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1215 cc = fillbuf (&buf, fp);
1219 findlines (&buf, &lines);
1220 /* Make sure the line saved from the old buffer contents is
1221 less than or equal to the first line of the new buffer. */
1222 cmp = compare (&temp, &lines.lines[0]);
1223 if ((unique && cmp >= 0) || (cmp > 0))
1233 free ((char *) lines.lines);
1238 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1239 Close FPS before returning. */
1242 mergefps (FILE **fps, register int nfps, FILE *ofp)
1244 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1245 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1246 struct line saved; /* Saved line for unique check. */
1247 int savedflag = 0; /* True if there is a saved line. */
1248 int savealloc; /* Size allocated for the saved line. */
1249 int cur[NMERGE]; /* Current line in each line table. */
1250 int ord[NMERGE]; /* Table representing a permutation of fps,
1251 such that lines[ord[0]].lines[cur[ord[0]]]
1252 is the smallest line and will be next
1254 register int i, j, t;
1256 #ifdef lint /* Suppress `used before initialized' warning. */
1260 /* Allocate space for a saved line if necessary. */
1263 savealloc = linelength;
1264 saved.text = xmalloc (savealloc);
1267 /* Read initial lines from each input file. */
1268 for (i = 0; i < nfps; ++i)
1270 initbuf (&buffer[i], mergealloc);
1271 /* If a file is empty, eliminate it from future consideration. */
1272 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1276 for (j = i; j < nfps; ++j)
1277 fps[j] = fps[j + 1];
1280 free (buffer[i].buf);
1283 initlines (&lines[i], mergealloc / linelength + 1,
1284 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1285 findlines (&buffer[i], &lines[i]);
1290 /* Set up the ord table according to comparisons among input lines.
1291 Since this only reorders two items if one is strictly greater than
1292 the other, it is stable. */
1293 for (i = 0; i < nfps; ++i)
1295 for (i = 1; i < nfps; ++i)
1296 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1297 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1298 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1300 /* Repeatedly output the smallest line until no input remains. */
1303 /* If uniqified output is turned on, output only the first of
1304 an identical series of lines. */
1307 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1309 write_bytes (saved.text, saved.length, ofp);
1310 putc (eolchar, ofp);
1315 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1317 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1319 saved.text = xrealloc (saved.text, savealloc);
1321 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1322 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1324 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1326 saved.keybeg = saved.text +
1327 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1328 - lines[ord[0]].lines[cur[ord[0]]].text);
1330 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1332 saved.keylim = saved.text +
1333 (lines[ord[0]].lines[cur[ord[0]]].keylim
1334 - lines[ord[0]].lines[cur[ord[0]]].text);
1341 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1342 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1343 putc (eolchar, ofp);
1346 /* Check if we need to read more lines into core. */
1347 if (++cur[ord[0]] == lines[ord[0]].used)
1348 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1350 findlines (&buffer[ord[0]], &lines[ord[0]]);
1355 /* We reached EOF on fps[ord[0]]. */
1356 for (i = 1; i < nfps; ++i)
1357 if (ord[i] > ord[0])
1360 xfclose (fps[ord[0]]);
1361 free (buffer[ord[0]].buf);
1362 free ((char *) lines[ord[0]].lines);
1363 for (i = ord[0]; i < nfps; ++i)
1365 fps[i] = fps[i + 1];
1366 buffer[i] = buffer[i + 1];
1367 lines[i] = lines[i + 1];
1368 cur[i] = cur[i + 1];
1370 for (i = 0; i < nfps; ++i)
1371 ord[i] = ord[i + 1];
1375 /* The new line just read in may be larger than other lines
1376 already in core; push it back in the queue until we encounter
1377 a line larger than it. */
1378 for (i = 1; i < nfps; ++i)
1380 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1381 &lines[ord[i]].lines[cur[ord[i]]]);
1383 t = ord[0] - ord[i];
1388 for (j = 1; j < i; ++j)
1389 ord[j - 1] = ord[j];
1393 if (unique && savedflag)
1395 write_bytes (saved.text, saved.length, ofp);
1396 putc (eolchar, ofp);
1401 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1404 sortlines (struct line *lines, int nlines, struct line *temp)
1406 register struct line *lo, *hi, *t;
1407 register int nlo, nhi;
1411 if (compare (&lines[0], &lines[1]) > 0)
1412 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1422 sortlines (lo, nlo, temp);
1425 sortlines (hi, nhi, temp);
1430 if (compare (lo, hi) <= 0)
1431 *t++ = *lo++, --nlo;
1433 *t++ = *hi++, --nhi;
1437 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1441 /* Check that each of the NFILES FILES is ordered.
1442 Return a count of disordered files. */
1445 check (char **files, int nfiles)
1447 int i, disorders = 0;
1450 for (i = 0; i < nfiles; ++i)
1452 fp = xfopen (files[i], "r");
1455 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1462 /* Merge NFILES FILES onto OFP. */
1465 merge (char **files, int nfiles, FILE *ofp)
1469 FILE *fps[NMERGE], *tfp;
1471 while (nfiles > NMERGE)
1474 for (i = 0; i < nfiles / NMERGE; ++i)
1476 for (j = 0; j < NMERGE; ++j)
1477 fps[j] = xfopen (files[i * NMERGE + j], "r");
1478 tfp = xtmpfopen (temp = tempname ());
1479 mergefps (fps, NMERGE, tfp);
1481 for (j = 0; j < NMERGE; ++j)
1482 zaptemp (files[i * NMERGE + j]);
1485 for (j = 0; j < nfiles % NMERGE; ++j)
1486 fps[j] = xfopen (files[i * NMERGE + j], "r");
1487 tfp = xtmpfopen (temp = tempname ());
1488 mergefps (fps, nfiles % NMERGE, tfp);
1490 for (j = 0; j < nfiles % NMERGE; ++j)
1491 zaptemp (files[i * NMERGE + j]);
1496 for (i = 0; i < nfiles; ++i)
1497 fps[i] = xfopen (files[i], "r");
1498 mergefps (fps, i, ofp);
1499 for (i = 0; i < nfiles; ++i)
1503 /* Sort NFILES FILES onto OFP. */
1506 sort (char **files, int nfiles, FILE *ofp)
1513 struct tempnode *node;
1514 int n_temp_files = 0;
1517 initbuf (&buf, sortalloc);
1518 initlines (&lines, sortalloc / linelength + 1,
1519 LINEALLOC / sizeof (struct line));
1521 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1525 fp = xfopen (*files++, "r");
1526 while (fillbuf (&buf, fp))
1528 findlines (&buf, &lines);
1529 if (lines.used > ntmp)
1531 while (lines.used > ntmp)
1533 tmp = (struct line *)
1534 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1536 sortlines (lines.lines, lines.used, tmp);
1537 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1542 tfp = xtmpfopen (tempname ());
1544 for (i = 0; i < lines.used; ++i)
1545 if (!unique || i == 0
1546 || compare (&lines.lines[i], &lines.lines[i - 1]))
1548 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1549 putc (eolchar, tfp);
1558 free ((char *) lines.lines);
1559 free ((char *) tmp);
1563 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1565 for (node = temphead.next; i > 0; node = node->next)
1566 tempfiles[--i] = node->name;
1567 merge (tempfiles, n_temp_files, ofp);
1568 free ((char *) tempfiles);
1572 /* Insert key KEY at the end of the list (`keyhead'). */
1575 insertkey (struct keyfield *key)
1577 struct keyfield *k = &keyhead;
1586 badfieldspec (const char *s)
1588 error (2, 0, _("invalid field specification `%s'"), s);
1591 /* Handle interrupts and hangups. */
1594 sighandler (int sig)
1597 struct sigaction sigact;
1599 sigact.sa_handler = SIG_DFL;
1600 sigemptyset (&sigact.sa_mask);
1601 sigact.sa_flags = 0;
1602 sigaction (sig, &sigact, NULL);
1603 #else /* !SA_INTERRUPT */
1604 signal (sig, SIG_DFL);
1605 #endif /* SA_INTERRUPT */
1607 kill (getpid (), sig);
1610 /* Set the ordering options for KEY specified in S.
1611 Return the address of the first character in S that
1612 is not a valid ordering option.
1613 BLANKTYPE is the kind of blanks that 'b' should skip. */
1616 set_ordering (register const char *s, struct keyfield *key,
1617 enum blanktype blanktype)
1624 if (blanktype == bl_start || blanktype == bl_both)
1625 key->skipsblanks = 1;
1626 if (blanktype == bl_end || blanktype == bl_both)
1627 key->skipeblanks = 1;
1630 key->ignore = nondictionary;
1633 key->translate = fold_toupper;
1636 key->general_numeric = 1;
1639 key->ignore = nonprinting;
1646 if (blanktype == bl_start || blanktype == bl_both)
1647 key->skipsblanks = 1;
1648 if (blanktype == bl_end || blanktype == bl_both)
1649 key->skipeblanks = 1;
1663 main (int argc, char **argv)
1665 struct keyfield *key = NULL, gkey;
1668 int checkonly = 0, mergeonly = 0, nfiles = 0;
1669 char *minus = "-", *outfile = minus, **files, *tmp;
1672 struct sigaction oldact, newact;
1673 #endif /* SA_INTERRUPT */
1675 program_name = argv[0];
1676 setlocale (LC_ALL, "");
1677 bindtextdomain (PACKAGE, LOCALEDIR);
1678 textdomain (PACKAGE);
1680 parse_long_options (argc, argv, "sort", PACKAGE_VERSION, usage);
1682 have_read_stdin = 0;
1685 temp_file_prefix = getenv ("TMPDIR");
1686 if (temp_file_prefix == NULL)
1687 temp_file_prefix = DEFAULT_TMPDIR;
1690 newact.sa_handler = sighandler;
1691 sigemptyset (&newact.sa_mask);
1692 newact.sa_flags = 0;
1694 sigaction (SIGINT, NULL, &oldact);
1695 if (oldact.sa_handler != SIG_IGN)
1696 sigaction (SIGINT, &newact, NULL);
1697 sigaction (SIGHUP, NULL, &oldact);
1698 if (oldact.sa_handler != SIG_IGN)
1699 sigaction (SIGHUP, &newact, NULL);
1700 sigaction (SIGPIPE, NULL, &oldact);
1701 if (oldact.sa_handler != SIG_IGN)
1702 sigaction (SIGPIPE, &newact, NULL);
1703 sigaction (SIGTERM, NULL, &oldact);
1704 if (oldact.sa_handler != SIG_IGN)
1705 sigaction (SIGTERM, &newact, NULL);
1706 #else /* !SA_INTERRUPT */
1707 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1708 signal (SIGINT, sighandler);
1709 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1710 signal (SIGHUP, sighandler);
1711 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1712 signal (SIGPIPE, sighandler);
1713 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1714 signal (SIGTERM, sighandler);
1715 #endif /* !SA_INTERRUPT */
1717 gkey.sword = gkey.eword = -1;
1719 gkey.translate = NULL;
1720 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1721 gkey.skipsblanks = gkey.skipeblanks = 0;
1723 files = (char **) xmalloc (sizeof (char *) * argc);
1725 for (i = 1; i < argc; ++i)
1727 if (argv[i][0] == '+')
1731 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1734 key->translate = NULL;
1735 key->skipsblanks = key->skipeblanks = 0;
1736 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1738 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1739 badfieldspec (argv[i]);
1740 for (t = 0; digits[UCHAR (*s)]; ++s)
1741 t = 10 * t + *s - '0';
1744 for (++s; digits[UCHAR (*s)]; ++s)
1745 t2 = 10 * t2 + *s - '0';
1753 s = set_ordering (s, key, bl_start);
1755 badfieldspec (argv[i]);
1757 else if (argv[i][0] == '-' && argv[i][1])
1760 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1764 for (t = 0; digits[UCHAR (*s)]; ++s)
1765 t = t * 10 + *s - '0';
1768 for (++s; digits[UCHAR (*s)]; ++s)
1769 t2 = t2 * 10 + *s - '0';
1772 s = set_ordering (s, key, bl_end);
1774 badfieldspec (argv[i]);
1781 s = set_ordering (s, &gkey, bl_both);
1795 error (2, 0, _("option `-k' requires an argument"));
1801 key = (struct keyfield *)
1802 xmalloc (sizeof (struct keyfield));
1805 key->translate = NULL;
1806 key->skipsblanks = key->skipeblanks = 0;
1807 key->numeric = key->month = key->reverse = 0;
1809 if (!digits[UCHAR (*s)])
1810 badfieldspec (argv[i]);
1811 for (t = 0; digits[UCHAR (*s)]; ++s)
1812 t = 10 * t + *s - '0';
1815 /* Provoke with `sort -k0' */
1816 error (0, 0, _("the starting field number argument \
1817 to the `-k' option must be positive"));
1818 badfieldspec (argv[i]);
1824 if (!digits[UCHAR (s[1])])
1826 /* Provoke with `sort -k1.' */
1827 error (0, 0, _("starting field spec has `.' but \
1828 lacks following character offset"));
1829 badfieldspec (argv[i]);
1831 for (++s; digits[UCHAR (*s)]; ++s)
1832 t2 = 10 * t2 + *s - '0';
1835 /* Provoke with `sort -k1.0' */
1836 error (0, 0, _("starting field character offset \
1837 argument to the `-k' option\nmust be positive"));
1838 badfieldspec (argv[i]);
1849 s = set_ordering (s, key, bl_start);
1856 badfieldspec (argv[i]);
1859 /* Skip over comma. */
1863 /* Provoke with `sort -k1,' */
1864 error (0, 0, _("field specification has `,' but \
1865 lacks following field spec"));
1866 badfieldspec (argv[i]);
1869 for (t = 0; digits[UCHAR (*s)]; ++s)
1870 t = t * 10 + *s - '0';
1873 /* Provoke with `sort -k1,0' */
1874 error (0, 0, _("ending field number argument \
1875 to the `-k' option must be positive"));
1876 badfieldspec (argv[i]);
1882 if (!digits[UCHAR (s[1])])
1884 /* Provoke with `sort -k1,1.' */
1885 error (0, 0, _("ending field spec has `.' \
1886 but lacks following character offset"));
1887 badfieldspec (argv[i]);
1889 for (++s; digits[UCHAR (*s)]; ++s)
1890 t2 = t2 * 10 + *s - '0';
1894 /* `-k 2,3' is equivalent to `+1 -3'. */
1899 s = set_ordering (s, key, bl_end);
1901 badfieldspec (argv[i]);
1915 error (2, 0, _("option `-o' requires an argument"));
1917 outfile = argv[++i];
1926 else if (i < argc - 1)
1932 error (2, 0, _("option `-t' requires an argument"));
1936 temp_file_prefix = ++s;
1940 temp_file_prefix = argv[++i];
1942 error (2, 0, _("option `-T' requires an argument"));
1953 /* Accept and ignore e.g. -y0 for compatibility with
1957 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
1965 else /* Not an option. */
1967 files[nfiles++] = argv[i];
1975 /* Inheritance of global options to individual keys. */
1976 for (key = keyhead.next; key; key = key->next)
1977 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
1978 && !key->skipeblanks && !key->month && !key->numeric
1979 && !key->general_numeric)
1981 key->ignore = gkey.ignore;
1982 key->translate = gkey.translate;
1983 key->skipsblanks = gkey.skipsblanks;
1984 key->skipeblanks = gkey.skipeblanks;
1985 key->month = gkey.month;
1986 key->numeric = gkey.numeric;
1987 key->general_numeric = gkey.general_numeric;
1988 key->reverse = gkey.reverse;
1991 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
1992 || gkey.skipeblanks || gkey.month || gkey.numeric
1993 || gkey.general_numeric))
1995 reverse = gkey.reverse;
2004 exit (check (files, nfiles) != 0);
2006 if (strcmp (outfile, "-"))
2008 struct stat outstat;
2009 if (stat (outfile, &outstat) == 0)
2011 /* The following code prevents a race condition when
2012 people use the brain dead shell programming idiom:
2013 cat file | sort -o file
2014 This feature is provided for historical compatibility,
2015 but we strongly discourage ever relying on this in
2016 new shell programs. */
2018 /* Temporarily copy each input file that might be another name
2019 for the output file. When in doubt (e.g. a pipe), copy. */
2020 for (i = 0; i < nfiles; ++i)
2026 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2029 if ((strcmp (files[i], "-")
2030 ? stat (files[i], &instat)
2031 : fstat (fileno (stdin), &instat)) != 0)
2033 error (0, errno, "%s", files[i]);
2037 if (S_ISREG (instat.st_mode)
2038 && (instat.st_ino != outstat.st_ino
2039 || instat.st_dev != outstat.st_dev))
2041 /* We know the files are distinct. */
2046 fp = xfopen (files[i], "r");
2048 ofp = xtmpfopen (tmp);
2049 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2050 write_bytes (buf, cc, ofp);
2053 error (0, errno, "%s", files[i]);
2062 ofp = xfopen (outfile, "w");
2068 merge (files, nfiles, ofp);
2070 sort (files, nfiles, ofp);
2073 /* If we wait for the implicit flush on exit, and the parent process
2074 has closed stdout (e.g., exec >&- in a shell), then the output file
2075 winds up empty. I don't understand why. This is under SunOS,
2076 Solaris, Ultrix, and Irix. This premature fflush makes the output
2077 reappear. --karl@cs.umb.edu */
2078 if (fflush (ofp) < 0)
2079 error (1, errno, _("%s: write error"), outfile);
2081 if (have_read_stdin && fclose (stdin) == EOF)
2082 error (1, errno, outfile);
2083 if (ferror (stdout) || fclose (stdout) == EOF)
2084 error (1, errno, _("%s: write error"), outfile);