1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
24 /* Get isblank from GNU libc. */
27 #include <sys/types.h>
32 #include "long-options.h"
33 #include "safe-stat.h"
51 #define min(a, b) ((a) < (b) ? (a) : (b))
52 #define UCHAR_LIM (UCHAR_MAX + 1)
53 #define UCHAR(c) ((unsigned char) (c))
55 /* The kind of blanks for '-b' to skip in various options. */
56 enum blanktype { bl_start, bl_end, bl_both };
58 /* The name this program was run with. */
61 /* Table of digits. */
62 static int digits[UCHAR_LIM];
64 /* Table of white space. */
65 static int blanks[UCHAR_LIM];
67 /* Table of non-printing characters. */
68 static int nonprinting[UCHAR_LIM];
70 /* Table of non-dictionary characters (not letters, digits, or blanks). */
71 static int nondictionary[UCHAR_LIM];
73 /* Translation table folding lower case to upper. */
74 static char fold_toupper[UCHAR_LIM];
76 /* Table mapping 3-letter month names to integers.
77 Alphabetic order allows binary search. */
98 /* During the merge phase, the number of files to merge at once. */
101 /* Initial buffer size for in core sorting. Will not grow unless a
102 line longer than this is seen. */
103 static int sortalloc = 512 * 1024;
105 /* Initial buffer size for in core merge buffers. Bear in mind that
106 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
107 static int mergealloc = 16 * 1024;
109 /* Guess of average line length. */
110 static int linelength = 30;
112 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
113 #define LINEALLOC (256 * 1024)
115 /* Prefix for temporary file names. */
116 static char *temp_file_prefix;
118 /* Flag to reverse the order of all comparisons. */
121 /* Flag for stable sort. This turns off the last ditch bytewise
122 comparison of lines, and instead leaves lines in the same order
123 they were read if all keys compare equal. */
126 /* Tab character separating fields. If NUL, then fields are separated
127 by the empty string between a non-whitespace character and a whitespace
131 /* Flag to remove consecutive duplicate lines from the output.
132 Only the last of a sequence of equal lines will be output. */
135 /* Nonzero if any of the input files are the standard input. */
136 static int have_read_stdin;
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 int length; /* Length not including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
147 /* Arrays of lines. */
150 struct line *lines; /* Dynamically allocated array of lines. */
151 int used; /* Number of slots used. */
152 int alloc; /* Number of slots allocated. */
153 int limit; /* Max number of slots to allocate. */
159 char *buf; /* Dynamically allocated buffer. */
160 int used; /* Number of bytes used. */
161 int alloc; /* Number of bytes allocated. */
162 int left; /* Number of bytes left after line parsing. */
165 /* Lists of key field comparisons to be tried. */
166 static struct keyfield
168 int sword; /* Zero-origin 'word' to start at. */
169 int schar; /* Additional characters to skip. */
170 int skipsblanks; /* Skip leading white space at start. */
171 int eword; /* Zero-origin first word after field. */
172 int echar; /* Additional characters in field. */
173 int skipeblanks; /* Skip trailing white space at finish. */
174 int *ignore; /* Boolean array of characters to ignore. */
175 char *translate; /* Translation applied to characters. */
176 int numeric; /* Flag for numeric comparison. */
177 int month; /* Flag for comparison by month name. */
178 int reverse; /* Reverse the sense of comparison. */
179 struct keyfield *next; /* Next keyfield to try. */
182 /* The list of temporary files. */
183 static struct tempnode
186 struct tempnode *next;
189 /* Clean up any remaining temporary files. */
194 struct tempnode *node;
196 for (node = temphead.next; node; node = node->next)
200 /* Allocate N bytes of memory dynamically, with error checking. */
211 error (0, 0, "virtual memory exhausted");
218 /* Change the size of an allocated block of memory P to N bytes,
220 If P is NULL, run xmalloc.
221 If N is 0, run free and return NULL. */
238 error (0, 0, "virtual memory exhausted");
249 FILE *fp = strcmp (file, "-") ? fopen (file, how) : stdin;
253 error (0, errno, "%s", file);
266 if (fflush (fp) != 0)
268 error (0, errno, "flushing file");
273 if (fp != stdin && fp != stdout)
275 if (fclose (fp) != 0)
277 error (0, errno, "error closing file");
284 /* Allow reading stdin from tty more than once. */
290 xfwrite (buf, size, nelem, fp)
295 if (fwrite (buf, size, nelem, fp) != nelem)
297 error (0, errno, "write error");
303 /* Return a name for a temporary file. */
309 int len = strlen (temp_file_prefix);
310 char *name = xmalloc (len + 16);
311 struct tempnode *node =
312 (struct tempnode *) xmalloc (sizeof (struct tempnode));
315 (len && temp_file_prefix[len - 1] != '/'
316 ? "%s/sort%5.5d%5.5d"
317 : "%ssort%5.5d%5.5d"),
318 temp_file_prefix, (int) getpid (), ++seq);
320 node->next = temphead.next;
321 temphead.next = node;
325 /* Search through the list of temporary files for NAME;
326 remove it if it is found on the list. */
332 struct tempnode *node, *temp;
334 for (node = &temphead; node->next; node = node->next)
335 if (!strcmp (name, node->next->name))
342 node->next = temp->next;
343 free ((char *) temp);
347 /* Initialize the character class tables. */
354 for (i = 0; i < UCHAR_LIM; ++i)
362 if (!ISALNUM (i) && !ISBLANK (i))
363 nondictionary[i] = 1;
365 fold_toupper[i] = toupper (i);
371 /* Initialize BUF, allocating ALLOC bytes initially. */
379 buf->buf = xmalloc (buf->alloc);
380 buf->used = buf->left = 0;
383 /* Fill BUF reading from FP, moving buf->left bytes from the end
384 of buf->buf to the beginning first. If EOF is reached and the
385 file wasn't terminated by a newline, supply one. Return a count
386 of bytes buffered. */
395 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
396 buf->used = buf->left;
398 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
400 if (buf->used == buf->alloc)
403 buf->buf = xrealloc (buf->buf, buf->alloc);
405 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
408 error (0, errno, "read error");
415 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
417 if (buf->used == buf->alloc)
420 buf->buf = xrealloc (buf->buf, buf->alloc);
422 buf->buf[buf->used++] = '\n';
428 /* Initialize LINES, allocating space for ALLOC lines initially.
429 LIMIT is the maximum possible number of lines to allocate space
433 initlines (lines, alloc, limit)
438 lines->alloc = alloc;
439 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
441 lines->limit = limit;
444 /* Return a pointer to the first character of the field specified
450 struct keyfield *key;
452 register char *ptr = line->text, *lim = ptr + line->length;
453 register int sword = key->sword, schar = key->schar;
456 while (ptr < lim && sword--)
458 while (ptr < lim && *ptr != tab)
464 while (ptr < lim && sword--)
466 while (ptr < lim && blanks[UCHAR (*ptr)])
468 while (ptr < lim && !blanks[UCHAR (*ptr)])
472 if (key->skipsblanks)
473 while (ptr < lim && blanks[UCHAR (*ptr)])
476 while (ptr < lim && schar--)
482 /* Return the limit of (a pointer to the first character after) the field
483 in LINE specified by KEY. */
488 struct keyfield *key;
490 register char *ptr = line->text, *lim = ptr + line->length;
491 register int eword = key->eword, echar = key->echar;
494 while (ptr < lim && eword--)
496 while (ptr < lim && *ptr != tab)
498 if (ptr < lim && (eword || key->skipeblanks))
502 while (ptr < lim && eword--)
504 while (ptr < lim && blanks[UCHAR (*ptr)])
506 while (ptr < lim && !blanks[UCHAR (*ptr)])
510 if (key->skipeblanks)
511 while (ptr < lim && blanks[UCHAR (*ptr)])
514 while (ptr < lim && echar--)
520 /* Find the lines in BUF, storing pointers and lengths in LINES.
521 Also replace newlines with NULs. */
524 findlines (buf, lines)
528 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
529 struct keyfield *key = keyhead.next;
533 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
534 && lines->used < lines->limit)
536 /* There are various places in the code that rely on a NUL
537 being at the end of in-core lines; NULs inside the lines
538 will not cause trouble, though. */
541 if (lines->used == lines->alloc)
544 lines->lines = (struct line *)
545 xrealloc ((char *) lines->lines,
546 lines->alloc * sizeof (struct line));
549 lines->lines[lines->used].text = beg;
550 lines->lines[lines->used].length = ptr - beg;
552 /* Precompute the position of the first key for efficiency. */
556 lines->lines[lines->used].keylim =
557 limfield (&lines->lines[lines->used], key);
559 lines->lines[lines->used].keylim = ptr;
562 lines->lines[lines->used].keybeg =
563 begfield (&lines->lines[lines->used], key);
566 if (key->skipsblanks)
567 while (blanks[UCHAR (*beg)])
569 lines->lines[lines->used].keybeg = beg;
574 lines->lines[lines->used].keybeg = 0;
575 lines->lines[lines->used].keylim = 0;
582 buf->left = lim - beg;
585 /* Compare strings A and B containing decimal fractions < 1. Each string
586 should begin with a decimal point followed immediately by the digits
587 of the fraction. Strings not of this form are considered to be zero. */
591 register char *a, *b;
593 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
595 if (tmpa == '.' && tmpb == '.')
598 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
599 while (tmpa == tmpb && digits[tmpa]);
600 if (digits[tmpa] && digits[tmpb])
620 else if (tmpa == '.')
629 else if (tmpb == '.')
641 /* Compare strings A and B as numbers without explicitly converting them to
642 machine numbers. Comparatively slow for short strings, but asymptotically
647 register char *a, *b;
649 register int tmpa, tmpb, loga, logb, tmp;
651 tmpa = UCHAR (*a), tmpb = UCHAR (*b);
663 if (digits[tmpa] && digits[tmpb])
674 while (tmpa == tmpb && digits[tmpa])
675 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
677 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
678 return -fraccompare (a, b);
681 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
687 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
692 if ((tmp = logb - loga) != 0)
700 else if (tmpb == '-')
702 if (digits[UCHAR (tmpa)] && digits[UCHAR (*++b)])
713 while (tmpa == tmpb && digits[tmpa])
714 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
716 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
717 return fraccompare (a, b);
720 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
726 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
731 if ((tmp = loga - logb) != 0)
741 /* Return an integer <= 12 associated with month name S with length LEN,
742 0 if the name in S is not recognized. */
750 register int i, lo = 0, hi = 12;
752 while (len > 0 && blanks[UCHAR(*s)])
758 for (i = 0; i < 3; ++i)
759 month[i] = fold_toupper[UCHAR (s[i])];
763 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
767 if (!strcmp (month, monthtab[lo].name))
768 return monthtab[lo].val;
772 /* Compare two lines A and B trying every key in sequence until there
773 are no more keys or a difference is found. */
779 register char *texta, *textb, *lima, *limb, *translate;
780 register int *ignore;
781 struct keyfield *key;
782 int diff = 0, iter = 0, lena, lenb;
784 for (key = keyhead.next; key; key = key->next, ++iter)
786 ignore = key->ignore;
787 translate = key->translate;
789 /* Find the beginning and limit of each field. */
790 if (iter || a->keybeg == NULL || b->keybeg == NULL)
793 lima = limfield (a, key), limb = limfield (b, key);
795 lima = a->text + a->length, limb = b->text + b->length;
798 texta = begfield (a, key), textb = begfield (b, key);
801 texta = a->text, textb = b->text;
802 if (key->skipsblanks)
804 while (texta < lima && blanks[UCHAR (*texta)])
806 while (textb < limb && blanks[UCHAR (*textb)])
813 /* For the first iteration only, the key positions have
814 been precomputed for us. */
815 texta = a->keybeg, lima = a->keylim;
816 textb = b->keybeg, limb = b->keylim;
819 /* Find the lengths. */
820 lena = lima - texta, lenb = limb - textb;
826 /* Actually compare the fields. */
831 char savea = *lima, saveb = *limb;
833 *lima = *limb = '\0';
834 diff = numcompare (texta, textb);
835 *lima = savea, *limb = saveb;
838 diff = numcompare (texta, textb);
841 return key->reverse ? -diff : diff;
846 diff = getmonth (texta, lena) - getmonth (textb, lenb);
848 return key->reverse ? -diff : diff;
851 else if (ignore && translate)
852 while (texta < lima && textb < limb)
854 while (texta < lima && ignore[UCHAR (*texta)])
856 while (textb < limb && ignore[UCHAR (*textb)])
858 if (texta < lima && textb < limb &&
859 translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
861 diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
864 else if (texta == lima && textb < limb) diff = -1;
865 else if (texta < lima && textb == limb) diff = 1;
868 while (texta < lima && textb < limb)
870 while (texta < lima && ignore[UCHAR (*texta)])
872 while (textb < limb && ignore[UCHAR (*textb)])
874 if (texta < lima && textb < limb && *texta++ != *textb++)
876 diff = *--texta - *--textb;
879 else if (texta == lima && textb < limb) diff = -1;
880 else if (texta < lima && textb == limb) diff = 1;
883 while (texta < lima && textb < limb)
885 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
887 diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
892 diff = memcmp (texta, textb, min (lena, lenb));
895 return key->reverse ? -diff : diff;
896 if ((diff = lena - lenb) != 0)
897 return key->reverse ? -diff : diff;
903 /* Compare two lines A and B, returning negative, zero, or positive
904 depending on whether A compares less than, equal to, or greater than B. */
908 register struct line *a, *b;
910 int diff, tmpa, tmpb, mini;
912 /* First try to compare on the specified keys (if any).
913 The only two cases with no key at all are unadorned sort,
914 and unadorned sort -r. */
917 diff = keycompare (a, b);
920 if (unique || stable)
924 /* If the keys all compare equal (or no keys were specified)
925 fall through to the default byte-by-byte comparison. */
926 tmpa = a->length, tmpb = b->length;
927 mini = min (tmpa, tmpb);
932 char *ap = a->text, *bp = b->text;
934 diff = UCHAR (*ap) - UCHAR (*bp);
937 diff = memcmp (ap, bp, mini);
943 return reverse ? -diff : diff;
946 /* Check that the lines read from the given FP come in order. Return
947 1 if they do and 0 if there is a disorder. */
953 struct buffer buf; /* Input buffer. */
954 struct lines lines; /* Lines scanned from the buffer. */
955 struct line *prev_line; /* Pointer to previous line. */
956 struct line temp; /* Copy of previous line. */
957 int cc; /* Character count. */
958 int cmp; /* Result of calling compare. */
959 int alloc, i, success = 1;
961 initbuf (&buf, mergealloc);
962 initlines (&lines, mergealloc / linelength + 1,
963 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
965 temp.text = xmalloc (alloc);
967 cc = fillbuf (&buf, fp);
968 findlines (&buf, &lines);
973 /* Compare each line in the buffer with its successor. */
974 for (i = 0; i < lines.used - 1; ++i)
976 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
977 if ((unique && cmp >= 0) || (cmp > 0))
984 /* Save the last line of the buffer and refill the buffer. */
985 prev_line = lines.lines + lines.used - 1;
986 if (prev_line->length > alloc)
988 while (prev_line->length + 1 > alloc)
990 temp.text = xrealloc (temp.text, alloc);
992 memcpy (temp.text, prev_line->text, prev_line->length + 1);
993 temp.length = prev_line->length;
994 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
995 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
997 cc = fillbuf (&buf, fp);
1000 findlines (&buf, &lines);
1001 /* Make sure the line saved from the old buffer contents is
1002 less than or equal to the first line of the new buffer. */
1003 cmp = compare (&temp, &lines.lines[0]);
1004 if ((unique && cmp >= 0) || (cmp > 0))
1016 free ((char *) lines.lines);
1021 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1022 Close FPS before returning. */
1025 mergefps (fps, nfps, ofp)
1029 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1030 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1031 struct line saved; /* Saved line for unique check. */
1032 int savedflag = 0; /* True if there is a saved line. */
1033 int savealloc; /* Size allocated for the saved line. */
1034 int cur[NMERGE]; /* Current line in each line table. */
1035 int ord[NMERGE]; /* Table representing a permutation of fps,
1036 such that lines[ord[0]].lines[cur[ord[0]]]
1037 is the smallest line and will be next
1039 register int i, j, t;
1041 /* Allocate space for a saved line if necessary. */
1044 savealloc = linelength;
1045 saved.text = xmalloc (savealloc);
1048 /* Read initial lines from each input file. */
1049 for (i = 0; i < nfps; ++i)
1051 initbuf (&buffer[i], mergealloc);
1052 /* If a file is empty, eliminate it from future consideration. */
1053 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1057 for (j = i; j < nfps; ++j)
1058 fps[j] = fps[j + 1];
1061 free (buffer[i].buf);
1064 initlines (&lines[i], mergealloc / linelength + 1,
1065 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1066 findlines (&buffer[i], &lines[i]);
1071 /* Set up the ord table according to comparisons among input lines.
1072 Since this only reorders two items if one is strictly greater than
1073 the other, it is stable. */
1074 for (i = 0; i < nfps; ++i)
1076 for (i = 1; i < nfps; ++i)
1077 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1078 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1079 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1081 /* Repeatedly output the smallest line until no input remains. */
1084 /* If uniqified output is turned on, output only the first of
1085 an identical series of lines. */
1088 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1090 xfwrite (saved.text, 1, saved.length, ofp);
1096 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1098 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1100 saved.text = xrealloc (saved.text, savealloc);
1102 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1103 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1105 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1107 saved.keybeg = saved.text +
1108 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1109 - lines[ord[0]].lines[cur[ord[0]]].text);
1111 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1113 saved.keylim = saved.text +
1114 (lines[ord[0]].lines[cur[ord[0]]].keylim
1115 - lines[ord[0]].lines[cur[ord[0]]].text);
1122 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1123 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1127 /* Check if we need to read more lines into core. */
1128 if (++cur[ord[0]] == lines[ord[0]].used)
1129 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1131 findlines (&buffer[ord[0]], &lines[ord[0]]);
1136 /* We reached EOF on fps[ord[0]]. */
1137 for (i = 1; i < nfps; ++i)
1138 if (ord[i] > ord[0])
1141 xfclose (fps[ord[0]]);
1142 free (buffer[ord[0]].buf);
1143 free ((char *) lines[ord[0]].lines);
1144 for (i = ord[0]; i < nfps; ++i)
1146 fps[i] = fps[i + 1];
1147 buffer[i] = buffer[i + 1];
1148 lines[i] = lines[i + 1];
1149 cur[i] = cur[i + 1];
1151 for (i = 0; i < nfps; ++i)
1152 ord[i] = ord[i + 1];
1156 /* The new line just read in may be larger than other lines
1157 already in core; push it back in the queue until we encounter
1158 a line larger than it. */
1159 for (i = 1; i < nfps; ++i)
1161 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1162 &lines[ord[i]].lines[cur[ord[i]]]);
1164 t = ord[0] - ord[i];
1169 for (j = 1; j < i; ++j)
1170 ord[j - 1] = ord[j];
1174 if (unique && savedflag)
1176 xfwrite (saved.text, 1, saved.length, ofp);
1182 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1185 sortlines (lines, nlines, temp)
1186 struct line *lines, *temp;
1189 register struct line *lo, *hi, *t;
1190 register int nlo, nhi;
1194 if (compare (&lines[0], &lines[1]) > 0)
1195 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1205 sortlines (lo, nlo, temp);
1208 sortlines (hi, nhi, temp);
1213 if (compare (lo, hi) <= 0)
1214 *t++ = *lo++, --nlo;
1216 *t++ = *hi++, --nhi;
1220 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1224 /* Check that each of the NFILES FILES is ordered.
1225 Return a count of disordered files. */
1228 check (files, nfiles)
1232 int i, disorders = 0;
1235 for (i = 0; i < nfiles; ++i)
1237 fp = xfopen (files[i], "r");
1240 printf ("%s: disorder on %s\n", program_name, files[i]);
1247 /* Merge NFILES FILES onto OFP. */
1250 merge (files, nfiles, ofp)
1257 FILE *fps[NMERGE], *tfp;
1259 while (nfiles > NMERGE)
1262 for (i = 0; i < nfiles / NMERGE; ++i)
1264 for (j = 0; j < NMERGE; ++j)
1265 fps[j] = xfopen (files[i * NMERGE + j], "r");
1266 tfp = xfopen (temp = tempname (), "w");
1267 mergefps (fps, NMERGE, tfp);
1269 for (j = 0; j < NMERGE; ++j)
1270 zaptemp (files[i * NMERGE + j]);
1273 for (j = 0; j < nfiles % NMERGE; ++j)
1274 fps[j] = xfopen (files[i * NMERGE + j], "r");
1275 tfp = xfopen (temp = tempname (), "w");
1276 mergefps (fps, nfiles % NMERGE, tfp);
1278 for (j = 0; j < nfiles % NMERGE; ++j)
1279 zaptemp (files[i * NMERGE + j]);
1284 for (i = 0; i < nfiles; ++i)
1285 fps[i] = xfopen (files[i], "r");
1286 mergefps (fps, i, ofp);
1287 for (i = 0; i < nfiles; ++i)
1291 /* Sort NFILES FILES onto OFP. */
1294 sort (files, nfiles, ofp)
1304 struct tempnode *node;
1308 initbuf (&buf, sortalloc);
1309 initlines (&lines, sortalloc / linelength + 1,
1310 LINEALLOC / sizeof (struct line));
1312 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1316 fp = xfopen (*files++, "r");
1317 while (fillbuf (&buf, fp))
1319 findlines (&buf, &lines);
1320 if (lines.used > ntmp)
1322 while (lines.used > ntmp)
1324 tmp = (struct line *)
1325 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1327 sortlines (lines.lines, lines.used, tmp);
1328 if (feof (fp) && !nfiles && !ntemp && !buf.left)
1333 tfp = xfopen (tempname (), "w");
1335 for (i = 0; i < lines.used; ++i)
1336 if (!unique || i == 0
1337 || compare (&lines.lines[i], &lines.lines[i - 1]))
1339 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1349 free ((char *) lines.lines);
1350 free ((char *) tmp);
1354 tempfiles = (char **) xmalloc (ntemp * sizeof (char *));
1356 for (node = temphead.next; i > 0; node = node->next)
1357 tempfiles[--i] = node->name;
1358 merge (tempfiles, ntemp, ofp);
1359 free ((char *) tempfiles);
1363 /* Insert key KEY at the end of the list (`keyhead'). */
1367 struct keyfield *key;
1369 struct keyfield *k = &keyhead;
1381 error (2, 0, "invalid field specification `%s'", s);
1384 /* Handle interrupts and hangups. */
1390 #ifdef _POSIX_VERSION
1391 struct sigaction sigact;
1393 sigact.sa_handler = SIG_DFL;
1394 sigemptyset (&sigact.sa_mask);
1395 sigact.sa_flags = 0;
1396 sigaction (sig, &sigact, NULL);
1397 #else /* !_POSIX_VERSION */
1398 signal (sig, SIG_DFL);
1399 #endif /* _POSIX_VERSION */
1401 kill (getpid (), sig);
1404 /* Set the ordering options for KEY specified in S.
1405 Return the address of the first character in S that
1406 is not a valid ordering option.
1407 BLANKTYPE is the kind of blanks that 'b' should skip. */
1410 set_ordering (s, key, blanktype)
1412 struct keyfield *key;
1413 enum blanktype blanktype;
1420 if (blanktype == bl_start || blanktype == bl_both)
1421 key->skipsblanks = 1;
1422 if (blanktype == bl_end || blanktype == bl_both)
1423 key->skipeblanks = 1;
1426 key->ignore = nondictionary;
1429 key->translate = fold_toupper;
1433 /* Reserved for comparing floating-point numbers. */
1437 key->ignore = nonprinting;
1461 struct keyfield *key = NULL, gkey;
1464 int checkonly = 0, mergeonly = 0, nfiles = 0;
1465 char *minus = "-", *outfile = minus, **files, *tmp;
1467 #ifdef _POSIX_VERSION
1468 struct sigaction oldact, newact;
1469 #endif /* _POSIX_VERSION */
1471 program_name = argv[0];
1473 parse_long_options (argc, argv, "sort", version_string, usage);
1475 have_read_stdin = 0;
1478 temp_file_prefix = getenv ("TMPDIR");
1479 if (temp_file_prefix == NULL)
1480 temp_file_prefix = "/tmp";
1482 #ifdef _POSIX_VERSION
1483 newact.sa_handler = sighandler;
1484 sigemptyset (&newact.sa_mask);
1485 newact.sa_flags = 0;
1487 sigaction (SIGINT, NULL, &oldact);
1488 if (oldact.sa_handler != SIG_IGN)
1489 sigaction (SIGINT, &newact, NULL);
1490 sigaction (SIGHUP, NULL, &oldact);
1491 if (oldact.sa_handler != SIG_IGN)
1492 sigaction (SIGHUP, &newact, NULL);
1493 sigaction (SIGPIPE, NULL, &oldact);
1494 if (oldact.sa_handler != SIG_IGN)
1495 sigaction (SIGPIPE, &newact, NULL);
1496 sigaction (SIGTERM, NULL, &oldact);
1497 if (oldact.sa_handler != SIG_IGN)
1498 sigaction (SIGTERM, &newact, NULL);
1499 #else /* !_POSIX_VERSION */
1500 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1501 signal (SIGINT, sighandler);
1502 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1503 signal (SIGHUP, sighandler);
1504 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1505 signal (SIGPIPE, sighandler);
1506 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1507 signal (SIGTERM, sighandler);
1508 #endif /* !_POSIX_VERSION */
1510 gkey.sword = gkey.eword = -1;
1512 gkey.translate = NULL;
1513 gkey.numeric = gkey.month = gkey.reverse = 0;
1514 gkey.skipsblanks = gkey.skipeblanks = 0;
1516 files = (char **) xmalloc (sizeof (char *) * argc);
1518 for (i = 1; i < argc; ++i)
1520 if (argv[i][0] == '+')
1524 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1527 key->translate = NULL;
1528 key->skipsblanks = key->skipeblanks = 0;
1529 key->numeric = key->month = key->reverse = 0;
1531 if (!digits[UCHAR (*s)])
1532 badfieldspec (argv[i]);
1533 for (t = 0; digits[UCHAR (*s)]; ++s)
1534 t = 10 * t + *s - '0';
1537 for (++s; digits[UCHAR (*s)]; ++s)
1538 t2 = 10 * t2 + *s - '0';
1546 s = set_ordering (s, key, bl_start);
1548 badfieldspec (argv[i]);
1550 else if (argv[i][0] == '-' && argv[i][1])
1553 if (digits[UCHAR (*s)])
1557 for (t = 0; digits[UCHAR (*s)]; ++s)
1558 t = t * 10 + *s - '0';
1561 for (++s; digits[UCHAR (*s)]; ++s)
1562 t2 = t2 * 10 + *s - '0';
1565 s = set_ordering (s, key, bl_end);
1567 badfieldspec (argv[i]);
1574 s = set_ordering (s, &gkey, bl_both);
1588 error (2, 0, "option `-k' requires an argument");
1594 key = (struct keyfield *)
1595 xmalloc (sizeof (struct keyfield));
1598 key->translate = NULL;
1599 key->skipsblanks = key->skipeblanks = 0;
1600 key->numeric = key->month = key->reverse = 0;
1602 if (!digits[UCHAR (*s)])
1603 badfieldspec (argv[i]);
1604 for (t = 0; digits[UCHAR (*s)]; ++s)
1605 t = 10 * t + *s - '0';
1611 for (++s; digits[UCHAR (*s)]; ++s)
1612 t2 = 10 * t2 + *s - '0';
1623 s = set_ordering (s, key, bl_start);
1624 if (*s && *s != ',')
1625 badfieldspec (argv[i]);
1629 for (t = 0; digits[UCHAR (*s)]; ++s)
1630 t = t * 10 + *s - '0';
1636 for (++s; digits[UCHAR (*s)]; ++s)
1637 t2 = t2 * 10 + *s - '0';
1643 s = set_ordering (s, key, bl_end);
1645 badfieldspec (argv[i]);
1659 error (2, 0, "option `-o' requires an argument");
1661 outfile = argv[++i];
1670 else if (i < argc - 1)
1676 error (2, 0, "option `-t' requires an argument");
1680 temp_file_prefix = ++s;
1684 temp_file_prefix = argv[++i];
1686 error (2, 0, "option `-T' requires an argument");
1694 /* Accept and ignore e.g. -y0 for compatibility with
1698 fprintf (stderr, "%s: unrecognized option `-%c'\n",
1706 else /* Not an option. */
1708 files[nfiles++] = argv[i];
1716 /* Inheritance of global options to individual keys. */
1717 for (key = keyhead.next; key; key = key->next)
1718 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
1719 && !key->skipeblanks && !key->month && !key->numeric)
1721 key->ignore = gkey.ignore;
1722 key->translate = gkey.translate;
1723 key->skipsblanks = gkey.skipsblanks;
1724 key->skipeblanks = gkey.skipeblanks;
1725 key->month = gkey.month;
1726 key->numeric = gkey.numeric;
1727 key->reverse = gkey.reverse;
1730 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
1731 || gkey.skipeblanks || gkey.month || gkey.numeric))
1733 reverse = gkey.reverse;
1742 exit (check (files, nfiles) != 0);
1744 if (strcmp (outfile, "-"))
1746 struct stat outstat;
1747 if (safe_stat (outfile, &outstat) == 0)
1749 /* The following code prevents a race condition when
1750 people use the brain dead shell programming idiom:
1751 cat file | sort -o file
1752 This feature is provided for historical compatibility,
1753 but we strongly discourage ever relying on this in
1754 new shell programs. */
1756 /* Temporarily copy each input file that might be another name
1757 for the output file. When in doubt (e.g. a pipe), copy. */
1758 for (i = 0; i < nfiles; ++i)
1764 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
1767 if ((strcmp (files[i], "-")
1768 ? safe_stat (files[i], &instat)
1769 : fstat (fileno (stdin), &instat)) != 0)
1771 error (0, errno, "%s", files[i]);
1775 if (S_ISREG (instat.st_mode)
1776 && (instat.st_ino != outstat.st_ino
1777 || instat.st_dev != outstat.st_dev))
1779 /* We know the files are distinct. */
1784 fp = xfopen (files[i], "r");
1786 ofp = xfopen (tmp, "w");
1787 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
1788 xfwrite (buf, 1, cc, ofp);
1791 error (0, errno, "%s", files[i]);
1800 ofp = xfopen (outfile, "w");
1806 merge (files, nfiles, ofp);
1808 sort (files, nfiles, ofp);
1811 /* If we wait for the implicit flush on exit, and the parent process
1812 has closed stdout (e.g., exec >&- in a shell), then the output file
1813 winds up empty. I don't understand why. This is under SunOS,
1814 Solaris, Ultrix, and Irix. This premature fflush makes the output
1815 reappear. --karl@cs.umb.edu */
1816 if (fflush (ofp) < 0)
1817 error (1, errno, "%s: write error", outfile);
1819 if (have_read_stdin && fclose (stdin) == EOF)
1820 error (1, errno, outfile);
1821 if (ferror (stdout) || fclose (stdout) == EOF)
1822 error (1, errno, "%s: write error", outfile);
1832 fprintf (stderr, "Try `%s --help' for more information.\n",
1837 Usage: %s [OPTION]... [FILE]...\n\
1842 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
1843 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
1844 -T DIRECT use DIRECT for temporary files, not $TMPDIR or /tmp\n\
1845 -b ignore leading blanks in sort fields or keys\n\
1846 -c check if given files already sorted, do not sort\n\
1847 -d consider only [a-zA-Z0-9 ] characters in keys\n\
1848 -f fold lower case to upper case characters in keys\n\
1849 -i consider only [\\040-\\0176] characters in keys\n\
1850 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
1851 -m merge already sorted files, do not sort\n\
1852 -n compare according to string numerical value, imply -b\n\
1853 -o FILE write result on FILE instead of standard output\n\
1854 -r reverse the result of comparisons\n\
1855 -s stabilize sort by disabling last resort comparison\n\
1856 -t SEP use SEParator instead of non- to whitespace transition\n\
1857 -u with -c, check for strict ordering\n\
1858 -u with -m, only output the first of an equal sequence\n\
1859 --help display this help and exit\n\
1860 --version output version information and exit\n\
1862 POS is F[.C][OPTS], where F is the field number and C the character\n\
1863 position in the field, both counted from zero. OPTS is made up of one\n\
1864 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
1865 for that key. If no key given, use the entire line as key. With no\n\
1866 FILE, or when FILE is -, read standard input.\n\