1 /* Prepare TeX index dribble output into an actual index.
5 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
32 #if defined (HAVE_UNISTD_H)
34 #else /* !HAVE_UNISTD_H */
36 #endif /* !HAVE_UNISTD_H */
38 extern char *mktemp ();
40 #if !defined (HAVE_STRERROR)
42 extern char *sys_errlist[];
45 #include <sys/types.h>
47 #if defined (_AIX) || !defined (_POSIX_VERSION)
48 # include <sys/file.h>
54 #define TI_FATAL_ERROR 1
56 #if !defined (SEEK_SET)
60 #endif /* !SEEK_SET */
62 /* When sorting in core, this structure describes one line
63 and the position and length of its first keyfield. */
66 char *text; /* The actual text of the line. */
68 char *text; /* The start of the key (for textual comparison). */
69 long number; /* The numeric value (for numeric comparison). */
71 long keylen; /* Length of KEY field. */
74 /* This structure describes a field to use as a sort key. */
77 int startwords; /* Number of words to skip. */
78 int startchars; /* Number of additional chars to skip. */
79 int endwords; /* Number of words to ignore at end. */
80 int endchars; /* Ditto for characters of last word. */
81 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
82 char fold_case; /* Non-zero means case doesn't matter. */
83 char reverse; /* Non-zero means compare in reverse order. */
84 char numeric; /* Non-zeros means field is ASCII numeric. */
85 char positional; /* Sort according to file position. */
86 char braced; /* Count balanced-braced groupings as fields. */
89 /* Vector of keyfields to use. */
90 struct keyfield keyfields[3];
92 /* Number of keyfields stored in that vector. */
93 int num_keyfields = 3;
95 /* Vector of input file names, terminated with a null pointer. */
98 /* Vector of corresponding output file names, or NULL, meaning default it
99 (add an `s' to the end). */
102 /* Length of `infiles'. */
105 /* Pointer to the array of pointers to lines being sorted. */
108 /* The allocated length of `linearray'. */
111 /* Directory to use for temporary files. On Unix, it ends with a slash. */
114 /* Start of filename to use for temporary files. */
117 /* Number of last temporary file. */
120 /* Number of last temporary file already deleted.
121 Temporary files are deleted by `flush_tempfiles' in order of creation. */
122 int last_deleted_tempcount;
124 /* During in-core sort, this points to the base of the data block
125 which contains all the lines of data. */
128 /* Additional command switches .*/
130 /* Nonzero means do not delete tempfiles -- for debugging. */
133 /* The name this program was run with. */
136 /* Forward declarations of functions in this file. */
138 void decode_command ();
139 void sort_in_core ();
140 void sort_offline ();
145 char *find_braced_pos ();
146 char *find_braced_end ();
148 int compare_field ();
153 void pfatal_with_name ();
156 void *xmalloc (), *xrealloc ();
158 char *maketempname ();
159 void flush_tempfiles ();
162 #define MAX_IN_CORE_SORT 500000
172 last_deleted_tempcount = 0;
173 program_name = argv[0];
175 /* Describe the kind of sorting to do. */
176 /* The first keyfield uses the first braced field and folds case. */
177 keyfields[0].braced = 1;
178 keyfields[0].fold_case = 1;
179 keyfields[0].endwords = -1;
180 keyfields[0].endchars = -1;
182 /* The second keyfield uses the second braced field, numerically. */
183 keyfields[1].braced = 1;
184 keyfields[1].numeric = 1;
185 keyfields[1].startwords = 1;
186 keyfields[1].endwords = -1;
187 keyfields[1].endchars = -1;
189 /* The third keyfield (which is ignored while discarding duplicates)
190 compares the whole line. */
191 keyfields[2].endwords = -1;
192 keyfields[2].endchars = -1;
194 decode_command (argc, argv);
196 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
198 /* Process input files completely, one by one. */
200 for (i = 0; i < num_infiles; i++)
206 desc = open (infiles[i], O_RDONLY, 0);
208 pfatal_with_name (infiles[i]);
209 lseek (desc, 0L, SEEK_END);
210 ptr = lseek (desc, 0L, SEEK_CUR);
214 outfile = outfiles[i];
217 outfile = concat (infiles[i], "s", "");
220 if (ptr < MAX_IN_CORE_SORT)
221 /* Sort a small amount of data. */
222 sort_in_core (infiles[i], ptr, outfile);
224 sort_offline (infiles[i], ptr, outfile);
227 flush_tempfiles (tempcount);
235 Usage: %s [-k] infile [-o outfile] ...\n", program_name);
239 /* Decode the command line arguments to set the parameter variables
240 and set up the vector of keyfields and the vector of input files. */
243 decode_command (argc, argv)
251 /* Store default values into parameter variables. */
253 tempdir = getenv ("TMPDIR");
257 tempdir = concat (tempdir, "/", "");
261 /* Allocate ARGC input files, which must be enough. */
263 infiles = (char **) xmalloc (argc * sizeof (char *));
264 outfiles = (char **) xmalloc (argc * sizeof (char *));
268 while ((optc = getopt (argc, argv, "-ko:")) != EOF)
272 case 1: /* Non-option filename. */
291 /* Record number of keyfields and terminate list of filenames. */
292 num_infiles = ip - infiles;
294 if (num_infiles == 0)
298 /* Return a name for a temporary file. */
305 sprintf (tempsuffix, "%d", count);
306 return concat (tempdir, tempbase, tempsuffix);
309 /* Delete all temporary files up to TO_COUNT. */
312 flush_tempfiles (to_count)
317 while (last_deleted_tempcount < to_count)
318 unlink (maketempname (++last_deleted_tempcount));
321 /* Copy the input file open on IDESC into a temporary file
322 and return the temporary file name. */
330 char *outfile = maketempname (++tempcount);
332 char buffer[BUFSIZE];
334 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
337 pfatal_with_name (outfile);
341 int nread = read (idesc, buffer, BUFSIZE);
342 write (odesc, buffer, nread);
352 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
355 compare_full (line1, line2)
356 char **line1, **line2;
360 /* Compare using the first keyfield;
361 if that does not distinguish the lines, try the second keyfield;
364 for (i = 0; i < num_keyfields; i++)
366 long length1, length2;
367 char *start1 = find_field (&keyfields[i], *line1, &length1);
368 char *start2 = find_field (&keyfields[i], *line2, &length2);
369 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
370 start2, length2, *line2 - text_base);
373 if (keyfields[i].reverse)
379 return 0; /* Lines match exactly. */
382 /* Compare LINE1 and LINE2, described by structures
383 in which the first keyfield is identified in advance.
384 For positional sorting, assumes that the order of the lines in core
385 reflects their nominal order. */
388 compare_prepared (line1, line2)
389 struct lineinfo *line1, *line2;
395 /* Compare using the first keyfield, which has been found for us already. */
396 if (keyfields->positional)
398 if (line1->text - text_base > line2->text - text_base)
403 else if (keyfields->numeric)
404 tem = line1->key.number - line2->key.number;
406 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
407 line2->key.text, line2->keylen, 0);
410 if (keyfields->reverse)
418 /* Compare using the second keyfield;
419 if that does not distinguish the lines, try the third keyfield;
422 for (i = 1; i < num_keyfields; i++)
424 long length1, length2;
425 char *start1 = find_field (&keyfields[i], text1, &length1);
426 char *start2 = find_field (&keyfields[i], text2, &length2);
427 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
428 start2, length2, text2 - text_base);
431 if (keyfields[i].reverse)
437 return 0; /* Lines match exactly. */
440 /* Like compare_full but more general.
441 You can pass any strings, and you can say how many keyfields to use.
442 POS1 and POS2 should indicate the nominal positional ordering of
443 the two lines in the input. */
446 compare_general (str1, str2, pos1, pos2, use_keyfields)
453 /* Compare using the first keyfield;
454 if that does not distinguish the lines, try the second keyfield;
457 for (i = 0; i < use_keyfields; i++)
459 long length1, length2;
460 char *start1 = find_field (&keyfields[i], str1, &length1);
461 char *start2 = find_field (&keyfields[i], str2, &length2);
462 int tem = compare_field (&keyfields[i], start1, length1, pos1,
463 start2, length2, pos2);
466 if (keyfields[i].reverse)
472 return 0; /* Lines match exactly. */
475 /* Find the start and length of a field in STR according to KEYFIELD.
476 A pointer to the starting character is returned, and the length
477 is stored into the int that LENGTHPTR points to. */
480 find_field (keyfield, str, lengthptr)
481 struct keyfield *keyfield;
489 if (keyfield->braced)
490 fun = find_braced_pos;
494 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
495 keyfield->ignore_blanks);
496 if (keyfield->endwords < 0)
498 if (keyfield->braced)
499 end = find_braced_end (start);
503 while (*end && *end != '\n')
509 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
510 if (end - str < start - str)
513 *lengthptr = end - start;
517 /* Return a pointer to a specified place within STR,
518 skipping (from the beginning) WORDS words and then CHARS chars.
519 If IGNORE_BLANKS is nonzero, we skip all blanks
520 after finding the specified word. */
523 find_pos (str, words, chars, ignore_blanks)
531 for (i = 0; i < words; i++)
534 /* Find next bunch of nonblanks and skip them. */
535 while ((c = *p) == ' ' || c == '\t')
537 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
539 if (!*p || *p == '\n')
543 while (*p == ' ' || *p == '\t')
546 for (i = 0; i < chars; i++)
548 if (!*p || *p == '\n')
555 /* Like find_pos but assumes that each field is surrounded by braces
556 and that braces within fields are balanced. */
559 find_braced_pos (str, words, chars, ignore_blanks)
569 for (i = 0; i < words; i++)
572 while ((c = *p++) != '{' && c != '\n' && c)
583 if (c == 0 || c == '\n')
588 while ((c = *p++) != '{' && c != '\n' && c)
595 while ((c = *p) == ' ' || c == '\t')
598 for (i = 0; i < chars; i++)
600 if (!*p || *p == '\n')
607 /* Find the end of the balanced-brace field which starts at STR.
608 The position returned is just before the closing brace. */
611 find_braced_end (str)
626 if (c == 0 || c == '\n')
633 find_value (start, length)
639 if (isdigit (*start))
647 /* Vector used to translate characters for comparison.
648 This is how we make all alphanumerics follow all else,
649 and ignore case in the first sorting. */
656 for (i = 1; i < 256; i++)
659 for (i = '0'; i <= '9'; i++)
660 char_order[i] += 512;
662 for (i = 'a'; i <= 'z'; i++)
664 char_order[i] = 512 + i;
665 char_order[i + 'A' - 'a'] = 512 + i;
669 /* Compare two fields (each specified as a start pointer and a character count)
670 according to KEYFIELD.
671 The sign of the value reports the relation between the fields. */
674 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
675 struct keyfield *keyfield;
683 if (keyfields->positional)
690 if (keyfield->numeric)
692 long value = find_value (start1, length1) - find_value (start2, length2);
703 char *e1 = start1 + length1;
704 char *e2 = start2 + length2;
719 if (char_order[c1] != char_order[c2])
720 return char_order[c1] - char_order[c2];
725 /* Strings are equal except possibly for case. */
742 /* Reverse sign here so upper case comes out last. */
752 /* A `struct linebuffer' is a structure which holds a line of text.
753 `readline' reads a line from a stream into a linebuffer
754 and works regardless of the length of the line. */
762 /* Initialize LINEBUFFER for use. */
765 initbuffer (linebuffer)
766 struct linebuffer *linebuffer;
768 linebuffer->size = 200;
769 linebuffer->buffer = (char *) xmalloc (200);
772 /* Read a line of text from STREAM into LINEBUFFER.
773 Return the length of the line. */
776 readline (linebuffer, stream)
777 struct linebuffer *linebuffer;
780 char *buffer = linebuffer->buffer;
781 char *p = linebuffer->buffer;
782 char *end = p + linebuffer->size;
786 int c = getc (stream);
789 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
790 p += buffer - linebuffer->buffer;
791 end += buffer - linebuffer->buffer;
792 linebuffer->buffer = buffer;
794 if (c < 0 || c == '\n')
805 /* Sort an input file too big to sort in core. */
808 sort_offline (infile, nfiles, total, outfile)
814 /* More than enough. */
815 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
816 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
817 FILE *istream = fopen (infile, "r");
819 struct linebuffer lb;
825 /* Read in one line of input data. */
827 linelength = readline (&lb, istream);
829 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
831 error ("%s: not a texinfo index file", infile);
835 /* Split up the input into `ntemps' temporary files, or maybe fewer,
836 and put the new files' names into `tempfiles' */
838 for (i = 0; i < ntemps; i++)
840 char *outname = maketempname (++tempcount);
841 FILE *ostream = fopen (outname, "w");
845 pfatal_with_name (outname);
846 tempfiles[i] = outname;
848 /* Copy lines into this temp file as long as it does not make file
849 "too big" or until there are no more lines. */
851 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
853 tempsize += linelength + 1;
854 fputs (lb.buffer, ostream);
855 putc ('\n', ostream);
857 /* Read another line of input data. */
859 linelength = readline (&lb, istream);
860 if (!linelength && feof (istream))
863 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
865 error ("%s: not a texinfo index file", infile);
878 /* Record number of temp files we actually needed. */
882 /* Sort each tempfile into another tempfile.
883 Delete the first set of tempfiles and put the names of the second
886 for (i = 0; i < ntemps; i++)
888 char *newtemp = maketempname (++tempcount);
889 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
891 unlink (tempfiles[i]);
892 tempfiles[i] = newtemp;
898 /* Merge the tempfiles together and indexify. */
900 merge_files (tempfiles, ntemps, outfile);
903 /* Sort INFILE, whose size is TOTAL,
904 assuming that is small enough to be done in-core,
905 then indexify it and send the output to OUTFILE (or to stdout). */
908 sort_in_core (infile, total, outfile)
914 char *data = (char *) xmalloc (total + 1);
918 FILE *ostream = stdout;
919 struct lineinfo *lineinfo;
921 /* Read the contents of the file into the moby array `data'. */
923 int desc = open (infile, O_RDONLY, 0);
926 fatal ("failure reopening %s", infile);
927 for (file_size = 0;;)
929 i = read (desc, data + file_size, total - file_size);
939 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
941 error ("%s: not a texinfo index file", infile);
947 /* Sort routines want to know this address. */
951 /* Create the array of pointers to lines, with a default size
952 frequently enough. */
957 linearray = (char **) xmalloc (nlines * sizeof (char *));
959 /* `nextline' points to the next free slot in this array.
960 `nlines' is the allocated size. */
962 nextline = linearray;
964 /* Parse the input file's data, and make entries for the lines. */
966 nextline = parsefile (infile, nextline, file_data, file_size);
969 error ("%s: not a texinfo index file", infile);
973 /* Sort the lines. */
975 /* If we have enough space, find the first keyfield of each line in advance.
976 Make a `struct lineinfo' for each line, which records the keyfield
977 as well as the line, and sort them. */
979 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
986 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
989 lp->key.text = find_field (keyfields, *p, &lp->keylen);
990 if (keyfields->numeric)
991 lp->key.number = find_value (lp->key.text, lp->keylen);
994 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
996 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1002 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1004 /* Open the output file. */
1008 ostream = fopen (outfile, "w");
1010 pfatal_with_name (outfile);
1013 writelines (linearray, nextline - linearray, ostream);
1021 /* Parse an input string in core into lines.
1022 DATA is the input string, and SIZE is its length.
1023 Data goes in LINEARRAY starting at NEXTLINE.
1024 The value returned is the first entry in LINEARRAY still unused.
1025 Value 0 means input file contents are invalid. */
1028 parsefile (filename, nextline, data, size)
1035 char **line = nextline;
1043 if (p[0] != '\\' && p[0] != '@')
1047 while (*p && *p != '\n')
1053 if (line == linearray + nlines)
1055 char **old = linearray;
1056 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1057 line += linearray - old;
1064 /* Indexification is a filter applied to the sorted lines
1065 as they are being written to the output file.
1066 Multiple entries for the same name, with different page numbers,
1067 get combined into a single entry with multiple page numbers.
1068 The first braced field, which is used for sorting, is discarded.
1069 However, its first character is examined, folded to lower case,
1070 and if it is different from that in the previous line fed to us
1071 a \initial line is written with one argument, the new initial.
1073 If an entry has four braced fields, then the second and third
1074 constitute primary and secondary names.
1075 In this case, each change of primary name
1076 generates a \primary line which contains only the primary name,
1077 and in between these are \secondary lines which contain
1078 just a secondary name and page numbers. */
1080 /* The last primary name we wrote a \primary entry for.
1081 If only one level of indexing is being done, this is the last name seen. */
1083 /* Length of storage allocated for lastprimary. */
1084 int lastprimarylength;
1086 /* Similar, for the secondary name. */
1087 char *lastsecondary;
1088 int lastsecondarylength;
1090 /* Zero if we are not in the middle of writing an entry.
1091 One if we have written the beginning of an entry but have not
1092 yet written any page numbers into it.
1093 Greater than one if we have written the beginning of an entry
1094 plus at least one page number. */
1097 /* The initial (for sorting purposes) of the last primary entry written.
1098 When this changes, a \initial {c} line is written */
1102 int lastinitiallength;
1104 /* When we need a string of length 1 for the value of lastinitial,
1107 char lastinitial1[2];
1109 /* Initialize static storage for writing an index. */
1125 lastinitial = lastinitial1;
1126 lastinitial1[0] = 0;
1127 lastinitial1[1] = 0;
1128 lastinitiallength = 0;
1129 lastprimarylength = 100;
1130 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1131 xbzero (lastprimary, lastprimarylength + 1);
1132 lastsecondarylength = 100;
1133 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1134 xbzero (lastsecondary, lastsecondarylength + 1);
1137 /* Indexify. Merge entries for the same name,
1138 insert headers for each initial character, etc. */
1141 indexify (line, ostream)
1145 char *primary, *secondary, *pagenumber;
1146 int primarylength, secondarylength = 0, pagelength;
1153 /* First, analyze the parts of the entry fed to us this time. */
1155 p = find_braced_pos (line, 0, 0, 0);
1159 /* Get length of inner pair of braces starting at `p',
1160 including that inner pair of braces. */
1161 initiallength = find_braced_end (p + 1) + 1 - p;
1170 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1174 pagenumber = find_braced_pos (line, 1, 0, 0);
1175 pagelength = find_braced_end (pagenumber) - pagenumber;
1176 if (pagelength == 0)
1179 primary = find_braced_pos (line, 2, 0, 0);
1180 primarylength = find_braced_end (primary) - primary;
1182 secondary = find_braced_pos (line, 3, 0, 0);
1183 nosecondary = !*secondary;
1185 secondarylength = find_braced_end (secondary) - secondary;
1187 /* If the primary is different from before, make a new primary entry. */
1188 if (strncmp (primary, lastprimary, primarylength))
1190 /* Close off current secondary entry first, if one is open. */
1193 fputs ("}\n", ostream);
1197 /* If this primary has a different initial, include an entry for
1199 if (initiallength != lastinitiallength ||
1200 strncmp (initial, lastinitial, initiallength))
1202 fprintf (ostream, "\\initial {");
1203 fwrite (initial, 1, initiallength, ostream);
1204 fprintf (ostream, "}\n", initial);
1205 if (initial == initial1)
1207 lastinitial = lastinitial1;
1208 *lastinitial1 = *initial1;
1212 lastinitial = initial;
1214 lastinitiallength = initiallength;
1217 /* Make the entry for the primary. */
1219 fputs ("\\entry {", ostream);
1221 fputs ("\\primary {", ostream);
1222 fwrite (primary, primarylength, 1, ostream);
1225 fputs ("}{", ostream);
1229 fputs ("}\n", ostream);
1231 /* Record name of most recent primary. */
1232 if (lastprimarylength < primarylength)
1234 lastprimarylength = primarylength + 100;
1235 lastprimary = (char *) xrealloc (lastprimary,
1236 1 + lastprimarylength);
1238 strncpy (lastprimary, primary, primarylength);
1239 lastprimary[primarylength] = 0;
1241 /* There is no current secondary within this primary, now. */
1242 lastsecondary[0] = 0;
1245 /* Should not have an entry with no subtopic following one with a subtopic. */
1247 if (nosecondary && *lastsecondary)
1248 error ("entry %s follows an entry with a secondary name", line);
1250 /* Start a new secondary entry if necessary. */
1251 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1255 fputs ("}\n", ostream);
1259 /* Write the entry for the secondary. */
1260 fputs ("\\secondary {", ostream);
1261 fwrite (secondary, secondarylength, 1, ostream);
1262 fputs ("}{", ostream);
1265 /* Record name of most recent secondary. */
1266 if (lastsecondarylength < secondarylength)
1268 lastsecondarylength = secondarylength + 100;
1269 lastsecondary = (char *) xrealloc (lastsecondary,
1270 1 + lastsecondarylength);
1272 strncpy (lastsecondary, secondary, secondarylength);
1273 lastsecondary[secondarylength] = 0;
1276 /* Here to add one more page number to the current entry. */
1278 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1279 fwrite (pagenumber, pagelength, 1, ostream);
1282 /* Close out any unfinished output entry. */
1285 finish_index (ostream)
1289 fputs ("}\n", ostream);
1291 free (lastsecondary);
1294 /* Copy the lines in the sorted order.
1295 Each line is copied out of the input file it was found in. */
1298 writelines (linearray, nlines, ostream)
1303 char **stop_line = linearray + nlines;
1308 /* Output the text of the lines, and free the buffer space. */
1310 for (next_line = linearray; next_line != stop_line; next_line++)
1312 /* If -u was specified, output the line only if distinct from previous one. */
1313 if (next_line == linearray
1314 /* Compare previous line with this one, using only the
1315 explicitly specd keyfields. */
1316 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1318 char *p = *next_line;
1321 while ((c = *p++) && c != '\n')
1324 indexify (*next_line, ostream);
1328 finish_index (ostream);
1331 /* Assume (and optionally verify) that each input file is sorted;
1332 merge them and output the result.
1333 Returns nonzero if any input file fails to be sorted.
1335 This is the high-level interface that can handle an unlimited
1338 #define MAX_DIRECT_MERGE 10
1341 merge_files (infiles, nfiles, outfile)
1350 int start_tempcount = tempcount;
1352 if (nfiles <= MAX_DIRECT_MERGE)
1353 return merge_direct (infiles, nfiles, outfile);
1355 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1356 making a temporary file to hold each group's result. */
1358 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1359 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1360 for (i = 0; i < ntemps; i++)
1362 int nf = MAX_DIRECT_MERGE;
1363 if (i + 1 == ntemps)
1364 nf = nfiles - i * MAX_DIRECT_MERGE;
1365 tempfiles[i] = maketempname (++tempcount);
1366 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1369 /* All temporary files that existed before are no longer needed
1370 since their contents have been merged into our new tempfiles.
1372 flush_tempfiles (start_tempcount);
1374 /* Now merge the temporary files we created. */
1376 merge_files (tempfiles, ntemps, outfile);
1383 /* Assume (and optionally verify) that each input file is sorted;
1384 merge them and output the result.
1385 Returns nonzero if any input file fails to be sorted.
1387 This version of merging will not work if the number of
1388 input files gets too high. Higher level functions
1389 use it only with a bounded number of input files. */
1392 merge_direct (infiles, nfiles, outfile)
1397 struct linebuffer *lb1, *lb2;
1398 struct linebuffer **thisline, **prevline;
1404 struct linebuffer *prev_out = 0;
1405 FILE *ostream = stdout;
1409 ostream = fopen (outfile, "w");
1412 pfatal_with_name (outfile);
1423 /* For each file, make two line buffers.
1424 Also, for each file, there is an element of `thisline'
1425 which points at any time to one of the file's two buffers,
1426 and an element of `prevline' which points to the other buffer.
1427 `thisline' is supposed to point to the next available line from the file,
1428 while `prevline' holds the last file line used,
1429 which is remembered so that we can verify that the file is properly sorted. */
1431 /* lb1 and lb2 contain one buffer each per file. */
1432 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1433 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1435 /* thisline[i] points to the linebuffer holding the next available line in file i,
1436 or is zero if there are no lines left in that file. */
1437 thisline = (struct linebuffer **)
1438 xmalloc (nfiles * sizeof (struct linebuffer *));
1439 /* prevline[i] points to the linebuffer holding the last used line
1440 from file i. This is just for verifying that file i is properly
1442 prevline = (struct linebuffer **)
1443 xmalloc (nfiles * sizeof (struct linebuffer *));
1444 /* streams[i] holds the input stream for file i. */
1445 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1446 /* file_lossage[i] is nonzero if we already know file i is not
1448 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1450 /* Allocate and initialize all that storage. */
1452 for (i = 0; i < nfiles; i++)
1454 initbuffer (&lb1[i]);
1455 initbuffer (&lb2[i]);
1456 thisline[i] = &lb1[i];
1457 prevline[i] = &lb2[i];
1458 file_lossage[i] = 0;
1459 streams[i] = fopen (infiles[i], "r");
1461 pfatal_with_name (infiles[i]);
1463 readline (thisline[i], streams[i]);
1466 /* Keep count of number of files not at eof. */
1471 struct linebuffer *best = 0;
1472 struct linebuffer *exch;
1476 /* Look at the next avail line of each file; choose the least one. */
1478 for (i = 0; i < nfiles; i++)
1482 0 < compare_general (best->buffer, thisline[i]->buffer,
1483 (long) bestfile, (long) i, num_keyfields)))
1490 /* Output that line, unless it matches the previous one and we
1491 don't want duplicates. */
1494 !compare_general (prev_out->buffer,
1495 best->buffer, 0L, 1L, num_keyfields - 1)))
1496 indexify (best->buffer, ostream);
1499 /* Now make the line the previous of its file, and fetch a new
1500 line from that file. */
1502 exch = prevline[bestfile];
1503 prevline[bestfile] = thisline[bestfile];
1504 thisline[bestfile] = exch;
1508 /* If the file has no more, mark it empty. */
1510 if (feof (streams[bestfile]))
1512 thisline[bestfile] = 0;
1513 /* Update the number of files still not empty. */
1517 readline (thisline[bestfile], streams[bestfile]);
1518 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1523 finish_index (ostream);
1525 /* Free all storage and close all input streams. */
1527 for (i = 0; i < nfiles; i++)
1529 fclose (streams[i]);
1530 free (lb1[i].buffer);
1531 free (lb2[i].buffer);
1533 free (file_lossage);
1546 /* Print error message and exit. */
1553 exit (TI_FATAL_ERROR);
1556 /* Print error message. S1 is printf control string, S2 is arg for it. */
1562 printf ("%s: ", program_name);
1567 #if !defined (HAVE_STRERROR)
1572 static char ebuf[40];
1575 return sys_errlist[n];
1578 sprintf (ebuf, "Unknown error %d", n);
1585 perror_with_name (name)
1590 s = concat ("", strerror (errno), " for %s");
1595 pfatal_with_name (name)
1600 s = concat ("", strerror (errno), " for %s");
1604 /* Return a newly-allocated string whose contents concatenate those of
1611 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1612 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1614 strcpy (result, s1);
1615 strcpy (result + len1, s2);
1616 strcpy (result + len1 + len2, s3);
1617 *(result + len1 + len2 + len3) = 0;
1622 /* Just like malloc, but kills the program in case of fatal error. */
1627 void *temp = (void *) malloc (nbytes);
1629 if (nbytes && temp == (void *)NULL)
1630 memory_error ("xmalloc", nbytes);
1635 /* Like realloc (), but barfs if there isn't enough memory. */
1637 xrealloc (pointer, nbytes)
1644 temp = (void *)xmalloc (nbytes);
1646 temp = (void *)realloc (pointer, nbytes);
1648 if (nbytes && !temp)
1649 memory_error ("xrealloc", nbytes);
1654 memory_error (callers_name, bytes_wanted)
1658 char printable_string[80];
1660 sprintf (printable_string,
1661 "Virtual memory exhausted in %s ()! Needed %d bytes.",
1662 callers_name, bytes_wanted);
1664 error (printable_string, "");