1 /*===========================================================================
2 Copyright (c) 1998-2000, The Santa Cruz Operation
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 *Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
11 *Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 *Neither name of The Santa Cruz Operation nor the names of its contributors
16 may be used to endorse or promote products derived from this software
17 without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
20 IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 =========================================================================*/
38 #include <sys/types.h>
/*
 * Build-time switches and sizing constants for the inverted index code.
 *
 * BLOCKSIZE is parenthesized so that it always expands as one operand;
 * without the parentheses an expression such as `x % BLOCKSIZE` would be
 * parsed as `(x % 2) * BUFSIZ`.
 */
#define DEBUG		0	/* debugging code and realloc messages */
#define BLOCKSIZE	(2 * BUFSIZ)	/* logical block size */
#define POSTINC		10000	/* posting buffer size increment */
#define SEP		' '	/* sorted posting field separator */
#define SETINC		100	/* posting set size increment */
#define STATS		0	/* print statistics */
#define SUPERINC	10000	/* super index size increment */
#define TERMMAX		512	/* term max size */
#define FMTVERSION	1	/* inverted index format version */
#define ZIPFSIZE	200	/* zipf curve size */
/* File-scope declarations: forward prototypes and the state shared by the
 * index builder and the boolean set routines in this file. */
59 static char const rcsid[] = "$Id: invlib.c,v 1.21 2012/07/10 20:01:40 nhorman Exp $";
62 /* FIXME HBB 20010705: nowhere in the source is `invbreak' ever set to
63 * a value other than the (silent) initialization to zero. Pretty
64 * useless, that looks */
/* forward declarations of helpers used before their definitions */
68 static int boolready(void);
69 static int invnewterm(void);
70 static void invstep(INVCONTROL *invcntl);
71 static void invcannotalloc(unsigned n);
72 static void invcannotopen(char *file);
73 static void invcannotwrite(char *file);
76 int showzipf; /* show postings per term distribution */
/* item1/item2 are the two posting-set buffers; item/enditem delimit the
 * set most recently produced by boolfile() */
79 static POSTING *item, *enditem, *item1 = NULL, *item2 = NULL;
80 static unsigned setsize1, setsize2;
81 static long numitems, totterm, zerolong;
/* output file names and streams used while building the index */
82 static char *indexfile, *postingfile;
83 static FILE *outfile, *fpost;
/* current allocation sizes of SUPFING (bytes) and SUPINT (entries) */
84 static unsigned supersize = SUPERINC, supintsize;
85 static unsigned int numpost, numlogblk, amtused, nextpost;
86 static unsigned int lastinblk, numinvitems;
/* postings buffer for the term being accumulated and its write cursor */
87 static POSTING *POST, *postptr;
/* superfinger offset table and superfinger text, each with a write cursor */
88 static unsigned long *SUPINT, *supint, nextsupfing;
89 static char *SUPFING, *supfing;
/* term whose postings are currently being collected by invmake() */
90 static char thisterm[TERMMAX];
/* one logical block, addressable as longs or as raw bytes;
 * NOTE(review): the closing line of this typedef is not visible here */
91 typedef union logicalblk {
92 long invblk[BLOCKSIZE / sizeof(long)];
93 char chrblk[BLOCKSIZE];
95 static t_logicalblk logicalblk;
/* postings-per-term histogram filled while building, printed with the ZIPF curve */
102 static int zipf[ZIPFSIZE + 1];
/*
 * invmake: build the inverted index file 'invname' and the postings file
 * 'invpost' from the posting lines read from 'infile'.
 *
 * Visible steps: open both output files, allocate the postings buffer
 * (POST), the superfinger text (SUPFING) and its offset table (SUPINT),
 * read the input one line at a time while collecting postings for the
 * current term, then write the final logical block, the superfinger, the
 * padding up to a logical-block boundary and the control area, and close
 * both files.
 * NOTE(review): several lines of this function (declarations, returns,
 * error-path cleanup) are not visible in this excerpt, so the return value
 * is not documented here.
 */
106 invmake(char *invname, char *invpost, FILE *infile)
111 long fileindex = 0; /* initialze, to avoid warning */
112 unsigned postsize = POSTINC * sizeof(POSTING);
113 unsigned long *intptr;
118 char temp[BLOCKSIZE];
121 unsigned maxtermlen = 0;
124 if ((outfile = vpfopen(invname, "w+b")) == NULL) {
125 invcannotopen(invname);
/* skip the first BUFSIZ bytes of the index file; param.cntlsize is set to
 * BUFSIZ further down */
129 fseek(outfile, BUFSIZ, SEEK_SET);
132 if ((fpost = vpfopen(invpost, "wb")) == NULL) {
133 invcannotopen(invpost);
136 postingfile = invpost;
138 /* get space for the postings list */
139 if ((POST = malloc(postsize)) == NULL) {
140 invcannotalloc(postsize);
144 /* get space for the superfinger (superindex) */
145 if ((SUPFING = malloc(supersize)) == NULL) {
146 invcannotalloc(supersize);
150 /* FIXME HBB: magic number alert (40) */
151 supintsize = supersize / 40;
152 /* also for the superfinger index */
153 if ((SUPINT = malloc(supintsize * sizeof(long))) == NULL) {
154 invcannotalloc(supintsize * sizeof(long));
158 supint++; /* leave first term open for a count */
159 /* initialize using an empty term */
160 strcpy(thisterm, "");
171 /* set up as though a block had come and gone, i.e., set up for new block */
172 /* 3 longs needed for: numinvitems, next block, and previous block */
173 amtused = 3 * sizeof(long);
176 lastinblk = sizeof(t_logicalblk);
178 /* now loop as long as more to read (till eof) */
179 while (fgets(line, TERMMAX, infile) != NULL) {
/* the term text is the part of the line before the first SEP */
183 s = strchr(line, SEP);
191 if ((i = strlen(line)) > maxtermlen) {
196 printf("%ld: %s ", totpost, line);
/* same term as before: make sure the postings buffer has room for more */
199 if (strcmp(thisterm, line) == 0) {
200 if (postptr + 10 > POST + postsize / sizeof(POSTING)) {
202 postsize += POSTINC * sizeof(POSTING);
/* NOTE(review): assigning realloc()'s result straight to POST loses the
 * old buffer if the call fails */
203 if ((POST = realloc(POST, postsize)) == NULL) {
204 invcannotalloc(postsize);
209 printf("reallocated post space to %u, totpost=%ld\n",
215 /* have a new term */
219 strcpy(thisterm, line);
224 /* get the new posting */
/* decode PRECISION characters, each an offset from '!', as a base-BASE number */
228 num = BASE * num + *++s - '!';
229 } while (++i < PRECISION);
230 posting.lineoffset = num;
/* advance fileindex while the offset lies beyond srcoffset[fileindex],
 * then step back by one to get the posting's file index */
231 while (++fileindex < nsrcfiles && num > srcoffset[fileindex]) {
234 posting.fileindex = --fileindex;
/* the rest of the line, up to the newline, is decoded the same way */
239 while (*++s != '\n') {
240 num = BASE * num + *s - '!';
242 posting.fcnoffset = num;
245 posting.fcnoffset = 0;
247 *postptr++ = posting;
249 printf("%ld %ld %ld %ld\n", posting.fileindex,
250 posting.fcnoffset, posting.lineoffset, posting.type);
257 /* now clean up final block */
258 logicalblk.invblk[0] = numinvitems;
259 /* loops pointer around to start */
260 logicalblk.invblk[1] = 0;
261 logicalblk.invblk[2] = numlogblk - 1;
262 if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {
266 /* write out block to save space. what in it doesn't matter */
267 if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {
270 /* finish up the super finger */
272 /* add to the offsets the size of the offset pointers */
273 intptr = (SUPINT + 1);
274 i = (char *)supint - (char *)SUPINT;
275 while (intptr < supint)
277 /* write out the offsets (1 for the N at start) and the super finger */
278 if (fwrite(SUPINT, sizeof(*SUPINT), numlogblk + 1, outfile) == 0 ||
279 fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {
282 /* save the size for reference later */
283 nextsupfing = sizeof(long) + sizeof(long) * numlogblk + (supfing - SUPFING);
284 /* make sure the file ends at a logical block boundary. This is
285 necessary for invinsert to correctly create extended blocks
287 i = nextsupfing % sizeof(t_logicalblk);
288 /* write out junk to fill log blk */
289 if (fwrite(temp, sizeof(t_logicalblk) - i, 1, outfile) == 0 ||
290 fflush(outfile) == EOF) { /* rewind doesn't check for write failure */
293 /* write the control area */
295 param.version = FMTVERSION;
297 param.sizeblk = sizeof(t_logicalblk);
298 param.startbyte = (numlogblk + 1) * sizeof(t_logicalblk) + BUFSIZ;;
299 param.supsize = nextsupfing;
300 param.cntlsize = BUFSIZ;
/* NOTE(review): the first argument on the next line looks like a
 * mis-decoded "&param" (the "&para" prefix turned into a pilcrow) */
302 if (fwrite(¶m, sizeof(param), 1, outfile) == 0) {
305 for (i = 0; i < 10; i++) /* for future use */
306 if (fwrite(&zerolong, sizeof(zerolong), 1, outfile) == 0) {
310 /* make first block loop backwards to last block */
311 if (fflush(outfile) == EOF) { /* fseek doesn't check for write failure */
314 /* get to second word first block */
315 fseek(outfile, BUFSIZ + 2 * sizeof(long), SEEK_SET);
316 tlong = numlogblk - 1;
317 if (fwrite(&tlong, sizeof(tlong), 1, outfile) == 0 ||
318 fclose(outfile) == EOF) {
320 invcannotwrite(invname);
323 if (fclose(fpost) == EOF) {
324 invcannotwrite(postingfile);
327 --totterm; /* don't count null term */
329 printf("logical blocks = %d, postings = %ld, terms = %ld, max term length = %d\n",
330 numlogblk, totpost, totterm, maxtermlen);
332 printf("\n************* ZIPF curve ****************\n");
333 for (j = ZIPFSIZE; j > 1; j--)
336 for (i = 1; i < j; ++i) {
337 printf("%3d -%6d ", i, zipf[i]);
338 if (i % 6 == 0) putchar('\n');
340 printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);
343 /* free all malloc'd memory */
/*
 * Body of the "add a term" routine; presumably invnewterm(), whose header
 * line is not visible in this excerpt.
 *
 * Visible behavior: computes the space the term in 'thisterm' needs in the
 * current logical block; if it does not fit and the block already holds at
 * least one item, grows the superfinger buffers as needed, optionally backs
 * up a few items, writes the block to 'outfile' and starts a new one with a
 * superfinger entry. Then it stores the term text, its item info and the
 * posting-file offset in the block and flushes the buffered postings to
 * 'fpost'.
 */
350 /* add a term to the data base */
355 int backupflag, i, j, holditems, gooditems, howfar;
356 unsigned int maxback, len, numwilluse, wdlen;
360 unsigned long packword[2];
364 gooditems = 0; /* initialize, to avoid warning */
367 /* keep zipfian info on the distribution */
368 if (numpost <= ZIPFSIZE)
373 len = strlen(thisterm);
374 /* length of term rounded up to long boundary */
375 wdlen = (len + (sizeof(long) - 1)) / sizeof(long);
376 /* each term needs 2 longs for its iteminfo and
377 * 1 long for its offset */
378 numwilluse = (wdlen + 3) * sizeof(long);
379 /* new block if at least 1 item in block */
380 if (numinvitems && numwilluse + amtused > sizeof(t_logicalblk)) {
381 /* set up new block */
382 if (supfing + 500 > SUPFING + supersize) {
/* remember the cursor as an offset so it can be rebased after realloc */
383 i = supfing - SUPFING;
/* NOTE(review): realloc()'s result overwrites SUPFING directly, so the old
 * buffer is lost if the call fails */
385 if ((SUPFING = (char *)realloc(SUPFING, supersize)) == NULL) {
386 invcannotalloc(supersize);
389 supfing = i + SUPFING;
391 printf("reallocated superfinger space to %d, totpost=%ld\n",
395 /* check that room for the offset as well */
396 /* FIXME HBB: magic number alert (10) */
397 if ((numlogblk + 10) > supintsize) {
399 supintsize += SUPERINC;
400 if ((SUPINT = realloc(SUPINT, supintsize * sizeof(long))) == NULL) {
401 invcannotalloc(supintsize * sizeof(long));
/* NOTE(review): supintsize * sizeof(long) has type size_t but is printed
 * with %d below */
406 printf("reallocated superfinger offset to %d, totpost = %ld\n",
407 supintsize * sizeof(long), totpost);
410 /* See if backup is efficatious */
412 maxback = (int) strlen(thisterm) / 10;
413 holditems = numinvitems;
/* NOTE(review): both operands are unsigned, so numinvitems - 2 wraps when
 * numinvitems < 2; confirm that cannot happen here */
414 if (maxback > numinvitems)
415 maxback = numinvitems - 2;
417 while (maxback-- > 1) {
/* NOTE(review): items are stored starting at invblk[3] (see the stores at
 * the end of this function), but this index adds sizeof(long) - 1, which is
 * 3 only when long is 4 bytes wide; confirm */
419 iteminfo.packword[0] =
420 logicalblk.invblk[--holditems * 2 + (sizeof(long) - 1)];
421 if ((i = iteminfo.e.size / 10) < maxback) {
424 gooditems = holditems;
427 /* see if backup will occur */
429 numinvitems = gooditems;
431 logicalblk.invblk[0] = numinvitems;
432 /* set forward pointer pointing to next */
433 logicalblk.invblk[1] = numlogblk + 1;
434 /* set back pointer to last block */
435 logicalblk.invblk[2] = numlogblk - 1;
436 if (fwrite(logicalblk.chrblk, 1, sizeof(t_logicalblk), outfile) == 0) {
437 invcannotwrite(indexfile);
440 /* 3 longs needed for: numinvitems, next block, and previous block */
441 amtused = 3 * sizeof(long);
443 /* check if had to back up, if so do it */
447 /* find out where the end of the new block is */
448 iteminfo.packword[0] = logicalblk.invblk[numinvitems*2+1];
449 tptr3 = logicalblk.chrblk + iteminfo.e.offset;
450 /* move the index for this block */
451 for (i = 3; i <= (backupflag * 2 + 2); i++)
452 logicalblk.invblk[i] = logicalblk.invblk[numinvitems*2+i];
453 /* move the word into the super index */
454 iteminfo.packword[0] = logicalblk.invblk[3];
455 iteminfo.packword[1] = logicalblk.invblk[4];
456 tptr2 = logicalblk.chrblk + iteminfo.e.offset;
457 strncpy(supfing, tptr2, (int) iteminfo.e.size);
458 *(supfing + iteminfo.e.size) = '\0';
460 printf("backup %d at term=%s to term=%s\n",
461 backupflag, thisterm, supfing);
463 *supint++ = nextsupfing;
464 nextsupfing += strlen(supfing) + 1;
465 supfing += strlen(supfing) + 1;
466 /* now fix up the logical block */
467 tptr = logicalblk.chrblk + lastinblk;
468 lastinblk = sizeof(t_logicalblk);
469 tptr2 = logicalblk.chrblk + lastinblk;
474 amtused += ((2 * sizeof(long)) * backupflag + j);
/* shift the stored offsets of the moved items by the distance they moved */
475 for (i = 3; i < (backupflag * 2 + 2); i += 2) {
476 iteminfo.packword[0] = logicalblk.invblk[i];
477 iteminfo.e.offset += (tptr2 - tptr3);
478 logicalblk.invblk[i] = iteminfo.packword[0];
480 numinvitems = backupflag;
481 } else { /* no backup needed */
483 lastinblk = sizeof(t_logicalblk);
484 /* add new term to superindex */
485 strcpy(supfing, thisterm);
486 supfing += strlen(thisterm) + 1;
487 *supint++ = nextsupfing;
488 nextsupfing += strlen(thisterm) + 1;
491 /* HBB 20010501: Fixed bug by replacing magic number '8' by
492 * what it actually represents. */
493 lastinblk -= (numwilluse - 2 * sizeof(long));
494 iteminfo.e.offset = lastinblk;
495 iteminfo.e.size = len;
496 iteminfo.e.space = 0;
497 iteminfo.e.post = numpost;
/* the term text is stored without a terminating NUL; its length is kept in
 * iteminfo.e.size */
498 strncpy(logicalblk.chrblk + lastinblk, thisterm, len);
499 amtused += numwilluse;
/* the long right after the rounded-up term text holds the posting-file offset */
500 logicalblk.invblk[(lastinblk/sizeof(long))+wdlen] = nextpost;
501 if ((i = postptr - POST) > 0) {
502 if (fwrite(POST, sizeof(POSTING), i, fpost) == 0) {
503 invcannotwrite(postingfile);
506 nextpost += i * sizeof(POSTING);
/* store the two packed item-info words for this term and count the item */
508 logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0];
509 logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1];
/*
 * invflipname - replace a trailing 'from' in 'invname' with 'to', in place.
 *
 * invname: writable, NUL-terminated name; modified only on success.
 * from:    text that must appear at the very end of 'invname'.
 * to:      replacement text; must be exactly as long as 'from' (asserted),
 *          so the length of 'invname' never changes.
 *
 * Returns 0 if 'invname' ended with 'from' and that suffix was overwritten
 * with 'to'. Otherwise returns -1 and leaves 'invname' untouched.
 *
 * The suffix is located by length arithmetic, so no pointer outside the
 * string is ever formed and an empty 'from' is handled (it is trivially a
 * suffix, and nothing is copied).
 */
static int
invflipname(char * invname, const char *from, const char *to)
{
	size_t namelen = strlen(invname);
	size_t fromlen = strlen(from);
	char *suffix;

	assert(fromlen == strlen(to));

	if (fromlen > namelen)
		return -1;

	suffix = invname + (namelen - fromlen);
	if (memcmp(suffix, from, fromlen) != 0)
		return -1;

	memcpy(suffix, to, fromlen);
	return 0;
}
/*
 * invopen: open an existing inverted index ('invname') and its postings
 * file ('invpost') and fill in *invcntl.
 *
 * 'stat' selects the open mode: 0 opens both files "rb", anything else
 * "r+b". If a file cannot be opened under the given name, the alternative
 * naming convention is tried via invflipname() before giving up.
 * The control block is read and checked (format version, block size, lock
 * state), a logical-block buffer is allocated, and the superfinger is
 * loaded either into shared memory (param.share == 1) or into malloc'd
 * memory.
 * NOTE(review): the return statements are not visible in this excerpt, so
 * the return values are not documented here.
 */
537 invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
541 if ((invcntl->invfile = vpfopen(invname, ((stat == 0) ? "rb" : "r+b"))) == NULL) {
542 /* If db created without '-f', but now invoked with '-f cscope.out',
543 * we need to check for 'cscope.in.out', rather than 'cscope.out.in':
544 * I.e, hack around our own violation of the inverse db naming convention */
545 if (!invflipname(invname, INVNAME2, INVNAME)) {
546 if ((invcntl->invfile = vpfopen(invname, ((stat == 0) ? "rb" : "r+b"))))
548 invflipname(invname, INVNAME, INVNAME2); /* change back for err msg */
550 /* more silliness: if you create the db with '-f cscope', then try to open
551 * it without '-f cscope', you'll fail unless we check for 'cscope.out.in'
553 else if (!invflipname(invname, INVNAME, INVNAME2)) {
554 if ((invcntl->invfile = vpfopen(invname, ((stat == 0) ? "rb" : "r+b"))))
556 invflipname(invname, INVNAME2, INVNAME); /* change back for err msg */
558 invcannotopen(invname);
/* read the control block and validate it before touching anything else */
562 if (fread(&invcntl->param, sizeof(invcntl->param), 1, invcntl->invfile) == 0) {
563 fprintf(stderr, "%s: empty inverted file\n", argv0);
566 if (invcntl->param.version != FMTVERSION) {
567 fprintf(stderr, "%s: cannot read old index format; use -U option to force database to rebuild\n", argv0);
570 assert(invcntl->param.sizeblk == sizeof(t_logicalblk));
572 if (stat == 0 && invcntl->param.filestat == INVALONE) {
573 fprintf(stderr, "%s: inverted file is locked\n", argv0);
576 if ((invcntl->postfile = vpfopen(invpost, ((stat == 0) ? "rb" : "r+b"))) == NULL) {
577 /* exact same naming convention hacks as above for invname */
578 if (!invflipname(invpost, INVPOST2, INVPOST)) {
579 if ((invcntl->postfile = vpfopen(invpost, ((stat == 0) ? "rb" : "r+b"))))
581 invflipname(invpost, INVPOST, INVPOST2); /* change back for err msg */
582 } else if (!invflipname(invpost, INVPOST, INVPOST2)) {
583 if ((invcntl->postfile = vpfopen(invpost,((stat == 0)?"rb":"r+b"))))
585 invflipname(invpost, INVPOST2, INVPOST); /* change back for err msg */
587 invcannotopen(invpost);
591 /* allocate core for a logical block */
592 if ((invcntl->logblk = malloc((unsigned) invcntl->param.sizeblk)) == NULL) {
593 invcannotalloc((unsigned) invcntl->param.sizeblk);
596 /* allocate for and read in superfinger */
598 invcntl->iindex = NULL;
600 if (invcntl->param.share == 1) {
602 struct shmid_ds shm_buf;
605 /* see if the shared segment exists */
606 shm_key = ftok(invname, 2);
607 shm_id = shmget(shm_key, 0, 0);
608 /* Failure simply means (hopefully) that segment doesn't exists */
610 /* Have to give general write permission due to AMdahl not having protected segments */
611 shm_id = shmget(shm_key, invcntl->param.supsize + sizeof(long), IPC_CREAT | 0666);
613 perror("Could not create shared memory segment");
618 invcntl->iindex = shmat(shm_id, 0, ((read_index) ? 0 : SHM_RDONLY));
619 if (invcntl->iindex == (char *)ERR) {
620 fprintf(stderr, "%s: shared memory link failed\n", argv0);
621 invcntl->iindex = NULL;
/* no shared segment in use: keep the superfinger in ordinary heap memory */
627 if (invcntl->iindex == NULL)
628 /* FIXME HBB: magic number alert (4) */
629 invcntl->iindex = malloc((unsigned) invcntl->param.supsize
631 if (invcntl->iindex == NULL) {
632 invcannotalloc((unsigned) invcntl->param.supsize);
633 free(invcntl->logblk);
/* NOTE(review): the result of the fread() below is not checked on any
 * visible line */
637 fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
638 fread(invcntl->iindex, (int) invcntl->param.supsize, 1,
641 invcntl->numblk = -1;
642 if (boolready() == -1) {
644 fclose(invcntl->postfile);
646 fclose(invcntl->invfile);
649 /* write back out the control block if anything changed */
/* NOTE(review): filestat is assigned from stat immediately before being
 * compared with it, so the test below can never be true as written */
650 invcntl->param.filestat = stat;
651 if (stat > invcntl->param.filestat ) {
652 rewind(invcntl->invfile);
653 fwrite(&invcntl->param, sizeof(invcntl->param), 1, invcntl->invfile);
/*
 * invclose: write back state if required, close both files and release the
 * memory attached to *invcntl.
 *
 * If param.filestat > 0 it is reset to 0 and the control block is rewritten
 * at the start of the index file. If param.filestat == INVALONE the
 * superfinger is written back at param.startbyte. When param.share > 0 the
 * shared segment is detached; a non-NULL iindex and the logical-block
 * buffer are freed.
 * NOTE(review): the fwrite() results are not checked on any visible line.
 */
658 /** invclose must be called to wrap things up and deallocate core **/
660 invclose(INVCONTROL *invcntl)
662 /* write out the control block in case anything changed */
663 if (invcntl->param.filestat > 0) {
664 invcntl->param.filestat = 0;
665 rewind(invcntl->invfile);
666 fwrite(&invcntl->param, 1,
667 sizeof(invcntl->param), invcntl->invfile);
669 if (invcntl->param.filestat == INVALONE) {
670 /* write out the super finger */
671 fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
672 fwrite(invcntl->iindex, 1,
673 (int) invcntl->param.supsize, invcntl->invfile);
675 fclose(invcntl->invfile);
676 fclose(invcntl->postfile);
678 if (invcntl->param.share > 0) {
679 shmdt(invcntl->iindex);
/* cleared so the free() further down does not act on the detached segment */
680 invcntl->iindex = NULL;
683 if (invcntl->iindex != NULL)
684 free(invcntl->iindex);
685 free(invcntl->logblk);
/*
 * invstep: advance *invcntl by one item.
 *
 * The visible code tests whether keypnt is before the last item of the
 * current block (invblk[0] - 1); otherwise it follows the block's forward
 * pointer (invblk[1]), seeks to that block and reads it into logblk.
 * NOTE(review): the statements that update keypnt are not visible in this
 * excerpt, and the fread() result is not checked on any visible line.
 */
688 /** invstep steps the inverted file forward one item **/
690 invstep(INVCONTROL *invcntl)
692 if (invcntl->keypnt < (invcntl->logblk->invblk[0] - 1)) {
697 /* move forward a block else wrap */
698 invcntl->numblk = invcntl->logblk->invblk[1]; /* was: *(int *)(invcntl->logblk + sizeof(long))*/
700 /* now read in the block */
/* block n starts at n * sizeblk bytes past the control area (cntlsize) */
701 fseek(invcntl->invfile,
702 invcntl->numblk*invcntl->param.sizeblk + invcntl->param.cntlsize,
704 fread(invcntl->logblk, (int) invcntl->param.sizeblk, 1,
/*
 * invforward: move to the next term, skipping entries whose posting count
 * is zero, and detect wrap-around to the first entry of block 0.
 * NOTE(review): the loop body and the return statements are not visible in
 * this excerpt.
 */
709 /** invforward moves forward one term in the inverted file **/
711 invforward(INVCONTROL *invcntl)
714 /* skip things with 0 postings */
715 /* FIXME HBB: magic number alert! (3) */
/* the ENTRY array starts at invblk[3]; keypnt indexes into it */
716 while (((ENTRY * )(invcntl->logblk->invblk + 3) + invcntl->keypnt)->post == 0) {
719 /* Check for having wrapped - reached start of inverted file! */
720 if ((invcntl->numblk == 0) && (invcntl->keypnt == 0))
/*
 * invterm: copy the term at the current position (keypnt) of the current
 * logical block into 'term', NUL-terminate it and return its posting count.
 *
 * 'term' must have room for entryptr->size + 1 bytes; no length is passed
 * in, so the function cannot check this itself.
 */
725 /** invterm gets the present term from the present logical block **/
727 invterm(INVCONTROL *invcntl, char *term)
731 /* FIXME HBB: magic number alert! (3) */
732 entryptr = (ENTRY *)(invcntl->logblk->invblk + 3) + invcntl->keypnt;
/* the stored term text is not NUL-terminated, so copy exactly 'size' bytes
 * and terminate explicitly */
733 strncpy(term, invcntl->logblk->chrblk + entryptr->offset,
734 (int) entryptr->size);
735 *(term + entryptr->size) = '\0';
736 return(entryptr->post);
/*
 * invfind: locate 'searchterm' in the inverted file.
 *
 * Two binary searches are visible: one over the superfinger (invcntl->iindex)
 * to pick the logical block, which is then read from the file unless it is
 * already in core and the file is not busy, and one over the entries of
 * that block. On a match the posting count is taken from the entry; keypnt
 * is left at the matching or nearest position.
 * NOTE(review): the return statements and several branch bodies are not
 * visible in this excerpt.
 */
739 /** invfind searches for an individual item in the inverted file **/
741 invfind(INVCONTROL *invcntl, char *searchterm) /* term being searched for */
743 int imid, ilow, ihigh;
746 unsigned long *intptr, *intptr2;
749 /* make sure it is initialized via invready */
750 if (invcntl->invfile == 0)
753 /* now search for the appropriate finger block */
754 intptr = (unsigned long *)invcntl->iindex;
/* the first word of the superfinger is a count; the offsets follow it */
757 ihigh = *intptr++ - 1;
758 while (ilow <= ihigh) {
759 imid = (ilow + ihigh) / 2;
760 intptr2 = intptr + imid;
/* each offset is relative to the start of iindex and points at a
 * NUL-terminated term */
761 i = strcmp(searchterm, (invcntl->iindex + *intptr2));
771 /* be careful about case where searchterm is after last in this block */
772 imid = (ilow) ? ilow - 1 : 0;
774 /* fetch the appropriate logical block if not in core */
775 /* note always fetch it if the file is busy */
776 if ((imid != invcntl->numblk) || (invcntl->param.filestat >= INVBUSY)) {
777 fseek(invcntl->invfile,
778 (imid*invcntl->param.sizeblk) + invcntl->param.cntlsize,
780 invcntl->numblk = imid;
781 fread(invcntl->logblk, (int)invcntl->param.sizeblk, 1,
786 /* now find the term in this block. tricky this */
787 intptr = (unsigned long *) invcntl->logblk->invblk;
793 while (ilow <= ihigh) {
794 imid = (ilow + ihigh) / 2;
795 entryptr = (ENTRY *)intptr + imid;
/* stored terms are not NUL-terminated: compare 'size' bytes first, then
 * fall back to the length difference */
796 i = strncmp(searchterm, invcntl->logblk->chrblk + entryptr->offset,
797 (int) entryptr->size );
799 i = strlen(searchterm) - entryptr->size;
805 num = entryptr->post;
809 /* be careful about case where searchterm is after last in this block */
810 if (imid >= invcntl->logblk->invblk[0]) {
811 invcntl->keypnt = invcntl->logblk->invblk[0];
813 /* note if this happens the term could be in extended block */
814 if (invcntl->param.startbyte < invcntl->numblk * invcntl->param.sizeblk)
817 invcntl->keypnt = imid;
/*
 * invdump: debugging dump to stdout.
 *
 * The comments and the visible branches show three modes: dump the
 * superindex when 'term' is "-", read a specific logical block when 'term'
 * starts with '#', or look the term up with invfind(). The entries of the
 * block in core are then printed one per line.
 * NOTE(review): parts of each branch are not visible in this excerpt.
 */
823 /** invdump dumps the block the term parameter is in **/
825 invdump(INVCONTROL *invcntl, char *term)
827 long i, j, n, *longptr;
829 char temp[512], *ptr;
831 /* dump superindex if term is "-" */
834 longptr = (long *)invcntl->iindex;
836 printf("Superindex dump, num blocks=%ld\n", n);
838 while ((longptr <= ((long *)invcntl->iindex) + n) && invbreak == 0) {
839 printf("%2ld %6ld %s\n", j++, *longptr, invcntl->iindex + *longptr);
843 } else if (*term == '#') {
845 /* fetch the appropriate logical block */
847 fseek(invcntl->invfile,
848 (j * invcntl->param.sizeblk) + invcntl->param.cntlsize,
850 fread(invcntl->logblk, (int) invcntl->param.sizeblk, 1,
853 i = abs((int) invfind(invcntl, term));
854 longptr = invcntl->logblk->invblk;
856 printf("Entry term to invdump=%s, postings=%ld, forwrd ptr=%ld, back ptr=%ld\n"
857 , term, i, *(longptr), *(longptr + 1));
858 /* FIXME HBB: magic number alert! (3) */
859 entryptr = (ENTRY *) (invcntl->logblk->invblk + 3);
860 printf("%ld terms in this block, block=%ld\n", n, invcntl->numblk);
861 printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n");
862 for (j = 0; j < n && invbreak == 0; j++) {
863 ptr = invcntl->logblk->chrblk + entryptr->offset;
/* NOTE(review): temp holds 512 bytes, so this relies on entryptr->size
 * being at most 511; confirm */
864 strncpy(temp, ptr, (int) entryptr->size);
865 temp[entryptr->size] = '\0';
/* step past the term text, rounded up to a multiple of sizeof(long) */
866 ptr += (sizeof(long) * (long)((entryptr->size + (sizeof(long) - 1)) / sizeof(long)));
867 printf("%2ld %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp, entryptr->post,
868 entryptr->size, entryptr->offset, entryptr->space,
/*
 * Allocation of the two posting-set buffers item1 and item2, SETINC
 * entries each; presumably the body of boolready(), whose header line is
 * not visible in this excerpt.
 * NOTE(review): on failure invcannotalloc() is passed SETINC rather than
 * the byte count SETINC * sizeof(POSTING) that was requested.
 */
882 if ((item1 = malloc(SETINC * sizeof(POSTING))) == NULL) {
883 invcannotalloc(SETINC);
889 if ((item2 = malloc(SETINC * sizeof(POSTING))) == NULL) {
890 invcannotalloc(SETINC);
/*
 * boolfile: combine the posting set currently held in core (item,
 * numitems) with the postings of the term at invcntl's current position,
 * read from the postings file, according to 'boolarg'.
 *
 * *num is set to the term's posting count. Postings are compared by
 * lineoffset and then by type. The visible merge loops correspond to the
 * OR, AND, NOT and REVERSENOT cases named in the comments. The result is
 * built through newsetp, enditem is set to its end, and 'item' is returned.
 * NOTE(review): most case labels, the counter updates and the buffer
 * selection logic are not visible in this excerpt, and none of the fread()
 * results are checked on the visible lines.
 */
907 boolfile(INVCONTROL *invcntl, long *num, int boolarg)
913 POSTING *newitem = NULL; /* initialize, to avoid warning */
916 POSTING *newsetp = NULL, *set1p;
917 long newsetc, set1c, set2c;
919 /* FIXME HBB: magic number alert! (3) */
920 entryptr = (ENTRY *) (invcntl->logblk->invblk + 3) + invcntl->keypnt;
921 ptr = invcntl->logblk->chrblk + entryptr->offset;
/* the word right after the term text (rounded up to a long) is the
 * term's offset in the postings file */
922 ptr2 = ((unsigned long *) ptr) + (entryptr->size + (sizeof(long) - 1)) / sizeof(long);
923 *num = entryptr->post;
932 /* make room for the new set */
948 if ((item1 = realloc(
949 item1, u * sizeof(POSTING))) == NULL) {
959 if ((item2 = realloc(
960 item2, u * sizeof(POSTING))) == NULL) {
962 invcannotalloc(u * sizeof(POSTING));
971 #if 0 /* this write is only need by commented-out code later */
976 file = invcntl->postfile;
977 fseek(file, *ptr2, SEEK_SET);
978 fread(&posting, sizeof(posting), 1, file);
982 /* while something in both sets */
985 for (set1c = 0, set2c = 0;
986 set1c < numitems && set2c < *num; newsetc++) {
987 if (set1p->lineoffset < posting.lineoffset) {
988 *newsetp++ = *set1p++;
991 else if (set1p->lineoffset > posting.lineoffset) {
992 *newsetp++ = posting;
993 fread(&posting, (int) sizeof(posting), 1, file);
996 else if (set1p->type < posting.type) {
997 *newsetp++ = *set1p++;
1000 else if (set1p->type > posting.type) {
1001 *newsetp++ = posting;
1002 fread(&posting, (int) sizeof(posting), 1, file);
1005 else { /* identical postings */
1006 *newsetp++ = *set1p++;
1008 fread(&posting, (int) sizeof(posting), 1, file);
1012 /* find out what ran out and move the rest in */
1013 if (set1c < numitems) {
1014 newsetc += numitems - set1c;
1015 while (set1c++ < numitems) {
1016 *newsetp++ = *set1p++;
1019 while (set2c++ < *num) {
1020 *newsetp++ = posting;
1022 fread(&posting, (int) sizeof(posting), 1, file);
1026 break; /* end of BOOL_OR */
/* AND: only postings present in both sets are copied to the result */
1029 for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
1030 if (set1p->lineoffset < posting.lineoffset) {
1034 else if (set1p->lineoffset > posting.lineoffset) {
1035 fread(&posting, (int) sizeof(posting), 1, file);
1038 else if (set1p->type < posting.type) {
1042 else if (set1p->type > posting.type) {
1043 fread(&posting, (int) sizeof(posting), 1, file);
1046 else { /* identical postings */
1047 *newsetp++ = *set1p++;
1050 fread(&posting, (int) sizeof(posting), 1, file);
1054 break; /* end of AND */
/* NOT: postings of the in-core set that have no match in the file set */
1057 for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
1058 if (set1p->lineoffset < posting.lineoffset) {
1059 *newsetp++ = *set1p++;
1063 else if (set1p->lineoffset > posting.lineoffset) {
1064 fread(&posting, (int) sizeof(posting), 1, file);
1067 else if (set1p->type < posting.type) {
1068 *newsetp++ = *set1p++;
1072 else if (set1p->type > posting.type) {
1073 fread(&posting, (int) sizeof(posting), 1, file);
1076 else { /* identical postings */
1079 fread(&posting, (int) sizeof(posting), 1, file);
1083 newsetc += numitems - set1c;
1084 while (set1c++ < numitems) {
1085 *newsetp++ = *set1p++;
1087 break; /* end of NOT */
1089 case REVERSENOT: /* core NOT incoming set */
1090 for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
1091 if (set1p->lineoffset < posting.lineoffset) {
1095 else if (set1p->lineoffset > posting.lineoffset) {
1096 *newsetp++ = posting;
1097 fread(&posting, (int) sizeof(posting), 1, file);
1100 else if (set1p->type < posting.type) {
1104 else if (set1p->type > posting.type) {
1105 *newsetp++ = posting;
1106 fread(&posting, (int) sizeof(posting), 1, file);
1109 else { /* identical postings */
1112 fread(&posting, (int) sizeof(posting), 1, file);
1116 while (set2c++ < *num) {
1117 *newsetp++ = posting;
1119 fread(&posting, (int) sizeof(posting), 1, file);
1122 break; /* end of REVERSENOT */
1127 enditem = (POSTING *) newsetp;
1128 return((POSTING *) item);
/*
 * boolsave: hand the current posting set to the caller.
 *
 * According to the comments, when 'clear' is set the existing buffer itself
 * is given away; otherwise a new buffer of (enditem - item) postings plus
 * 100 spare bytes is allocated and the set is copied into it.
 * NOTE(review): the return statements and the 'clear' branch are not
 * visible in this excerpt.
 */
1133 boolsave(int clear) /* flag about whether to clear core */
1137 POSTING *oldstuff, *newstuff;
1139 if (numitems == 0) {
1144 /* if clear then give them what we have and use boolready to realloc */
1147 /* free up the space we didn't give them */
1155 i = (enditem - item) * sizeof(POSTING) + 100;
/* NOTE(review): the stray 'r' after the closing parentheses on the next
 * line is a syntax error; the test was presumably meant to read
 * (ptr = malloc(i)) == NULL */
1156 if ((ptr = malloc(i))r == NULL) {
1160 /* move present set into place */
1163 while (oldstuff < enditem)
1164 *newstuff++ = *oldstuff++;
/* invcannotalloc: report on stderr that an allocation of 'n' bytes failed,
 * prefixed with the program name argv0. */
1170 invcannotalloc(unsigned n)
1172 fprintf(stderr, "%s: cannot allocate %u bytes\n", argv0, n);
/* invcannotopen: report on stderr that 'file' could not be opened,
 * prefixed with the program name argv0. */
1176 invcannotopen(char *file)
1178 fprintf(stderr, "%s: cannot open file %s\n", argv0, file);
/* invcannotwrite: report a failed write to 'file' on stderr. perror() is
 * called first so that errno still describes the failed write when the
 * system message is printed. */
1182 invcannotwrite(char *file)
1184 perror(argv0); /* must be first to preserve errno */
1185 fprintf(stderr, "%s: write to file %s failed\n", argv0, file);