2 /* Copyright (C) 1989-2014 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
40 void *mapread(int fd, int len);
41 int unmap(void *, int len);
53 class index_search_item : public search_item {
54 search_item *out_of_date_files;
64 char *filename_buffer;
66 char **common_words_table;
67 int common_words_table_size;
68 const char *ignore_fields;
71 const char *do_verify();
72 const int *search1(const char **pp, const char *end);
73 const int *search(const char *ptr, int length, int **temp_listp);
74 const char *munge_filename(const char *);
75 void read_common_words_file();
76 void add_out_of_date_file(int fd, const char *filename, int fid);
78 index_search_item(const char *, int);
81 search_item_iterator *make_search_item_iterator(const char *);
84 int next_filename_id() const;
85 friend class index_search_item_iterator;
88 class index_search_item_iterator : public search_item_iterator {
89 index_search_item *indx;
90 search_item_iterator *out_of_date_files_iter;
91 search_item *next_out_of_date_file;
92 const int *found_list;
96 linear_searcher searcher;
98 int get_tag(int tagno, const linear_searcher &, const char **, int *,
101 index_search_item_iterator(index_search_item *, const char *);
102 ~index_search_item_iterator();
103 int next(const linear_searcher &, const char **, int *, reference_id *);
107 index_search_item::index_search_item(const char *filename, int fid)
108 : search_item(filename, fid), out_of_date_files(0), buffer(0), map_addr(0),
109 map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0),
110 common_words_table(0)
114 index_search_item::~index_search_item()
119 if (unmap(map_addr, map_len) < 0)
120 error("unmap: %1", strerror(errno));
122 while (out_of_date_files) {
123 search_item *tem = out_of_date_files;
124 out_of_date_files = out_of_date_files->next;
127 a_delete filename_buffer;
129 if (common_words_table) {
130 for (int i = 0; i < common_words_table_size; i++)
131 a_delete common_words_table[i];
132 a_delete common_words_table;
139 file_closer(int &fd) : fdp(&fd) { }
140 ~file_closer() { close(*fdp); }
143 // Tell the compiler that a variable is intentionally unused.
144 inline void unused(void *) { }
146 int index_search_item::load(int fd)
148 file_closer fd_closer(fd); // close fd on return
151 if (fstat(fd, &sb) < 0) {
152 error("can't fstat `%1': %2", name, strerror(errno));
155 if (!S_ISREG(sb.st_mode)) {
156 error("`%1' is not a regular file", name);
160 int size = int(sb.st_size);
162 map_addr = mapread(fd, size);
164 addr = (char *)map_addr;
168 addr = buffer = (char *)malloc(size);
170 error("can't allocate buffer for `%1'", name);
174 int bytes_to_read = size;
175 while (bytes_to_read > 0) {
176 int nread = read(fd, ptr, bytes_to_read);
178 error("unexpected EOF on `%1'", name);
182 error("read error on `%1': %2", name, strerror(errno));
185 bytes_to_read -= nread;
189 header = *(index_header *)addr;
190 if (header.magic != INDEX_MAGIC) {
191 error("`%1' is not an index file: wrong magic number", name);
194 if (header.version != INDEX_VERSION) {
195 error("version number in `%1' is wrong: was %2, should be %3",
196 name, header.version, INDEX_VERSION);
199 int sz = (header.tags_size * sizeof(tag)
200 + header.lists_size * sizeof(int)
201 + header.table_size * sizeof(int)
202 + header.strings_size
205 error("size of `%1' is wrong: was %2, should be %3",
209 tags = (tag *)(addr + sizeof(header));
210 lists = (int *)(tags + header.tags_size);
211 table = (int *)(lists + header.lists_size);
212 pool = (char *)(table + header.table_size);
213 ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1;
214 key_buffer = new char[header.truncate];
215 read_common_words_file();
219 const char *index_search_item::do_verify()
223 if (lists[header.lists_size - 1] >= 0)
224 return "last list element not negative";
226 for (i = 0; i < header.table_size; i++) {
228 if (li >= header.lists_size)
229 return "bad list index";
231 for (int *ptr = lists + li; *ptr >= 0; ptr++) {
232 if (*ptr >= header.tags_size)
233 return "bad tag index";
234 if (*ptr >= ptr[1] && ptr[1] >= 0)
235 return "list not ordered";
239 for (i = 0; i < header.tags_size; i++) {
240 if (tags[i].filename_index >= header.strings_size)
241 return "bad index in tags";
242 if (tags[i].length < 0)
243 return "bad length in tags";
244 if (tags[i].start < 0)
245 return "bad start in tags";
247 if (pool[header.strings_size - 1] != '\0')
248 return "last character in pool not nul";
252 int index_search_item::verify()
254 const char *reason = do_verify();
257 error("`%1' is bad: %2", name, reason);
261 int index_search_item::next_filename_id() const
263 return filename_id + header.strings_size + 1;
266 search_item_iterator *index_search_item::make_search_item_iterator(
269 return new index_search_item_iterator(this, query);
272 search_item *make_index_search_item(const char *filename, int fid)
274 char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)];
275 strcpy(index_filename, filename);
276 strcat(index_filename, INDEX_SUFFIX);
277 int fd = open(index_filename, O_RDONLY | O_BINARY);
280 index_search_item *item = new index_search_item(index_filename, fid);
281 a_delete index_filename;
282 if (!item->load(fd)) {
287 else if (verify_flag && !item->verify()) {
298 index_search_item_iterator::index_search_item_iterator(index_search_item *ind,
300 : indx(ind), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0),
302 searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate),
305 found_list = indx->search(q, strlen(q), &temp_list);
307 found_list = &minus_one;
308 warning("all keys would have been discarded in constructing index `%1'",
313 index_search_item_iterator::~index_search_item_iterator()
318 delete out_of_date_files_iter;
321 int index_search_item_iterator::next(const linear_searcher &,
322 const char **pp, int *lenp,
327 int tagno = *found_list;
331 if (get_tag(tagno, searcher, pp, lenp, ridp))
335 next_out_of_date_file = indx->out_of_date_files;
337 while (next_out_of_date_file) {
338 if (out_of_date_files_iter == 0)
339 out_of_date_files_iter
340 = next_out_of_date_file->make_search_item_iterator(query);
341 if (out_of_date_files_iter->next(searcher, pp, lenp, ridp))
343 delete out_of_date_files_iter;
344 out_of_date_files_iter = 0;
345 next_out_of_date_file = next_out_of_date_file->next;
350 int index_search_item_iterator::get_tag(int tagno,
351 const linear_searcher &searchr,
352 const char **pp, int *lenp,
355 if (tagno < 0 || tagno >= indx->header.tags_size) {
356 error("bad tag number");
359 tag *tp = indx->tags + tagno;
360 const char *filename = indx->munge_filename(indx->pool + tp->filename_index);
361 int fd = open(filename, O_RDONLY | O_BINARY);
363 error("can't open `%1': %2", filename, strerror(errno));
367 if (fstat(fd, &sb) < 0) {
368 error("can't fstat: %1", strerror(errno));
372 time_t mtime = sb.st_mtime;
373 if (mtime > indx->mtime) {
374 indx->add_out_of_date_file(fd, filename,
375 indx->filename_id + tp->filename_index);
379 FILE *fp = fdopen(fd, FOPEN_RB);
381 error("fdopen failed");
385 if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0)
386 error("can't seek on `%1': %2", filename, strerror(errno));
388 int length = tp->length;
391 if (fstat(fileno(fp), &sb) < 0) {
392 error("can't stat `%1': %2", filename, strerror(errno));
395 else if (!S_ISREG(sb.st_mode)) {
396 error("`%1' is not a regular file", filename);
400 length = int(sb.st_size);
403 if (length + 2 > buflen) {
406 buf = new char[buflen];
408 if (fread(buf + 1, 1, length, fp) != (size_t)length)
409 error("fread on `%1' failed: %2", filename, strerror(errno));
412 // Remove the CR characters from CRLF pairs.
413 int sidx = 1, didx = 1;
414 for ( ; sidx < length + 1; sidx++, didx++)
416 if (buf[sidx] == '\r')
418 if (buf[++sidx] != '\n')
424 buf[didx] = buf[sidx];
426 buf[length + 1] = '\n';
427 res = searchr.search(buf + 1, buf + 2 + length, pp, lenp);
429 *ridp = reference_id(indx->filename_id + tp->filename_index,
438 const char *index_search_item::munge_filename(const char *filename)
440 if (IS_ABSOLUTE(filename))
442 const char *cwd = pool;
443 int need_slash = (cwd[0] != 0
444 && strchr(DIR_SEPS, strchr(cwd, '\0')[-1]) == 0);
445 int len = strlen(cwd) + strlen(filename) + need_slash + 1;
446 if (len > filename_buflen) {
447 a_delete filename_buffer;
448 filename_buflen = len;
449 filename_buffer = new char[len];
451 strcpy(filename_buffer, cwd);
453 strcat(filename_buffer, "/");
454 strcat(filename_buffer, filename);
455 return filename_buffer;
458 const int *index_search_item::search1(const char **pp, const char *end)
460 while (*pp < end && !csalnum(**pp))
464 const char *start = *pp;
465 while (*pp < end && csalnum(**pp))
467 int len = *pp - start;
468 if (len < header.shortest)
470 if (len > header.truncate)
471 len = header.truncate;
473 for (int i = 0; i < len; i++)
474 if (csdigit(start[i]))
475 key_buffer[i] = start[i];
477 key_buffer[i] = cmlower(start[i]);
480 if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9'))
482 unsigned hc = hash(key_buffer, len);
483 if (common_words_table) {
484 for (int h = hc % common_words_table_size;
485 common_words_table[h];
487 if (strlen(common_words_table[h]) == (size_t)len
488 && memcmp(common_words_table[h], key_buffer, len) == 0)
491 h = common_words_table_size;
494 int li = table[int(hc % header.table_size)];
495 return li < 0 ? &minus_one : lists + li;
498 static void merge(int *result, const int *s1, const int *s2)
500 for (; *s1 >= 0; s1++) {
501 while (*s2 >= 0 && *s2 < *s1)
509 const int *index_search_item::search(const char *ptr, int length,
512 const char *end = ptr + length;
514 a_delete *temp_listp;
517 const int *first_list = 0;
518 while (ptr < end && (first_list = search1(&ptr, end)) == 0)
524 const int *second_list = 0;
525 while (ptr < end && (second_list = search1(&ptr, end)) == 0)
529 if (*second_list < 0)
532 for (p = first_list; *p >= 0; p++)
534 int len = p - first_list;
535 for (p = second_list; *p >= 0; p++)
537 if (p - second_list < len)
538 len = p - second_list;
539 int *matches = new int[len + 1];
540 merge(matches, first_list, second_list);
542 const int *list = search1(&ptr, end);
548 merge(matches, matches, list);
555 *temp_listp = matches;
559 void index_search_item::read_common_words_file()
561 if (header.common <= 0)
563 const char *common_words_file = munge_filename(strchr(pool, '\0') + 1);
565 FILE *fp = fopen(common_words_file, "r");
567 error("can't open `%1': %2", common_words_file, strerror(errno));
570 common_words_table_size = 2*header.common + 1;
571 while (!is_prime(common_words_table_size))
572 common_words_table_size++;
573 common_words_table = new char *[common_words_table_size];
574 for (int i = 0; i < common_words_table_size; i++)
575 common_words_table[i] = 0;
580 while (c != EOF && !csalnum(c))
585 if (key_len < header.truncate)
586 key_buffer[key_len++] = cmlower(c);
588 } while (c != EOF && csalnum(c));
589 if (key_len >= header.shortest) {
590 int h = hash(key_buffer, key_len) % common_words_table_size;
591 while (common_words_table[h]) {
593 h = common_words_table_size;
596 common_words_table[h] = new char[key_len + 1];
597 memcpy(common_words_table[h], key_buffer, key_len);
598 common_words_table[h][key_len] = '\0';
600 if (++count >= header.common)
609 void index_search_item::add_out_of_date_file(int fd, const char *filename,
613 for (pp = &out_of_date_files; *pp; pp = &(*pp)->next)
614 if ((*pp)->is_named(filename))
616 *pp = make_linear_search_item(fd, filename, fid);
617 warning("`%1' modified since `%2' created", filename, name);
620 void index_search_item::check_files()
622 const char *pool_end = pool + header.strings_size;
623 for (const char *ptr = strchr(ignore_fields, '\0') + 1;
625 ptr = strchr(ptr, '\0') + 1) {
626 const char *path = munge_filename(ptr);
628 if (stat(path, &sb) < 0)
629 error("can't stat `%1': %2", path, strerror(errno));
630 else if (sb.st_mtime > mtime) {
631 int fd = open(path, O_RDONLY | O_BINARY);
633 error("can't open `%1': %2", path, strerror(errno));
635 add_out_of_date_file(fd, path, filename_id + (ptr - pool));