2 /* Copyright (C) 1989-2014 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
// Read the file open on descriptor fd into the internal buffer; filename
// is only used to label error messages.
// NOTE(review): callers test the result with `!load(...)`, so presumably
// nonzero means success -- confirm against the return statements.
42 int load(int fd, const char *filename);
// Start of the loaded text (4 bytes into the allocation), or 0 when no
// buffer is held.
43 const char *get_start() const;
// NOTE(review): presumably returns bufend, i.e. one past the last byte of
// loaded text -- confirm.
44 const char *get_end() const;
47 typedef unsigned char uchar;
// map[c] is cmlower(c) for characters accepted by csalnum() and '\0' for
// every other byte, so a zero entry marks a character that cannot be part
// of a key.
49 static uchar map[256];
// inv_map[m] is a '\0'-terminated list (room for two entries plus the
// terminator) of bytes associated with mapped byte m.
// NOTE(review): presumably the set of text bytes that map[] sends to m
// (e.g. both cases of a letter) -- confirm.
50 static uchar inv_map[256][3];
// File-scope instance of map_init.
// NOTE(review): presumably exists so that its constructor fills map[] and
// inv_map[] during static initialization -- confirm.
56 static map_init the_map_init;
// Forward table: alphanumerics translate to cmlower(i), everything else
// to '\0'.
61 for (i = 0; i < 256; i++)
62 map[i] = csalnum(i) ? cmlower(i) : '\0';
// Inverse table, one row per byte value.  The second slot of a row can
// hold cmupper(i); digits are handled by a separate branch.
63 for (i = 0; i < 256; i++) {
66 inv_map[i][1] = cmupper(i);
69 else if (csdigit(i)) {
84 bmpattern(const char *pattern, int pattern_length);
86 const char *search(const char *p, const char *end) const;
// Construct a matcher for the given key text.  The key is stored in
// map[]-translated form and a per-byte shift table (delta) is derived
// from it.
// NOTE(review): assumes len has been set from pattern_length and pat has
// room for len bytes before the loops below run -- confirm.
90 bmpattern::bmpattern(const char *pattern, int pattern_length)
// Translate each key byte through map[] so that search() can compare it
// directly with map[]-translated text bytes.
95 for (i = 0; i < len; i++)
96 pat[i] = map[uchar(pattern[i])];
// NOTE(review): presumably gives every delta[] entry a default shift
// before the per-position pass below -- confirm.
97 for (i = 0; i < 256; i++)
// For key position i, every byte listed in inv_map[] for that key byte
// gets the shift len - i - 1, its distance from the last key position.
// Later positions overwrite earlier ones, so the right-most occurrence
// decides, and bytes matching the last key byte end up with shift 0.
99 for (i = 0; i < len; i++)
100 for (const unsigned char *inv = inv_map[uchar(pat[i])]; *inv; inv++)
101 delta[*inv] = len - i - 1;
// Look for the stored key in the text [buf, end).
// NOTE(review): presumably returns a pointer to the first occurrence, or
// 0 when there is none -- confirm against the return statements.
104 const char *bmpattern::search(const char *buf, const char *end) const
106 int buflen = end - buf;
// NOTE(review): strend lies len*4 bytes before end; presumably the limit
// for a faster scan that skips the per-step `k < end` test -- confirm.
111 strend = end - len*4;
// k always points at the text byte aligned with the LAST key byte.
114 const char *k = buf + len - 1;
// Local copies of the member tables used by the scan loops.
115 const int *del = delta;
116 const char *pattern = pat;
// Shift associated with the text byte currently under k.
119 int t = del[uchar(*k)];
// Bounds-checked scan: continue while the byte under k has a non-zero
// shift, i.e. cannot be the last key byte.
126 while (k < end && del[uchar(*k)] != 0)
// Candidate found: walk backwards comparing map[]-translated text bytes
// with the stored (already translated) key bytes.
135 if (map[uchar(*--s)] != uchar(pattern[--j]))
143 bmpattern::~bmpattern()
148 inline int bmpattern::length() const
154 static const char *find_end(const char *bufend, const char *p);
// Search [buf, bufend) for key and validate each raw hit with
// check_match().  Requires a newline immediately before buf and as the
// last byte before bufend (both asserted).
// NOTE(review): presumably returns the first accepted hit (with *start
// filled in by check_match) or 0 when none is accepted -- confirm.
156 const char *linear_searcher::search_and_check(const bmpattern *key,
157 const char *buf, const char *bufend, const char **start) const
159 assert(buf[-1] == '\n');
160 assert(bufend[-1] == '\n');
// ptr is the position from which the next raw search starts; check_match()
// receives its address and may move it forward.
161 const char *ptr = buf;
163 const char *found = key->search(ptr, bufend);
166 if (check_match(buf, bufend, found, key->length(), &ptr, start))
// Helper used by check_match() to find where searching should resume
// after a match inside a field; scanning is bounded by end.
// NOTE(review): presumably returns the start of the next field or end --
// confirm.
172 static const char *skip_field(const char *end, const char *p)
// Stop at the end of the text or at a '%' that starts a line.
176 if (p == end || *p == '%')
// Skip leading blanks (spaces and tabs) on the line.
179 for (q = p; *q == ' ' || *q == '\t'; q++)
// Helper used by linear_searcher::search() to locate the end of the
// reference containing p; scanning is bounded by bufend.
188 static const char *find_end(const char *bufend, const char *p)
// Skip leading blanks (spaces and tabs) on the line.
195 for (q = p; *q == ' ' || *q == '\t'; q++)
// Decide whether a raw pattern hit at match (matchlen bytes long) inside
// [buf, bufend) counts as a genuine key match.  *cont receives the
// position from which searching should continue.
// NOTE(review): presumably returns nonzero for an accepted match and sets
// *start to the beginning of the enclosing reference -- confirm.
205 int linear_searcher::check_match(const char *buf, const char *bufend,
206 const char *match, int matchlen,
207 const char **cont, const char **start) const
210 // The user is required to supply only the first truncate_len characters
211 // of the key. If truncate_len <= 0, the user must supply the whole key.
// So unless the supplied key already reaches truncate_len, the text byte
// right after the match must not be a key character.
212 if ((truncate_len <= 0 || matchlen < truncate_len)
213 && map[uchar(match[matchlen])] != '\0')
216 // The character before the match must not be an alphanumeric
217 // character (unless the alphanumeric character follows one or two
218 // percent characters at the beginning of the line), nor must it be
219 // a percent character at the beginning of a line, nor a percent
220 // character following a percent character at the beginning of a line.
// The offset of the match from buf selects how many preceding bytes
// (match[-1] .. match[-4]) are examined.
223 switch (match - buf) {
227 if (match[-1] == '%' || map[uchar(match[-1])] != '\0')
231 if (map[uchar(match[-1])] != '\0' && match[-2] != '%')
234 && (match[-2] == '\n' || match[-2] == '%'))
238 if (map[uchar(match[-1])] != '\0'
239 && !(match[-2] == '%'
240 && (match[-3] == '\n'
241 || (match[-3] == '%' && match[-4] == '\n'))))
244 && (match[-2] == '\n'
245 || (match[-2] == '%' && match[-3] == '\n')))
249 const char *p = match;
// p[1] == '%' marks a field line; p[2] is the field letter.  If that
// letter is listed in ignore_fields, the hit is inside an ignored field and
// searching resumes after that field via *cont.
253 if (!had_percent && p[1] == '%') {
254 if (p[2] != '\0' && strchr(ignore_fields, p[2]) != 0) {
255 *cont = skip_field(bufend, match + matchlen);
// Walk backwards over blanks (spaces and tabs) preceding p.
268 for (q = p - 1; *q == ' ' || *q == '\t'; q--)
282 file_buffer::file_buffer()
283 : buffer(0), bufend(0)
287 file_buffer::~file_buffer()
// Return the first byte of file text, or 0 if no buffer is held.  The
// text starts 4 bytes into the allocation because load() reads the file
// at buffer + 4.
292 const char *file_buffer::get_start() const
294 return buffer ? buffer + 4 : 0;
297 const char *file_buffer::get_end() const
// Load the regular file open on fd into a freshly allocated buffer,
// reporting problems through error() with filename as the label.  The
// file must be a regular file, must not change size while being read, and
// must not contain a NUL in its first 1024 bytes.  "\r\n" pairs are
// collapsed to "\n" in place.
// NOTE(review): presumably returns nonzero on success and 0 on failure --
// confirm against the return statements.
302 int file_buffer::load(int fd, const char *filename)
305 if (fstat(fd, &sb) < 0)
306 error("can't fstat `%1': %2", filename, strerror(errno));
307 else if (!S_ISREG(sb.st_mode))
308 error("`%1' is not a regular file", filename);
310 // We need one character extra at the beginning for an additional newline
311 // used as a sentinel. We get 4 instead so that the read buffer will be
312 // word-aligned. This seems to make the read slightly faster. We also
313 // need one character at the end for an additional newline used as a sentinel.
// NOTE(review): st_size is narrowed to int here, so a file whose size does
// not fit in int would yield a wrong size -- confirm whether that matters.
315 int size = int(sb.st_size);
316 buffer = new char[size + 4 + 1];
// A single read() is expected to deliver the whole file; any shorter
// count is reported below as the file having shrunk.
317 int nread = read(fd, buffer + 4, size);
319 error("error reading `%1': %2", filename, strerror(errno));
320 else if (nread != size)
321 error("size of `%1' decreased", filename);
// Try to read one more byte to detect a file that grew after fstat().
324 nread = read(fd, &c, 1);
326 error("size of `%1' increased", filename);
// A NUL within the first 1024 bytes (or the whole file if shorter) is
// treated as evidence of a binary file.
327 else if (memchr(buffer + 4, '\0', size < 1024 ? size : 1024) != 0)
328 error("database `%1' is a binary file", filename);
// In-place CR LF -> LF compaction: sidx is the read index, didx the write
// index; both start at the first text byte.
332 int sidx = 4, didx = 4;
333 for ( ; sidx < size + 4; sidx++, didx++)
335 if (buffer[sidx] == '\r')
// Look at the byte after the '\r'; a lone '\r' is kept.
// NOTE(review): if the '\r' is the last byte read, ++sidx indexes the
// spare trailing slot of the allocation, which read() did not fill --
// confirm this case is harmless.
337 if (buffer[++sidx] != '\n')
338 buffer[didx++] = '\r';
343 buffer[didx] = buffer[sidx];
345 bufend = buffer + 4 + size;
// NOTE(review): presumably appends the trailing sentinel newline when the
// text does not already end in one -- confirm.
346 if (bufend[-1] != '\n')
// Build a searcher from a query string: every maximal run of key
// characters (bytes with a non-zero map[] entry) in the first query_len
// bytes of query becomes one bmpattern.  ign and trunc are stored as
// ignore_fields and truncate_len.
358 linear_searcher::linear_searcher(const char *query, int query_len,
359 const char *ign, int trunc)
360 : ignore_fields(ign), truncate_len(trunc), keys(0), nkeys(0)
362 const char *query_end = query + query_len;
// First pass: look for the last character of each key -- a key character
// followed by '\0' or by a non-key character.
// NOTE(review): p[1] is read even when p is the last byte before
// query_end, so query[query_len] must be readable (presumably the
// terminating NUL) -- confirm with callers.
365 for (p = query; p < query_end; p++)
366 if (map[uchar(*p)] != '\0'
367 && (p[1] == '\0' || map[uchar(p[1])] == '\0'))
371 keys = new bmpattern*[nk];
// Second pass: skip separators, then take the following run of key
// characters as one pattern.
374 while (p < query_end && map[uchar(*p)] == '\0')
378 const char *start = p;
379 while (p < query_end && map[uchar(*p)] != '\0')
381 keys[nkeys++] = new bmpattern(start, p - start);
390 linear_searcher::~linear_searcher()
392 for (int i = 0; i < nkeys; i++)
// Find the next reference in [buffer, bufend) that contains every key.
// The range must be non-empty, preceded by a newline and ended by a
// newline (all asserted).  On success the length of the reference is
// stored in *lengthp.
// NOTE(review): presumably also stores the reference start in *startp and
// returns nonzero, returning 0 when no reference matches -- confirm.
397 int linear_searcher::search(const char *buffer, const char *bufend,
398 const char **startp, int *lengthp) const
400 assert(bufend - buffer > 0);
401 assert(buffer[-1] == '\n');
402 assert(bufend[-1] == '\n');
// The first key locates a candidate reference; refstart receives where
// that reference begins.
406 const char *refstart;
407 const char *found = search_and_check(keys[0], buffer, bufend, &refstart);
// The candidate reference ends where find_end() says, scanning from just
// past the first key's match.
410 const char *refend = find_end(bufend, found + keys[0]->length());
// Every remaining key must also be found inside [refstart, refend).
412 for (i = 1; i < nkeys; i++)
413 if (!search_and_check(keys[i], refstart, refend))
417 *lengthp = refend - refstart;
425 class linear_search_item : public search_item {
428 linear_search_item(const char *filename, int fid);
429 ~linear_search_item();
431 search_item_iterator *make_search_item_iterator(const char *);
432 friend class linear_search_item_iterator;
435 class linear_search_item_iterator : public search_item_iterator {
436 linear_search_item *lsi;
439 linear_search_item_iterator(linear_search_item *, const char *query);
440 ~linear_search_item_iterator();
441 int next(const linear_searcher &, const char **ptr, int *lenp,
// Factory: allocate a linear_search_item for filename/fid and load its
// contents from the already-open descriptor fd.
// NOTE(review): the failure branch presumably frees the item and returns
// 0 -- confirm.
445 search_item *make_linear_search_item(int fd, const char *filename, int fid)
447 linear_search_item *item = new linear_search_item(filename, fid);
448 if (!item->load(fd)) {
456 linear_search_item::linear_search_item(const char *filename, int fid)
457 : search_item(filename, fid)
461 linear_search_item::~linear_search_item()
// Load the file open on fd into this item's file buffer, using the
// item's name to label error messages; the result of file_buffer::load()
// is passed through unchanged.
465 int linear_search_item::load(int fd)
467 return fbuf.load(fd, name);
470 search_item_iterator *linear_search_item::make_search_item_iterator(
473 return new linear_search_item_iterator(this, query);
476 linear_search_item_iterator::linear_search_item_iterator(
477 linear_search_item *p, const char *)
482 linear_search_item_iterator::~linear_search_item_iterator()
// Produce the next matching reference from the item's buffer.  pos is the
// offset from the start of the text at which the search resumes.
// NOTE(review): presumably returns nonzero when a reference was found and
// 0 when the buffer is exhausted -- confirm.
486 int linear_search_item_iterator::next(const linear_searcher &searcher,
487 const char **startp, int *lengthp,
490 const char *bufstart = lsi->fbuf.get_start();
491 const char *bufend = lsi->fbuf.get_end();
492 const char *ptr = bufstart + pos;
493 if (ptr < bufend && searcher.search(ptr, bufend, startp, lengthp)) {
// Resume the next call just past the reference reported now.
494 pos = *startp + *lengthp - bufstart;
// The reference id combines the file id with the offset of the
// reference's start within the loaded text.
496 *ridp = reference_id(lsi->filename_id, *startp - bufstart);