1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
28 #include "camel-memchunk.h"
29 #include "camel-trie.h"
34 struct _trie_state *next;
35 struct _trie_state *fail;
36 struct _trie_match *match;
42 struct _trie_match *next;
43 struct _trie_state *state;
50 * A trie data structure.
55 struct _trie_state root;
56 GPtrArray *fail_states;
59 CamelMemChunk *match_chunks;
60 CamelMemChunk *state_chunks;
63 static inline gunichar
64 trie_utf8_getc (const guchar **in,
67 register const guchar *inptr = *in;
68 const guchar *inend = inptr + inlen;
70 register gunichar u, m;
79 } else if (r < 0xfe) { /* valid start char? */
81 m = 0x7f80; /* used to mask out the length bits */
87 if ((c & 0xc0) != 0x80)
90 u = (u << 6) | (c & 0x3f);
109 * @icase: Case sensitivity for the #CamelTrie.
111 * Creates a new #CamelTrie. If @icase is %TRUE, then pattern matching
112 * done by the CamelTrie will be case insensitive.
114 * Returns: The newly-created #CamelTrie.
119 camel_trie_new (gboolean icase)
123 trie = g_new (CamelTrie, 1);
124 trie->root.next = NULL;
125 trie->root.fail = NULL;
126 trie->root.match = NULL;
127 trie->root.final = 0;
129 trie->fail_states = g_ptr_array_sized_new (8);
132 trie->match_chunks = camel_memchunk_new (8, sizeof (struct _trie_match));
133 trie->state_chunks = camel_memchunk_new (8, sizeof (struct _trie_state));
140 * @trie: The #CamelTrie to free.
142 * Frees the memory associated with the #CamelTrie @trie.
147 camel_trie_free (CamelTrie *trie)
149 g_ptr_array_free (trie->fail_states, TRUE);
150 camel_memchunk_destroy (trie->match_chunks);
151 camel_memchunk_destroy (trie->state_chunks);
155 static struct _trie_match *
156 g (struct _trie_state *s,
159 struct _trie_match *m = s->match;
161 while (m && m->c != c)
167 static struct _trie_state *
168 trie_insert (CamelTrie *trie,
170 struct _trie_state *q,
173 struct _trie_match *m;
175 m = camel_memchunk_alloc (trie->match_chunks);
180 q = m->state = camel_memchunk_alloc (trie->state_chunks);
182 q->fail = &trie->root;
186 if (trie->fail_states->len < depth + 1) {
187 guint size = trie->fail_states->len;
189 size = MAX (size + 64, depth + 1);
190 g_ptr_array_set_size (trie->fail_states, size);
193 q->next = trie->fail_states->pdata[depth];
194 trie->fail_states->pdata[depth] = q;
201 dump_trie (struct _trie_state *s,
204 gchar *p = g_alloca ((depth * 2) + 1);
205 struct _trie_match *m;
207 memset (p, ' ', depth * 2);
211 stderr, "%s[state] %p: final=%d; pattern-id=%d; fail=%p\n",
212 p, s, s->final, s->id, s->fail);
215 fprintf (stderr, " %s'%c' -> %p\n", p, m->c, m->state);
217 dump_trie (m->state, depth + 1);
229 * IF g(q, pat[p][j]) == null
230 * insert(q, pat[p][j])
232 * q = g(q, pat[p][j])
234 * final = union(final, q)
240 * @trie: The #CamelTrie to add a pattern to.
241 * @pattern: The pattern to add.
242 * @pattern_id: The id to use for the pattern.
244 * Add a new pattern to the #CamelTrie @trie.
249 camel_trie_add (CamelTrie *trie,
250 const gchar *pattern,
253 const guchar *inptr = (const guchar *) pattern;
254 struct _trie_state *q, *q1, *r;
255 struct _trie_match *m, *n;
259 /* Step 1: add the pattern to the trie */
263 while ((c = trie_utf8_getc (&inptr, -1))) {
265 c = g_unichar_tolower (c);
269 q = trie_insert (trie, depth, q, c);
280 /* Step 2: compute failure graph */
282 for (i = 0; i < trie->fail_states->len; i++) {
283 q = trie->fail_states->pdata[i];
290 while (r && (n = g (r, c)) == NULL)
295 if (q1->fail->final > q1->final)
296 q1->final = q1->fail->final;
298 if ((n = g (&trie->root, c)))
301 q1->fail = &trie->root;
311 d (fprintf (stderr, "\nafter adding pattern '%s' to trie %p:\n", pattern, trie));
312 d (dump_trie (&trie->root, 0));
320 * WHILE q != fail AND g(q, text[i]) == fail
328 * IF isElement(q, final)
337 * @trie: The #CamelTrie to search in.
338 * @buffer: The string to match against a pattern in @trie.
339 * @buflen: The length of @buffer.
340 * @matched_id: An integer address to store the matched pattern id in.
342 * Try to match the string @buffer with a pattern in @trie.
344 * Returns: The matched pattern, or %NULL if no pattern is matched.
349 camel_trie_search (CamelTrie *trie,
354 const guchar *inptr, *inend, *prev, *pat;
355 register gsize inlen = buflen;
356 struct _trie_state *q;
357 struct _trie_match *m = NULL; /* init to please gcc */
360 inptr = (const guchar *) buffer;
361 inend = inptr + buflen;
365 while ((c = trie_utf8_getc (&inptr, inlen))) {
366 inlen = (inend - inptr);
370 c = g_unichar_tolower (c);
372 while (q != NULL && (m = g (q, c)) == NULL)
375 if (q == &trie->root)
381 } else if (m != NULL) {
388 return (const gchar *) pat;