1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */
3 * Copyright (C) 2001-2003 Ximian Inc.
5 * Authors: Michael Zucchi <notzed@ximian.com>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of version 2 of the GNU Lesser General Public
9 * License as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
34 #include <sys/types.h>
37 #include <glib/gstdio.h>
39 #include <libedataserver/e-memory.h>
40 #include <libedataserver/e-msgport.h>
42 #include "camel-block-file.h"
43 #include "camel-object.h"
44 #include "camel-partition-table.h"
45 #include "camel-private.h"
46 #include "camel-text-index.h"
51 #define d(x) /*(printf("%s(%d):%s: ", __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/
56 #define CAMEL_TEXT_INDEX_MAX_WORDLEN (36)
58 #define CAMEL_TEXT_INDEX_LOCK(kf, lock) \
59 (g_static_rec_mutex_lock(&((CamelTextIndex *)kf)->priv->lock))
60 #define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) \
61 (g_static_rec_mutex_unlock(&((CamelTextIndex *)kf)->priv->lock))
63 static int text_index_compress_nosync(CamelIndex *idx);
65 /* ********************************************************************** */
67 /* "private" data, shared between index/cursor/name classes */
68 typedef struct _CamelTextIndexNamePrivate CamelTextIndexNamePrivate;
70 struct _CamelTextIndexNamePrivate {
76 CamelTextIndexName *camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid);
78 /* ****************************** */
80 typedef struct _CamelTextIndexCursorPrivate CamelTextIndexCursorPrivate;
82 struct _CamelTextIndexCursorPrivate {
94 CamelTextIndexCursor *camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data);
96 /* ****************************** */
97 typedef struct _CamelTextIndexKeyCursorPrivate CamelTextIndexKeyCursorPrivate;
99 struct _CamelTextIndexKeyCursorPrivate {
100 CamelKeyTable *table;
108 CamelTextIndexKeyCursor *camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table);
110 /* ********************************************************************** */
112 #define CAMEL_TEXT_INDEX_VERSION "TEXT.000"
113 #define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000"
115 struct _CamelTextIndexPrivate {
116 CamelBlockFile *blocks;
119 CamelKeyTable *word_index;
120 CamelPartitionTable *word_hash;
122 CamelKeyTable *name_index;
123 CamelPartitionTable *name_hash;
125 /* Cache of words to write */
126 int word_cache_limit;
127 int word_cache_count;
130 GStaticRecMutex lock;
133 /* Root block of text index */
134 struct _CamelTextIndexRoot {
135 struct _CamelBlockRoot root;
137 /* FIXME: the index root could contain a pointer to the hash root */
138 camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */
139 camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
141 camel_block_t name_index_root; /* same, for names */
142 camel_block_t name_hash_root;
144 guint32 words; /* total words */
145 guint32 names; /* total names */
146 guint32 deleted; /* deleted names */
147 guint32 keys; /* total key 'chunks' written, used with deleted to determine fragmentation */
150 struct _CamelTextIndexWord {
151 struct _CamelTextIndexWord *next;
152 struct _CamelTextIndexWord *prev;
154 camel_block_t data; /* where the data starts */
158 camel_key_t names[32];
161 #define CTI_PRIVATE(o) (((CamelTextIndex *)(o))->priv)
163 #define CI_CLASS(o) ((CamelTextIndexClass *)(((CamelObject *)o)->classfuncs))
165 /* ********************************************************************** */
167 /* ********************************************************************** */
169 static CamelObjectClass *camel_text_index_parent;
173 text_index_add_name_to_word(CamelIndex *idx, const char *word, camel_key_t nameid)
175 struct _CamelTextIndexWord *w, *wp, *ww;
176 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
179 struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
181 w = g_hash_table_lookup(p->words, word);
183 wordid = camel_partition_table_lookup(p->word_hash, word);
186 wordid = camel_key_table_add(p->word_index, word, 0, 0);
188 g_warning ("Could not create key entry for word '%s': %s\n",
189 word, strerror (errno));
192 if (camel_partition_table_add(p->word_hash, word, wordid) == -1) {
193 g_warning ("Could not create hash entry for word '%s': %s\n",
194 word, strerror (errno));
198 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
200 data = camel_key_table_lookup(p->word_index, wordid, NULL, 0);
202 g_warning ("Could not find key entry for word '%s': %s\n",
203 word, strerror (errno));
208 w = g_malloc0(sizeof(*w));
209 w->word = g_strdup(word);
214 w->names[0] = nameid;
215 g_hash_table_insert(p->words, w->word, w);
216 e_dlist_addhead(&p->word_cache, (EDListNode *)w);
217 p->word_cache_count++;
218 ww = (struct _CamelTextIndexWord *)p->word_cache.tailpred;
220 while (wp && p->word_cache_count > p->word_cache_limit) {
221 io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data));
222 if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) {
223 io(printf(" new data [%x]\n", ww->data));
225 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
226 /* if this call fails - we still point to the old data - not fatal */
227 camel_key_table_set_data(p->word_index, ww->wordid, ww->data);
228 e_dlist_remove((EDListNode *)ww);
229 g_hash_table_remove(p->words, ww->word);
232 p->word_cache_count--;
238 e_dlist_remove((EDListNode *)w);
239 e_dlist_addhead(&p->word_cache, (EDListNode *)w);
240 w->names[w->used] = nameid;
242 if (w->used == sizeof(w->names)/sizeof(w->names[0])) {
243 io(printf("writing key file entry '%s' [%x]\n", w->word, w->data));
244 if (camel_key_file_write(p->links, &w->data, w->used, w->names) != -1) {
246 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
247 /* if this call fails - we still point to the old data - not fatal */
248 camel_key_table_set_data(p->word_index, w->wordid, w->data);
250 /* FIXME: what to on error? lost data? */
257 text_index_sync(CamelIndex *idx)
259 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
260 struct _CamelTextIndexWord *ww;
261 struct _CamelTextIndexRoot *rb;
262 int ret = 0, wfrag, nfrag, work = FALSE;
264 d(printf("sync: blocks = %p\n", p->blocks));
266 if (p->blocks == NULL)
269 rb = (struct _CamelTextIndexRoot *)p->blocks->root;
271 /* sync/flush word cache */
273 CAMEL_TEXT_INDEX_LOCK(idx, lock);
275 /* we sync, bump down the cache limits since we dont need them for reading */
276 p->blocks->block_cache_limit = 128;
277 /* this doesn't really need to be dropped, its only used in updates anyway */
278 p->word_cache_limit = 1024;
280 work = !e_dlist_empty(&p->word_cache);
282 while ( (ww = (struct _CamelTextIndexWord *)e_dlist_remhead(&p->word_cache)) ) {
284 io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data));
285 if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) {
286 io(printf(" new data [%x]\n", ww->data));
288 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
289 camel_key_table_set_data(p->word_index, ww->wordid, ww->data);
295 g_hash_table_remove(p->words, ww->word);
300 if (camel_key_table_sync(p->word_index) == -1
301 || camel_key_table_sync(p->name_index) == -1
302 || camel_partition_table_sync(p->word_hash) == -1
303 || camel_partition_table_sync(p->name_hash) == -1)
306 /* only do the frag/compress check if we did some new writes on this index */
307 wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0;
308 nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0;
309 d(printf("wfrag = %d, nfrag = %d, work = %s, ret = %d\n", wfrag, nfrag, work?"true":"false", ret));
310 d(printf(" words = %d, keys = %d\n", rb->words, rb->keys));
313 if (wfrag > 30 || nfrag > 20)
314 ret = text_index_compress_nosync(idx);
317 ret = camel_block_file_sync(p->blocks);
319 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
324 static void tmp_name(const char *in, char *o)
328 s = strrchr(in, '/');
330 memcpy(o, in, s-in+1);
331 memcpy(o+(s-in+1), ".#", 2);
332 strcpy(o+(s-in+3), s+1);
334 sprintf(o, ".#%s", in);
339 text_index_compress(CamelIndex *idx)
343 CAMEL_TEXT_INDEX_LOCK(idx, lock);
345 ret = camel_index_sync(idx);
347 ret = text_index_compress_nosync(idx);
349 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
354 /* Attempt to recover index space by compressing the indices */
356 text_index_compress_nosync(CamelIndex *idx)
358 CamelTextIndex *newidx;
359 struct _CamelTextIndexPrivate *newp, *oldp;
360 camel_key_t oldkeyid, newkeyid;
362 unsigned int deleted;
363 camel_block_t data, newdata;
367 char *newpath, *savepath, *oldpath;
368 size_t count, newcount;
369 camel_key_t *records, newrecords[256];
370 struct _CamelTextIndexRoot *rb;
372 i = strlen(idx->path)+16;
375 savepath = alloca(i);
377 strcpy(oldpath, idx->path);
378 oldpath[strlen(oldpath)-strlen(".index")] = 0;
380 tmp_name(oldpath, newpath);
381 sprintf(savepath, "%s~", oldpath);
383 d(printf("Old index: %s\n", idx->path));
384 d(printf("Old path: %s\n", oldpath));
385 d(printf("New: %s\n", newpath));
386 d(printf("Save: %s\n", savepath));
388 newidx = camel_text_index_new(newpath, O_RDWR|O_CREAT);
392 newp = CTI_PRIVATE(newidx);
393 oldp = CTI_PRIVATE(idx);
395 CAMEL_TEXT_INDEX_LOCK(idx, lock);
397 rb = (struct _CamelTextIndexRoot *)newp->blocks->root;
405 For each name we still have:
406 Add it to the new index & setup remap table
409 Copy word's data to a new file
410 Add new word to index(*) (can we just copy blocks?) */
412 /* Copy undeleted names to new index file, creating new indices */
413 io(printf("Copying undeleted names to new file\n"));
414 remap = g_hash_table_new(NULL, NULL);
417 while ( (oldkeyid = camel_key_table_next(oldp->name_index, oldkeyid, &name, &flags, &data)) ) {
418 if ((flags&1) == 0) {
419 io(printf("copying name '%s'\n", name));
420 newkeyid = camel_key_table_add(newp->name_index, name, data, flags);
424 camel_partition_table_add(newp->name_hash, name, newkeyid);
425 g_hash_table_insert(remap, GINT_TO_POINTER(oldkeyid), GINT_TO_POINTER(newkeyid));
427 io(printf("deleted name '%s'\n", name));
433 /* Copy word data across, remapping/deleting and create new index for it */
434 /* We re-block the data into 256 entry lots while we're at it, since we only
435 have to do 1 at a time and its cheap */
437 while ( (oldkeyid = camel_key_table_next(oldp->word_index, oldkeyid, &name, &flags, &data)) ) {
438 io(printf("copying word '%s'\n", name));
446 if (camel_key_file_read(oldp->links, &data, &count, &records) == -1) {
447 io(printf("could not read from old keys at %d for word '%s'\n", (int)data, name));
450 for (i=0;i<count;i++) {
451 newkeyid = (camel_key_t)GPOINTER_TO_INT(g_hash_table_lookup(remap, GINT_TO_POINTER(records[i])));
453 newrecords[newcount++] = newkeyid;
454 if (newcount == sizeof(newrecords)/sizeof(newrecords[0])) {
455 if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1) {
467 if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1)
472 newkeyid = camel_key_table_add(newp->word_index, name, newdata, flags);
475 camel_partition_table_add(newp->word_hash, name, newkeyid);
481 camel_block_file_touch_block(newp->blocks, newp->blocks->root_block);
483 if (camel_index_sync((CamelIndex *)newidx) == -1)
486 /* Rename underlying files to match */
487 ret = camel_index_rename(idx, savepath);
491 /* If this fails, we'll pick up something during restart? */
492 ret = camel_index_rename((CamelIndex *)newidx, oldpath);
494 #define myswap(a, b) { void *tmp = a; a = b; b = tmp; }
495 /* Poke the private data across to the new object */
496 /* And change the fd's over, etc? */
497 /* Yes: This is a hack */
498 myswap(newp->blocks, oldp->blocks);
499 myswap(newp->links, oldp->links);
500 myswap(newp->word_index, oldp->word_index);
501 myswap(newp->word_hash, oldp->word_hash);
502 myswap(newp->name_index, oldp->name_index);
503 myswap(newp->name_hash, oldp->name_hash);
504 myswap(((CamelIndex *)newidx)->path, ((CamelIndex *)idx)->path);
509 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
511 camel_index_delete((CamelIndex *)newidx);
513 camel_object_unref((CamelObject *)newidx);
515 g_hash_table_destroy(remap);
517 /* clean up temp files always */
518 sprintf(savepath, "%s~.index", oldpath);
520 sprintf(newpath, "%s.data", savepath);
527 text_index_delete(CamelIndex *idx)
529 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
532 if (camel_block_file_delete(p->blocks) == -1)
534 if (camel_key_file_delete(p->links) == -1)
541 text_index_rename(CamelIndex *idx, const char *path)
543 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
544 char *newlink, *newblock;
547 CAMEL_TEXT_INDEX_LOCK(idx, lock);
549 newblock = alloca(strlen(path)+8);
550 sprintf(newblock, "%s.index", path);
551 ret = camel_block_file_rename(p->blocks, newblock);
553 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
557 newlink = alloca(strlen(path)+16);
558 sprintf(newlink, "%s.index.data", path);
559 ret = camel_key_file_rename(p->links, newlink);
562 camel_block_file_rename(p->blocks, idx->path);
563 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
569 idx->path = g_strdup(newblock);
571 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
577 text_index_has_name(CamelIndex *idx, const char *name)
579 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
581 return camel_partition_table_lookup(p->name_hash, name) != 0;
584 static CamelIndexName *
585 text_index_add_name(CamelIndex *idx, const char *name)
587 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
590 struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
592 CAMEL_TEXT_INDEX_LOCK(idx, lock);
594 /* if we're adding words, up the cache limits a lot */
596 p->blocks->block_cache_limit = 1024;
597 p->word_cache_limit = 8192;
600 /* If we have it already replace it */
601 keyid = camel_partition_table_lookup(p->name_hash, name);
603 /* TODO: We could just update the partition table's
604 key pointer rather than having to delete it */
606 camel_key_table_set_flags(p->name_index, keyid, 1, 1);
607 camel_partition_table_remove(p->name_hash, name);
610 keyid = camel_key_table_add(p->name_index, name, 0, 0);
612 camel_partition_table_add(p->name_hash, name, keyid);
616 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
618 /* TODO: if keyid == 0, we had a failure, we should somehow flag that, but for
619 now just return a valid object but discard its results, see text_index_write_name */
621 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
623 idn = (CamelIndexName *)camel_text_index_name_new((CamelTextIndex *)idx, name, keyid);
630 hash_write_word(char *word, void *data, CamelIndexName *idn)
632 CamelTextIndexName *tin = (CamelTextIndexName *)idn;
634 text_index_add_name_to_word(idn->index, word, tin->priv->nameid);
638 text_index_write_name(CamelIndex *idx, CamelIndexName *idn)
640 /* force 'flush' of any outstanding data */
641 camel_index_name_add_buffer(idn, NULL, 0);
643 /* see text_index_add_name for when this can be 0 */
644 if (((CamelTextIndexName *)idn)->priv->nameid != 0) {
645 CAMEL_TEXT_INDEX_LOCK(idx, lock);
647 g_hash_table_foreach(idn->words, (GHFunc)hash_write_word, idn);
649 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
655 static CamelIndexCursor *
656 text_index_find_name(CamelIndex *idx, const char *name)
658 /* what was this for, umm */
663 text_index_delete_name(CamelIndex *idx, const char *name)
665 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
667 struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
669 d(printf("Delete name: %s\n", name));
671 /* probably doesn't really need locking, but oh well */
672 CAMEL_TEXT_INDEX_LOCK(idx, lock);
674 /* We just mark the key deleted, and remove it from the hash table */
675 keyid = camel_partition_table_lookup(p->name_hash, name);
678 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
679 camel_key_table_set_flags(p->name_index, keyid, 1, 1);
680 camel_partition_table_remove(p->name_hash, name);
683 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
686 static CamelIndexCursor *
687 text_index_find(CamelIndex *idx, const char *word)
689 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
691 camel_block_t data = 0;
693 CamelIndexCursor *idc;
695 CAMEL_TEXT_INDEX_LOCK(idx, lock);
697 keyid = camel_partition_table_lookup(p->word_hash, word);
699 data = camel_key_table_lookup(p->word_index, keyid, NULL, &flags);
704 CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
706 idc = (CamelIndexCursor *)camel_text_index_cursor_new((CamelTextIndex *)idx, data);
711 static CamelIndexCursor *
712 text_index_words(CamelIndex *idx)
714 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
716 return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->word_index);
719 static CamelIndexCursor *
720 text_index_names(CamelIndex *idx)
722 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
724 return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->name_index);
728 camel_text_index_class_init(CamelTextIndexClass *klass)
730 CamelIndexClass *iklass = (CamelIndexClass *)klass;
732 camel_text_index_parent = CAMEL_OBJECT_CLASS(camel_type_get_global_classfuncs(camel_object_get_type()));
734 iklass->sync = text_index_sync;
735 iklass->compress = text_index_compress;
736 iklass->delete = text_index_delete;
738 iklass->rename = text_index_rename;
740 iklass->has_name = text_index_has_name;
741 iklass->add_name = text_index_add_name;
742 iklass->write_name = text_index_write_name;
743 iklass->find_name = text_index_find_name;
744 iklass->delete_name = text_index_delete_name;
745 iklass->find = text_index_find;
747 iklass->words = text_index_words;
748 iklass->names = text_index_names;
752 camel_text_index_init(CamelTextIndex *idx)
754 struct _CamelTextIndexPrivate *p;
756 p = CTI_PRIVATE(idx) = g_malloc0(sizeof(*p));
758 e_dlist_init(&p->word_cache);
759 p->words = g_hash_table_new(g_str_hash, g_str_equal);
760 p->word_cache_count = 0;
761 /* this cache size and the block cache size have been tuned for about the best
762 with moderate memory usage. Doubling the memory usage barely affects performance. */
763 p->word_cache_limit = 4096; /* 1024 = 128K */
765 g_static_rec_mutex_init(&p->lock);
769 camel_text_index_finalise(CamelTextIndex *idx)
771 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
773 camel_index_sync((CamelIndex *)idx);
775 g_assert(e_dlist_empty(&p->word_cache));
776 g_assert(g_hash_table_size(p->words) == 0);
778 g_hash_table_destroy(p->words);
781 camel_object_unref((CamelObject *)p->word_index);
783 camel_object_unref((CamelObject *)p->word_hash);
785 camel_object_unref((CamelObject *)p->name_index);
787 camel_object_unref((CamelObject *)p->name_hash);
790 camel_object_unref((CamelObject *)p->blocks);
792 camel_object_unref((CamelObject *)p->links);
794 g_static_rec_mutex_free(&p->lock);
800 camel_text_index_get_type(void)
802 static CamelType type = CAMEL_INVALID_TYPE;
804 if (type == CAMEL_INVALID_TYPE) {
805 type = camel_type_register(camel_index_get_type(), "CamelTextIndex",
806 sizeof (CamelTextIndex),
807 sizeof (CamelTextIndexClass),
808 (CamelObjectClassInitFunc) camel_text_index_class_init,
810 (CamelObjectInitFunc) camel_text_index_init,
811 (CamelObjectFinalizeFunc) camel_text_index_finalise);
818 text_index_normalise(CamelIndex *idx, const char *in, void *data)
822 /* Sigh, this is really expensive */
823 /*g_utf8_normalize(in, strlen(in), G_NORMALIZE_ALL);*/
824 word = g_utf8_strdown(in, -1);
830 camel_text_index_new(const char *path, int flags)
832 CamelTextIndex *idx = (CamelTextIndex *)camel_object_new(camel_text_index_get_type());
833 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
834 struct _CamelTextIndexRoot *rb;
838 camel_index_construct((CamelIndex *)idx, path, flags);
839 camel_index_set_normalise((CamelIndex *)idx, text_index_normalise, NULL);
841 p->blocks = camel_block_file_new(idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
842 link = alloca(strlen(idx->parent.path)+7);
843 sprintf(link, "%s.data", idx->parent.path);
844 p->links = camel_key_file_new(link, flags, CAMEL_TEXT_INDEX_KEY_VERSION);
846 if (p->blocks == NULL || p->links == NULL)
849 rb = (struct _CamelTextIndexRoot *)p->blocks->root;
851 if (rb->word_index_root == 0) {
852 bl = camel_block_file_new_block(p->blocks);
857 rb->word_index_root = bl->id;
858 camel_block_file_unref_block(p->blocks, bl);
859 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
862 if (rb->word_hash_root == 0) {
863 bl = camel_block_file_new_block(p->blocks);
868 rb->word_hash_root = bl->id;
869 camel_block_file_unref_block(p->blocks, bl);
870 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
873 if (rb->name_index_root == 0) {
874 bl = camel_block_file_new_block(p->blocks);
879 rb->name_index_root = bl->id;
880 camel_block_file_unref_block(p->blocks, bl);
881 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
884 if (rb->name_hash_root == 0) {
885 bl = camel_block_file_new_block(p->blocks);
890 rb->name_hash_root = bl->id;
891 camel_block_file_unref_block(p->blocks, bl);
892 camel_block_file_touch_block(p->blocks, p->blocks->root_block);
895 p->word_index = camel_key_table_new(p->blocks, rb->word_index_root);
896 p->word_hash = camel_partition_table_new(p->blocks, rb->word_hash_root);
897 p->name_index = camel_key_table_new(p->blocks, rb->name_index_root);
898 p->name_hash = camel_partition_table_new(p->blocks, rb->name_hash_root);
900 if (p->word_index == NULL || p->word_hash == NULL
901 || p->name_index == NULL || p->name_hash == NULL) {
902 camel_object_unref((CamelObject *)idx);
909 camel_object_unref((CamelObject *)idx);
913 /* returns 0 if the index exists, is valid, and synced, -1 otherwise */
915 camel_text_index_check(const char *path)
918 CamelBlockFile *blocks;
921 block = alloca(strlen(path)+7);
922 sprintf(block, "%s.index", path);
923 blocks = camel_block_file_new(block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
924 if (blocks == NULL) {
925 io(printf("Check failed: No block file: %s\n", strerror (errno)));
928 key = alloca(strlen(path)+12);
929 sprintf(key, "%s.index.data", path);
930 keys = camel_key_file_new(key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION);
932 io(printf("Check failed: No key file: %s\n", strerror (errno)));
933 camel_object_unref((CamelObject *)blocks);
937 camel_object_unref((CamelObject *)keys);
938 camel_object_unref((CamelObject *)blocks);
944 camel_text_index_rename(const char *old, const char *new)
946 char *oldname, *newname;
949 /* TODO: camel_text_index_rename should find out if we have an active index and use that instead */
951 oldname = alloca(strlen(old)+12);
952 newname = alloca(strlen(new)+12);
953 sprintf(oldname, "%s.index", old);
954 sprintf(newname, "%s.index", new);
956 if (g_rename(oldname, newname) == -1 && errno != ENOENT)
959 sprintf(oldname, "%s.index.data", old);
960 sprintf(newname, "%s.index.data", new);
962 if (g_rename(oldname, newname) == -1 && errno != ENOENT) {
964 sprintf(oldname, "%s.index", old);
965 sprintf(newname, "%s.index", new);
966 g_rename(newname, oldname);
975 camel_text_index_remove(const char *old)
980 /* TODO: needs to poke any active indices to remain unlinked */
982 block = alloca(strlen(old)+12);
983 key = alloca(strlen(old)+12);
984 sprintf(block, "%s.index", old);
985 sprintf(key, "%s.index.data", old);
987 if (g_unlink(block) == -1 && errno != ENOENT)
989 if (g_unlink(key) == -1 && errno != ENOENT)
997 camel_text_index_info(CamelTextIndex *idx)
999 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
1000 struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
1003 printf("Path: '%s'\n", idx->parent.path);
1004 printf("Version: %u\n", idx->parent.version);
1005 printf("Flags: %08x\n", idx->parent.flags);
1006 printf("Total words: %u\n", rb->words);
1007 printf("Total names: %u\n", rb->names);
1008 printf("Total deleted: %u\n", rb->deleted);
1009 printf("Total key blocks: %u\n", rb->keys);
1011 if (rb->words > 0) {
1012 frag = ((rb->keys - rb->words) * 100)/ rb->words;
1013 printf("Word fragmentation: %d%%\n", frag);
1016 if (rb->names > 0) {
1017 frag = (rb->deleted * 100)/ rb->names;
1018 printf("Name fragmentation: %d%%\n", frag);
1022 /* #define DUMP_RAW */
1025 enum { KEY_ROOT = 1, KEY_DATA = 2, PARTITION_MAP = 4, PARTITION_DATA = 8 };
1028 add_type(GHashTable *map, camel_block_t id, int type)
1032 old = g_hash_table_lookup(map, id);
1036 if (old != 0 && old != type)
1037 g_warning("block %x redefined as type %d, already type %d\n", id, type, old);
1038 g_hash_table_insert(map, id, GINT_TO_POINTER (type|old));
1042 add_partition(GHashTable *map, CamelBlockFile *blocks, camel_block_t id)
1045 CamelPartitionMapBlock *pm;
1049 add_type(map, id, PARTITION_MAP);
1050 bl = camel_block_file_get_block(blocks, id);
1052 g_warning("couldn't get parition: %x\n", id);
1056 pm = (CamelPartitionMapBlock *)&bl->data;
1057 if (pm->used > sizeof(pm->partition)/sizeof(pm->partition[0])) {
1058 g_warning("Partition block %x invalid\n", id);
1059 camel_block_file_unref_block(blocks, bl);
1063 for (i=0;i<pm->used;i++)
1064 add_type(map, pm->partition[i].blockid, PARTITION_DATA);
1067 camel_block_file_unref_block(blocks, bl);
1072 add_keys(GHashTable *map, CamelBlockFile *blocks, camel_block_t id)
1074 CamelBlock *rbl, *bl;
1075 CamelKeyRootBlock *root;
1078 add_type(map, id, KEY_ROOT);
1079 rbl = camel_block_file_get_block(blocks, id);
1081 g_warning("couldn't get key root: %x\n", id);
1084 root = (CamelKeyRootBlock *)&rbl->data;
1088 add_type(map, id, KEY_DATA);
1089 bl = camel_block_file_get_block(blocks, id);
1091 g_warning("couldn't get key: %x\n", id);
1095 kb = (CamelKeyBlock *)&bl->data;
1097 camel_block_file_unref_block(blocks, bl);
1100 camel_block_file_unref_block(blocks, rbl);
1104 dump_raw(GHashTable *map, char *path)
1108 char *p, c, *e, *a, *o;
1109 int v, n, len, i, type;
1110 char hex[16] = "0123456789ABCDEF";
1112 camel_block_t id, total;
1114 fd = g_open(path, O_RDONLY|O_BINARY, 0);
1119 while ((len = read(fd, buf, 1024)) == 1024) {
1124 type = g_hash_table_lookup(map, id);
1127 printf(" - unknown -\n");
1130 printf(" - invalid -\n");
1133 CamelKeyRootBlock *r = (CamelKeyRootBlock *)buf;
1134 printf("Key root:\n");
1135 printf("First: %08x Last: %08x Free: %08x\n", r->first, r->last, r->free);
1138 CamelKeyBlock *k = (CamelKeyBlock *)buf;
1139 printf("Key data:\n");
1140 printf("Next: %08x Used: %u\n", k->next, k->used);
1141 for (i=0;i<k->used;i++) {
1143 len = sizeof(k->u.keydata);
1145 len = k->u.keys[i-1].offset;
1146 len -= k->u.keys[i].offset;
1147 printf("[%03d]: %08x %5d %06x %3d '%.*s'\n", i,
1148 k->u.keys[i].data, k->u.keys[i].offset, k->u.keys[i].flags,
1149 len, len, k->u.keydata+k->u.keys[i].offset);
1152 case PARTITION_MAP: {
1153 CamelPartitionMapBlock *m = (CamelPartitionMapBlock *)buf;
1154 printf("Partition map\n");
1155 printf("Next: %08x Used: %u\n", m->next, m->used);
1156 for (i=0;i<m->used;i++) {
1157 printf("[%03d]: %08x -> %08x\n", i, m->partition[i].hashid, m->partition[i].blockid);
1160 case PARTITION_DATA: {
1161 CamelPartitionKeyBlock *k = (CamelPartitionKeyBlock *)buf;
1162 printf("Partition data\n");
1163 printf("Used: %u\n", k->used);
1168 printf("--raw--\n");
1173 sprintf(line, "%08x: ", total);
1178 while (len && i<16) {
1180 *a++ = isprint(c)?c:'.';
1181 *o++ = hex[(c>>4)&0x0f];
1189 printf("%s\n", line);
1199 camel_text_index_dump(CamelTextIndex *idx)
1201 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
1209 /* Iterate over all names in the file first */
1211 printf("UID's in index\n");
1214 while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) {
1215 if ((flags & 1) == 0)
1216 printf(" %s\n", word);
1218 printf(" %s (deleted)\n", word);
1222 printf("Word's in index\n");
1225 while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) {
1226 CamelIndexCursor *idc;
1228 printf("Word: '%s':\n", word);
1230 idc = camel_index_find((CamelIndex *)idx, word);
1231 while ( (name = camel_index_cursor_next(idc)) ) {
1232 printf(" %s", name);
1235 camel_object_unref((CamelObject *)idc);
1239 /* a more low-level dump routine */
1240 GHashTable *block_type = g_hash_table_new(NULL, NULL);
1245 add_keys(block_type, p->blocks, p->word_index->rootid);
1246 add_keys(block_type, p->blocks, p->name_index->rootid);
1248 add_partition(block_type, p->blocks, p->word_hash->rootid);
1249 add_partition(block_type, p->blocks, p->name_hash->rootid);
1251 dump_raw(block_type, p->blocks->path);
1252 g_hash_table_destroy(block_type);
1256 /* more debug stuff */
1258 camel_text_index_validate(CamelTextIndex *idx)
1260 struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
1267 camel_key_t *records;
1270 GHashTable *names, *deleted, *words, *keys, *name_word, *word_word;
1272 names = g_hash_table_new(NULL, NULL);
1273 deleted = g_hash_table_new(NULL, NULL);
1275 name_word = g_hash_table_new(g_str_hash, g_str_equal);
1277 words = g_hash_table_new(NULL, NULL);
1278 keys = g_hash_table_new(NULL, NULL);
1280 word_word = g_hash_table_new(g_str_hash, g_str_equal);
1282 /* Iterate over all names in the file first */
1284 printf("Checking UID consistency\n");
1287 while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) {
1288 if ((oldword = g_hash_table_lookup(names, GINT_TO_POINTER(keyid))) != NULL
1289 || (oldword = g_hash_table_lookup(deleted, GINT_TO_POINTER(keyid))) != NULL) {
1290 printf("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1293 g_hash_table_insert(name_word, word, GINT_TO_POINTER (1));
1294 if ((flags & 1) == 0) {
1295 g_hash_table_insert(names, GINT_TO_POINTER(keyid), word);
1297 g_hash_table_insert(deleted, GINT_TO_POINTER(keyid), word);
1302 printf("Checking WORD member consistency\n");
1305 while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) {
1306 CamelIndexCursor *idc;
1309 /* first, check for duplicates of keyid, and data */
1310 if ((oldword = g_hash_table_lookup(words, GINT_TO_POINTER(keyid))) != NULL) {
1311 printf("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1315 g_hash_table_insert(words, GINT_TO_POINTER(keyid), word);
1319 /* This may not be an issue if things have been removed over time,
1320 though it is a problem if its a fresh index */
1321 printf("Word '%s' has no data associated with it\n", word);
1323 if ((oldword = g_hash_table_lookup(keys, GUINT_TO_POINTER(data))) != NULL) {
1324 printf("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword);
1326 g_hash_table_insert(keys, GUINT_TO_POINTER(data), word);
1330 if ((oldword = g_hash_table_lookup(word_word, word)) != NULL) {
1331 printf("Warning, word '%s' occurs more than once\n", word);
1333 g_hash_table_insert(word_word, word, word);
1336 used = g_hash_table_new(g_str_hash, g_str_equal);
1338 idc = camel_index_find((CamelIndex *)idx, word);
1339 while ( (name = camel_index_cursor_next(idc)) ) {
1340 if (g_hash_table_lookup(name_word, name) == NULL) {
1341 printf("word '%s' references non-existant name '%s'\n", word, name);
1343 if (g_hash_table_lookup(used, name) != NULL) {
1344 printf("word '%s' uses word '%s' more than once\n", word, name);
1346 g_hash_table_insert(used, g_strdup(name), (void *)1);
1349 camel_object_unref((CamelObject *)idc);
1351 g_hash_table_foreach(used, (GHFunc)g_free, NULL);
1352 g_hash_table_destroy(used);
1354 printf("word '%s'\n", word);
1357 printf(" data %x ", data);
1358 if (camel_key_file_read(p->links, &data, &count, &records) == -1) {
1359 printf("Warning, read failed for word '%s', at data '%u'\n", word, data);
1362 printf("(%d)\n", (int)count);
1368 g_hash_table_destroy(names);
1369 g_hash_table_destroy(deleted);
1370 g_hash_table_destroy(words);
1371 g_hash_table_destroy(keys);
1373 g_hash_table_foreach(name_word, (GHFunc)g_free, NULL);
1374 g_hash_table_destroy(name_word);
1376 g_hash_table_foreach(word_word, (GHFunc)g_free, NULL);
1377 g_hash_table_destroy(word_word);
1380 /* ********************************************************************** */
1381 /* CamelTextIndexName */
1382 /* ********************************************************************** */
1384 static CamelIndexNameClass *camel_text_index_name_parent;
1386 #define CIN_CLASS(o) ((CamelTextIndexNameClass *)(((CamelObject *)o)->classfuncs))
1387 #define CIN_PRIVATE(o) (((CamelTextIndexName *)(o))->priv)
1390 text_index_name_add_word(CamelIndexName *idn, const char *word)
1392 struct _CamelTextIndexNamePrivate *p = ((CamelTextIndexName *)idn)->priv;
1394 if (g_hash_table_lookup(idn->words, word) == NULL) {
1395 char *w = e_mempool_strdup(p->pool, word);
1397 g_hash_table_insert(idn->words, w, w);
1402 Because it doesn't hang/loop forever on bad data
1403 Used to clean up utf8 before it gets further */
1405 static inline guint32
1406 camel_utf8_next(const unsigned char **ptr, const unsigned char *ptrend)
1408 register unsigned char *p = (unsigned char *)*ptr;
1409 register unsigned int c;
1416 while ( (c = *p++) ) {
1420 } else if ((c&0xe0) == 0xc0) {
1423 } else if ((c&0xf0) == 0xe0) {
1426 } else if ((c&0xf8) == 0xf0) {
1429 } else if ((c&0xfc) == 0xf8) {
1432 } else if ((c&0xfe) == 0xfc) {
1436 /* Invalid, ignore and look for next start char if room */
1443 /* bad data or truncated buffer */
1447 while (l && ((c = *p) & 0xc0) == 0x80) {
1450 v = (v << 6) | (c & 0x3f);
1459 /* else look for a start char again */
1466 text_index_name_add_buffer(CamelIndexName *idn, const char *buffer, size_t len)
1468 CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
1469 const unsigned char *ptr, *ptrend;
1471 unsigned char utf8[8];
1474 if (buffer == NULL) {
1475 if (p->buffer->len) {
1476 camel_index_name_add_word(idn, p->buffer->str);
1477 g_string_truncate(p->buffer, 0);
1483 ptrend = buffer+len;
1484 while ((c = camel_utf8_next(&ptr, ptrend))) {
1485 if (g_unichar_isalnum(c)) {
1486 c = g_unichar_tolower(c);
1487 utf8len = g_unichar_to_utf8(c, utf8);
1489 g_string_append(p->buffer, utf8);
1491 if (p->buffer->len > 0 && p->buffer->len <= CAMEL_TEXT_INDEX_MAX_WORDLEN) {
1492 text_index_name_add_word(idn, p->buffer->str);
1493 /*camel_index_name_add_word(idn, p->buffer->str);*/
1496 g_string_truncate (p->buffer, 0);
1504 camel_text_index_name_class_init(CamelTextIndexNameClass *klass)
1506 CamelIndexNameClass *nklass = (CamelIndexNameClass *)klass;
1508 camel_text_index_name_parent = CAMEL_INDEX_NAME_CLASS(camel_type_get_global_classfuncs(camel_index_name_get_type()));
1510 nklass->add_word = text_index_name_add_word;
1511 nklass->add_buffer = text_index_name_add_buffer;
1515 camel_text_index_name_init(CamelTextIndexName *idn)
1517 CamelTextIndexNamePrivate *p;
1519 idn->parent.words = g_hash_table_new(g_str_hash, g_str_equal);
1521 p = idn->priv = g_malloc0(sizeof(*idn->priv));
1522 p->buffer = g_string_new("");
1523 p->pool = e_mempool_new(256, 128, E_MEMPOOL_ALIGN_BYTE);
1527 camel_text_index_name_finalise(CamelTextIndexName *idn)
1529 CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
1531 g_hash_table_destroy(idn->parent.words);
1533 g_string_free(p->buffer, TRUE);
1534 e_mempool_destroy(p->pool);
1540 camel_text_index_name_get_type(void)
1542 static CamelType type = CAMEL_INVALID_TYPE;
1544 if (type == CAMEL_INVALID_TYPE) {
1545 type = camel_type_register(camel_index_name_get_type(), "CamelTextIndexName",
1546 sizeof (CamelTextIndexName),
1547 sizeof (CamelTextIndexNameClass),
1548 (CamelObjectClassInitFunc) camel_text_index_name_class_init,
1550 (CamelObjectInitFunc) camel_text_index_name_init,
1551 (CamelObjectFinalizeFunc) camel_text_index_name_finalise);
1557 CamelTextIndexName *
1558 camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid)
1560 CamelTextIndexName *idn = (CamelTextIndexName *)camel_object_new(camel_text_index_name_get_type());
1561 CamelIndexName *cin = &idn->parent;
1562 CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
1564 cin->index = (CamelIndex *)idx;
1565 camel_object_ref((CamelObject *)idx);
1566 cin->name = e_mempool_strdup(p->pool, name);
1572 /* ********************************************************************** */
1573 /* CamelTextIndexCursor */
1574 /* ********************************************************************** */
1576 static CamelIndexCursorClass *camel_text_index_cursor_parent;
1578 #define CIC_CLASS(o) ((CamelTextIndexCursorClass *)(((CamelObject *)o)->classfuncs))
1579 #define CIC_PRIVATE(o) (((CamelTextIndexCursor *)(o))->priv)
1582 text_index_cursor_next(CamelIndexCursor *idc)
1584 struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
1585 struct _CamelTextIndexPrivate *tip = CTI_PRIVATE(idc->index);
1588 c(printf("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next));
1591 while (p->record_index >= p->record_count) {
1594 p->record_index = 0;
1595 p->record_count = 0;
1598 if (camel_key_file_read(tip->links, &p->next, &p->record_count, &p->records) == -1)
1603 camel_key_table_lookup(tip->name_index, p->records[p->record_index], &p->current, &flags);
1609 } while (p->current == NULL);
1615 text_index_cursor_reset(CamelIndexCursor *idc)
1617 struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
1623 p->record_count = 0;
1624 p->record_index = 0;
1629 camel_text_index_cursor_class_init(CamelTextIndexCursorClass *klass)
1631 CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass;
1633 camel_text_index_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type()));
1635 cklass->next = text_index_cursor_next;
1636 cklass->reset = text_index_cursor_reset;
1640 camel_text_index_cursor_init(CamelTextIndexCursor *idc)
1642 CIC_PRIVATE(idc) = g_malloc0(sizeof(struct _CamelTextIndexCursorPrivate));
1646 camel_text_index_cursor_finalise(CamelTextIndexCursor *idc)
1648 struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
1656 camel_text_index_cursor_get_type(void)
1658 static CamelType type = CAMEL_INVALID_TYPE;
1660 if (type == CAMEL_INVALID_TYPE) {
1661 type = camel_type_register(camel_index_cursor_get_type(), "CamelTextIndexCursor",
1662 sizeof (CamelTextIndexCursor),
1663 sizeof (CamelTextIndexCursorClass),
1664 (CamelObjectClassInitFunc) camel_text_index_cursor_class_init,
1666 (CamelObjectInitFunc) camel_text_index_cursor_init,
1667 (CamelObjectFinalizeFunc) camel_text_index_cursor_finalise);
1673 CamelTextIndexCursor *
1674 camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data)
1676 CamelTextIndexCursor *idc = (CamelTextIndexCursor *)camel_object_new(camel_text_index_cursor_get_type());
1677 CamelIndexCursor *cic = &idc->parent;
1678 struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
1680 cic->index = (CamelIndex *)idx;
1681 camel_object_ref((CamelObject *)idx);
1684 p->record_count = 0;
1685 p->record_index = 0;
1690 /* ********************************************************************** */
1691 /* CamelTextIndexKeyCursor */
1692 /* ********************************************************************** */
1694 static CamelIndexCursorClass *camel_text_index_key_cursor_parent;
1696 #define CIKC_CLASS(o) ((CamelTextIndexKeyCursorClass *)(((CamelObject *)o)->classfuncs))
1697 #define CIKC_PRIVATE(o) (((CamelTextIndexKeyCursor *)(o))->priv)
1700 text_index_key_cursor_next(CamelIndexCursor *idc)
1702 struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
1704 c(printf("Going to next cursor for keyid %08x\n", p->keyid));
1709 while ( (p->keyid = camel_key_table_next(p->table, p->keyid, &p->current, &p->flags, &p->data)) ) {
1710 if ((p->flags & 1) == 0) {
1722 text_index_key_cursor_reset(CamelIndexCursor *idc)
1724 struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
1734 camel_text_index_key_cursor_class_init(CamelTextIndexKeyCursorClass *klass)
1736 CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass;
1738 camel_text_index_key_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type()));
1740 cklass->next = text_index_key_cursor_next;
1741 cklass->reset = text_index_key_cursor_reset;
1745 camel_text_index_key_cursor_init(CamelTextIndexKeyCursor *idc)
1747 struct _CamelTextIndexKeyCursorPrivate *p;
1749 p = idc->priv = g_malloc0(sizeof(struct _CamelTextIndexKeyCursorPrivate));
1757 camel_text_index_key_cursor_finalise(CamelTextIndexKeyCursor *idc)
1759 struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
1763 camel_object_unref((CamelObject *)p->table);
1768 camel_text_index_key_cursor_get_type(void)
1770 static CamelType type = CAMEL_INVALID_TYPE;
1772 if (type == CAMEL_INVALID_TYPE) {
1773 type = camel_type_register(camel_index_cursor_get_type(), "CamelTextIndexKeyCursor",
1774 sizeof (CamelTextIndexKeyCursor),
1775 sizeof (CamelTextIndexKeyCursorClass),
1776 (CamelObjectClassInitFunc) camel_text_index_key_cursor_class_init,
1778 (CamelObjectInitFunc) camel_text_index_key_cursor_init,
1779 (CamelObjectFinalizeFunc) camel_text_index_key_cursor_finalise);
1785 CamelTextIndexKeyCursor *
1786 camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table)
1788 CamelTextIndexKeyCursor *idc = (CamelTextIndexKeyCursor *)camel_object_new(camel_text_index_key_cursor_get_type());
1789 CamelIndexCursor *cic = &idc->parent;
1790 struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
1792 cic->index = (CamelIndex *)idx;
1793 camel_object_ref((CamelObject *)idx);
1795 camel_object_ref((CamelObject *)table);
1800 /* ********************************************************************** */
1806 struct _CamelIndexRoot {
1807 struct _CamelBlockRoot root;
1809 camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */
1810 camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
1812 camel_block_t name_root; /* same, for names */
1813 camel_block_t name_hash_root;
1816 char wordbuffer[] = "This is a buffer of multiple words. Some of the words are duplicates"
1817 " while other words are the same, some are in difFerenT Different different case cAsE casE,"
1818 " with,with:with;with-with'with\"'\"various punctuation as well. So much for those Words. and 10"
1819 " numbers in a row too 1,2,3,4,5,6,7,8,9,10! Yay!.";
1821 int main(int argc, char **argv)
1826 CamelPartitionTable *cpi;
1827 CamelBlock *keyroot, *partroot;
1828 struct _CamelIndexRoot *root;
1830 char line[256], *key;
1832 int index = 0, flags, data;
1835 CamelIndexName *idn;
1836 CamelIndexCursor *idc;
1840 printf("Camel text index tester!\n");
1842 g_thread_init(NULL);
1843 camel_init(NULL, 0);
1845 idx = (CamelIndex *)camel_text_index_new("textindex", O_CREAT|O_RDWR|O_TRUNC);
1849 camel_index_compress(idx);
1854 for (i=0;i<100;i++) {
1857 sprintf(name, "%d", i);
1858 printf("Adding words to name '%s'\n", name);
1859 idn = camel_index_add_name(idx, name);
1860 camel_index_name_add_buffer(idn, wordbuffer, sizeof(wordbuffer)-1);
1861 camel_index_write_name(idx, idn);
1862 camel_object_unref((CamelObject *)idn);
1865 printf("Looking up which names contain word 'word'\n");
1866 idc = camel_index_find(idx, "words");
1867 while ( (word = camel_index_cursor_next(idc)) != NULL ) {
1868 printf(" name is '%s'\n", word);
1870 camel_object_unref((CamelObject *)idc);
1873 printf("Looking up which names contain word 'truncate'\n");
1874 idc = camel_index_find(idx, "truncate");
1875 while ( (word = camel_index_cursor_next(idc)) != NULL ) {
1876 printf(" name is '%s'\n", word);
1878 camel_object_unref((CamelObject *)idc);
1881 camel_index_sync(idx);
1882 camel_object_unref((CamelObject *)idx);
1885 bs = camel_block_file_new("blocks", "TESTINDX", CAMEL_BLOCK_SIZE);
1887 root = (struct _CamelIndexRoot *)bs->root;
1888 if (root->word_root == 0) {
1889 keyroot = camel_block_file_new_block(bs);
1890 root->word_root = keyroot->id;
1891 camel_block_file_touch_block(bs, bs->root_block);
1893 if (root->word_hash_root == 0) {
1894 partroot = camel_block_file_new_block(bs);
1895 root->word_hash_root = partroot->id;
1896 camel_block_file_touch_block(bs, bs->root_block);
1899 ki = camel_key_table_new(bs, root->word_root);
1900 cpi = camel_partition_table_new(bs, root->word_hash_root);
1902 fp = fopen("/usr/dict/words", "r");
1908 while (fgets(line, sizeof(line), fp) != NULL) {
1909 line[strlen(line)-1] = 0;
1911 /* see if its already there */
1912 keyid = camel_partition_table_lookup(cpi, line);
1914 m(printf("Adding word '%s' %d\n", line, index));
1916 keyid = camel_key_table_add(ki, line, index, 0);
1917 m(printf(" key = %08x\n", keyid));
1919 camel_partition_table_add(cpi, line, keyid);
1921 m(printf("Lookup word '%s'\n", line));
1922 keyid = camel_partition_table_lookup(cpi, line);
1923 m(printf(" key = %08x\n", keyid));
1926 m(printf("Lookup key %08x\n", keyid));
1928 camel_key_table_set_flags(ki, keyid, index, 1);
1930 data = camel_key_table_lookup(ki, keyid, &key, &flags);
1931 m(printf(" word = '%s' %d %04x\n", key, data, flags));
1933 g_assert(data == index && strcmp(key, line) == 0);
1940 printf("Scanning again\n");
1941 fseek(fp, SEEK_SET, 0);
1943 while (fgets(line, sizeof(line), fp) != NULL) {
1944 line[strlen(line)-1] = 0;
1945 m(printf("Lookup word '%s' %d\n", line, index));
1946 keyid = camel_partition_table_lookup(cpi, line);
1947 m(printf(" key = %08d\n", keyid));
1949 m(printf("Lookup key %08x\n", keyid));
1950 data = camel_key_table_lookup(ki, keyid, &key, &flags);
1951 m(printf(" word = '%s' %d\n", key, data));
1953 g_assert(data == index && strcmp(key, line) == 0);
1961 printf("Freeing partition index\n");
1962 camel_partition_table_free(cpi);
1964 printf("Syncing block file\n");
1965 camel_block_file_sync(bs);