From 5ac93b27a0b3fe0c23de226ba255643cfe93d40e Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Thu, 8 Nov 2012 12:14:49 +0800
Subject: [PATCH] write mask out for chewing large table

---
Notes (not part of the commit message):
- Template argument lists (<len>, <phrase_length>) were missing from this
  copy of the patch and are restored here; the `int` type of the template
  parameter is assumed -- TODO confirm against the class declaration.
- ChewingArrayIndexLevel::mask_out no longer steps its cursor back after a
  removal, so it never forms a pointer before the start of the chunk.
- Blob ids on the "index" lines are those of the original patch.

 src/storage/chewing_large_table.cpp | 113 +++++++++++++++++++++++++++++++++++-
 src/storage/chewing_large_table.h   |   7 +++
 2 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
index 7639dfe..33bb733 100644
--- a/src/storage/chewing_large_table.cpp
+++ b/src/storage/chewing_large_table.cpp
@@ -56,6 +56,9 @@ public:
 
     /* get length method */
     int get_length() const;
+
+    /* mask out method */
+    bool mask_out(phrase_token_t mask, phrase_token_t value);
 };
 
 
@@ -91,6 +94,9 @@ public:
 
     /* get length method */
     int get_length() const;
+
+    /* mask out method */
+    bool mask_out(phrase_token_t mask, phrase_token_t value);
 };
 
 };
@@ -342,7 +348,6 @@ ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
     }
 #undef CASE
     g_array_free(m_chewing_array_indexes, TRUE);
-    m_chewing_array_indexes = NULL;
 }
 
 
@@ -944,3 +949,109 @@ int ChewingArrayIndexLevel<phrase_length>::get_length() const {
 
     return chunk_end - chunk_begin;
 }
+
+
+/* mask out method */
+
+/* Run mask_out() on the length level of every (initial, middle, final,
+ * tone) combination and delete the levels that report a length of zero
+ * afterwards.  Always returns true. */
+bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask,
+                                       phrase_token_t value) {
+    for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
+        for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
+            for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
+                for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
+                     ++n) {
+                    ChewingLengthIndexLevel * & length_array =
+                        m_chewing_length_indexes[k][l][m][n];
+
+                    if (NULL == length_array)
+                        continue;
+
+                    length_array->mask_out(mask, value);
+
+                    if (0 == length_array->get_length()) {
+                        delete length_array;
+                        length_array = NULL;
+                    }
+                }
+    return true;
+}
+
+/* Run mask_out() on every non-NULL array level, delete the levels that
+ * report a length of zero afterwards, then resize the index array to
+ * get_length().  Always returns true. */
+bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
+                                       phrase_token_t value) {
+#define CASE(len) case len:                                     \
+    {                                                           \
+        ChewingArrayIndexLevel<len> * & array = g_array_index   \
+            (m_chewing_array_indexes,                           \
+             ChewingArrayIndexLevel<len> *, len);               \
+                                                                \
+        if (NULL == array)                                      \
+            continue;                                           \
+                                                                \
+        array->mask_out(mask, value);                           \
+                                                                \
+        if (0 == array->get_length()) {                         \
+            delete array;                                       \
+            array = NULL;                                       \
+        }                                                       \
+        break;                                                  \
+    }
+
+    for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
+        switch (i){
+            CASE(0);
+            CASE(1);
+            CASE(2);
+            CASE(3);
+            CASE(4);
+            CASE(5);
+            CASE(6);
+            CASE(7);
+            CASE(8);
+            CASE(9);
+            CASE(10);
+            CASE(11);
+            CASE(12);
+            CASE(13);
+            CASE(14);
+            CASE(15);
+        default:
+            assert(false);
+        }
+    }
+#undef CASE
+    g_array_set_size(m_chewing_array_indexes, get_length());
+    return true;
+}
+
+/* Remove every item whose token satisfies (token & mask) == value.
+ * Always returns true. */
+template<int phrase_length>
+bool ChewingArrayIndexLevel<phrase_length>::mask_out
+(phrase_token_t mask, phrase_token_t value) {
+    IndexItem * begin = NULL, * end = NULL;
+    begin = (IndexItem *) m_chunk.begin();
+    end = (IndexItem *) m_chunk.end();
+
+    for (IndexItem * cur = begin; cur != end; ) {
+        if ((cur->m_token & mask) != value) {
+            ++cur;
+            continue;
+        }
+
+        int offset = (cur - begin) * sizeof(IndexItem);
+        m_chunk.remove_content(offset, sizeof(IndexItem));
+
+        /* the item at cur is gone, so the same slot must be examined
+         * again: do not advance (and never step back before begin),
+         * only refresh the chunk end. */
+        end = (IndexItem *) m_chunk.end();
+    }
+
+    return true;
+}
diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h
index 746c18d..59f494e 100644
--- a/src/storage/chewing_large_table.h
+++ b/src/storage/chewing_large_table.h
@@ -75,12 +75,15 @@ public:
     /* search method */
     int search(int phrase_length, /* in */ ChewingKey keys[],
                /* out */ PhraseIndexRanges ranges) const;
+
     /* add/remove index method */
     int add_index(int phrase_length, /* in */ ChewingKey keys[],
                   /* in */ phrase_token_t token);
     int remove_index(int phrase_length, /* in */ ChewingKey keys[],
                      /* in */ phrase_token_t token);
 
+    /* mask out method: drops items with (token & mask) == value */
+    bool mask_out(phrase_token_t mask, phrase_token_t value);
 };
 
 
@@ -138,6 +141,10 @@ public:
         return m_bitmap_table.remove_index(phrase_length, keys, token);
     }
 
+    /* mask out method: forwards to the bitmap table */
+    bool mask_out(phrase_token_t mask, phrase_token_t value) {
+        return m_bitmap_table.mask_out(mask, value);
+    }
 };
 
 };
-- 
2.7.4