3 * Library to deal with pinyin.
5 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #include "chewing_large_table.h"
24 #include "pinyin_phrase2.h"
25 #include "pinyin_parser2.h"
28 /* internal class definition */
/*
 * Second-level index: one instance hangs off each
 * [initial][middle][final][tone] slot of ChewingBitmapIndexLevel.
 * It owns a GArray of pointers; the implementation's CASE macros read
 * slot `len` as a ChewingArrayIndexLevel<len> *.
 * NOTE(review): the braces and access specifiers of this declaration are
 * not visible in this excerpt.
 */
31 class ChewingLengthIndexLevel{
/* pointer-sized slots, indexed by the number of keys left after the first */
34 GArray * m_chewing_array_indexes;
37 /* constructor/destructor */
38 ChewingLengthIndexLevel();
39 ~ChewingLengthIndexLevel();
41 /* load/store method */
/* load reads this level from `chunk` starting at `offset`;
 * store writes it to `new_chunk` at `offset` and takes `end` by reference. */
42 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
43 bool store(MemoryChunk * new_chunk, table_offset_t offset,
44 table_offset_t & end);
/* look up `phrase_length` keys; the result is an OR of SEARCH_* flags */
47 int search(pinyin_option_t options, int phrase_length,
48 /* in */ const ChewingKey keys[],
49 /* out */ PhraseIndexRanges ranges) const;
51 /* add/remove index method */
52 int add_index(int phrase_length, /* in */ const ChewingKey keys[],
53 /* in */ phrase_token_t token);
54 int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
55 /* in */ phrase_token_t token);
57 /* get length method */
58 int get_length() const;
/* forwards (mask, value) to each sub-index, which drops entries whose
 * masked token equals `value` */
61 bool mask_out(phrase_token_t mask, phrase_token_t value);
/*
 * Leaf index for a fixed number of keys: a run of
 * PinyinIndexItem2<phrase_length> records held in the member m_chunk and
 * searched with binary search (lower_bound / upper_bound / equal_range).
 * NOTE(review): the declaration of m_chunk, the braces and the access
 * specifiers are not visible in this excerpt.
 */
65 template<size_t phrase_length>
66 class ChewingArrayIndexLevel{
68 typedef PinyinIndexItem2<phrase_length> IndexItem;
73 /* compress consecutive tokens */
/* NOTE(review): part of this parameter list is missing from the excerpt;
 * the definition takes (options, keys, begin, end, ranges). */
74 int convert(pinyin_option_t options,
75 const ChewingKey keys[],
78 PhraseIndexRanges ranges) const;
81 /* load/store method */
82 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
83 bool store(MemoryChunk * new_chunk, table_offset_t offset,
84 table_offset_t & end);
/* look up exactly phrase_length keys; matches are reported via `ranges` */
87 int search(pinyin_option_t options, /* in */const ChewingKey keys[],
88 /* out */ PhraseIndexRanges ranges) const;
90 /* add/remove index method */
91 int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token);
92 int remove_index(/* in */ const ChewingKey keys[],
93 /* in */ phrase_token_t token);
95 /* get length method */
/* number of IndexItem records currently stored */
96 int get_length() const;
/* removes every record whose (token & mask) equals value */
99 bool mask_out(phrase_token_t mask, phrase_token_t value);
105 using namespace pinyin;
107 /* class implementation */
/*
 * Remembers the pinyin options used for later searches and zero-fills the
 * whole m_chewing_length_indexes table, so every slot starts out as a
 * null pointer.
 */
109 ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
110 : m_options(options) {
111 memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
/*
 * Visits every [initial][middle][final][tone] slot of the table and binds
 * a reference to the slot's pointer.
 * NOTE(review): the statements applied to each slot are not visible in
 * this excerpt - presumably the entry is deleted and nulled; confirm.
 */
114 void ChewingBitmapIndexLevel::reset() {
115 for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
116 for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
117 for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
118 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
/* reference, so the slot itself can be overwritten */
120 ChewingLengthIndexLevel * & length_array =
121 m_chewing_length_indexes[k][l][m][n];
/*
 * Public entry point of the lookup: requires at least one key and hands
 * the request to initial_level_search(), whose result it returns.
 */
131 int ChewingBitmapIndexLevel::search(int phrase_length,
132 /* in */ const ChewingKey keys[],
133 /* out */ PhraseIndexRanges ranges) const {
/* keys[0] is read by every later stage, so an empty phrase is a caller bug */
134 assert(phrase_length > 0);
135 return initial_level_search(phrase_length, keys, ranges);
/*
 * First stage of the lookup: dispatches on the initial of keys[0].
 * For an initial listed in a MATCH() line the exact initial is searched
 * and, when the corresponding PINYIN_AMB_* bit is set in m_options, its
 * fuzzy partner as well; the results are OR-ed into `result`.
 * NOTE(review): parts of this function (tail of the macro, case labels,
 * returns, closing braces) are not visible in this excerpt.
 */
138 int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
139 /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
142 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
144 result |= middle_and_final_level_search(ORIGIN, phrase_length, \
146 if (m_options & AMBIGUITY) { \
147 result |= middle_and_final_level_search(ANOTHER, \
154 /* deal with ambiguities */
155 int result = SEARCH_NONE;
156 const ChewingKey & first_key = keys[0];
158 switch(first_key.m_initial) {
159 MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
160 MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
161 MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
162 MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
163 MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
164 MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
165 MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L);
166 MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L);
167 MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H);
168 MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F);
169 MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K);
170 MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G);
/* Hand-written branch (presumably the CHEWING_L case; its label is not
 * visible here): L has two fuzzy partners, N and R, each guarded by its
 * own option bit, so it does not fit the two-way MATCH() macro. */
174 result |= middle_and_final_level_search
175 (CHEWING_L, phrase_length, keys, ranges);
177 if (m_options & PINYIN_AMB_L_N)
178 result |= middle_and_final_level_search
179 (CHEWING_N, phrase_length, keys,ranges);
181 if (m_options & PINYIN_AMB_L_R)
182 result |= middle_and_final_level_search
183 (CHEWING_R, phrase_length, keys, ranges);
/* Remaining branch (presumably the default case; label not visible):
 * only the key's own initial is searched. */
188 result |= middle_and_final_level_search
189 ((ChewingInitial) first_key.m_initial,
190 phrase_length, keys, ranges);
/*
 * Second stage of the lookup: with the initial already chosen, dispatches
 * on the final of keys[0] and calls tone_level_search() for every
 * (middle, final) combination that may match.
 * NOTE(review): parts of this function (tail of the macro, returns,
 * continue/break statements, closing braces) are not visible in this
 * excerpt.
 */
199 int ChewingBitmapIndexLevel::middle_and_final_level_search
200 (ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[],
201 /* out */ PhraseIndexRanges ranges) const {
204 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
206 result = tone_level_search \
208 ORIGIN, phrase_length, keys, ranges); \
209 if (m_options & AMBIGUITY) { \
210 result |= tone_level_search \
212 ANOTHER, phrase_length, keys, ranges); \
217 int result = SEARCH_NONE;
218 const ChewingKey & first_key = keys[0];
219 const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
221 switch(first_key.m_final) {
/* Zero final: when the middle is zero as well the key is an incomplete
 * pinyin, which is gated by the PINYIN_INCOMPLETE option and fans out
 * over every (middle, final) pair except (zero, zero); otherwise only
 * the given middle with a zero final is searched. */
222 case CHEWING_ZERO_FINAL:
224 if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
225 if (!(m_options & PINYIN_INCOMPLETE))
227 for (int m = CHEWING_ZERO_MIDDLE;
228 m < CHEWING_NUMBER_OF_MIDDLES; ++m)
229 for (int n = CHEWING_ZERO_FINAL;
230 n < CHEWING_NUMBER_OF_FINALS; ++n) {
232 if (CHEWING_ZERO_MIDDLE == m &&
233 CHEWING_ZERO_FINAL == n)
236 result |= tone_level_search
237 (initial, (ChewingMiddle) m, (ChewingFinal) n,
238 phrase_length, keys, ranges);
241 } else { /* normal pinyin */
242 result |= tone_level_search
243 (initial, middle, CHEWING_ZERO_FINAL,
244 phrase_length, keys, ranges);
/* Fuzzy finals: each pair is listed in both directions so either
 * spelling finds the other when its option bit is set. */
249 MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
250 MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
251 MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
252 MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
253 MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
254 MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
/* Remaining branch (presumably the default case; label not visible):
 * search the key's own middle and final only. */
258 result |= tone_level_search
259 (initial, middle, (ChewingFinal) first_key.m_final,
260 phrase_length, keys, ranges);
/*
 * Last stage of the first-key lookup: picks the tone slot(s) under
 * [initial][middle][final] and asks each slot's ChewingLengthIndexLevel
 * to match the remaining keys (keys + 1, phrase_length - 1).
 * A key without tone searches every tone slot; a key with a tone searches
 * the zero-tone slot and the slot of its own tone.
 * NOTE(review): no null check on `phrases` is visible in this excerpt;
 * slots start out as null pointers (see the constructor's memset), so
 * confirm each dereference below is guarded.
 */
269 int ChewingBitmapIndexLevel::tone_level_search
270 (ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
271 int phrase_length, /* in */ const ChewingKey keys[],
272 /* out */ PhraseIndexRanges ranges) const {
274 int result = SEARCH_NONE;
275 const ChewingKey & first_key = keys[0];
277 switch (first_key.m_tone) {
278 case CHEWING_ZERO_TONE:
280 /* deal with zero tone in chewing large table. */
281 for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) {
282 ChewingLengthIndexLevel * phrases =
283 m_chewing_length_indexes
284 [initial][middle][final][(ChewingTone)i];
286 result |= phrases->search
287 (m_options, phrase_length - 1, keys + 1, ranges);
/* Key carries a tone: first the zero-tone slot ... */
293 ChewingLengthIndexLevel * phrases =
294 m_chewing_length_indexes
295 [initial][middle][final][CHEWING_ZERO_TONE];
297 result |= phrases->search
298 (m_options, phrase_length - 1, keys + 1, ranges);
/* ... then the slot matching the key's own tone. */
300 phrases = m_chewing_length_indexes
301 [initial][middle][final][(ChewingTone) first_key.m_tone];
303 result |= phrases->search
304 (m_options, phrase_length - 1, keys + 1, ranges);
/*
 * Creates the empty array of sub-index pointers. The array is not
 * zero-terminated, but the `clear` flag is TRUE, so slots added later by
 * g_array_set_size() start out zeroed, i.e. as null pointers.
 */
312 ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
313 m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
/*
 * Walks every slot of m_chewing_array_indexes, using the CASE macro to
 * view slot `len` with its concrete type ChewingArrayIndexLevel<len> *,
 * and finally frees the GArray together with its element storage.
 * NOTE(review): the per-slot cleanup inside the macro and the switch are
 * not visible in this excerpt - presumably each non-null sub-index is
 * deleted; confirm.
 */
316 ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
/* the slot's pointee type depends on its index, hence one case per length */
317 #define CASE(len) case len: \
319 ChewingArrayIndexLevel<len> * & array = g_array_index \
320 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
327 for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
/* TRUE: release the element buffer as well as the GArray wrapper */
350 g_array_free(m_chewing_array_indexes, TRUE);
/*
 * Matches the keys that follow the first one. `phrase_length` is the
 * number of remaining keys and selects slot `phrase_length` of
 * m_chewing_array_indexes, whose sub-index performs the actual search.
 * Returns an OR of SEARCH_* flags.
 * NOTE(review): the statement following the first length check and the
 * rest of the CASE macro / switch are not visible in this excerpt.
 */
354 int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
355 /* in */ const ChewingKey keys[],
356 /* out */ PhraseIndexRanges ranges) const {
357 int result = SEARCH_NONE;
/* First check: the array has no slot for this length.
 * Second check: slots for longer phrases exist, reported to the caller
 * as SEARCH_CONTINUED. */
358 if ((int) m_chewing_array_indexes->len < phrase_length + 1)
360 if ((int) m_chewing_array_indexes->len > phrase_length + 1)
361 result |= SEARCH_CONTINUED;
/* view slot `len` with its concrete template type and delegate to it */
363 #define CASE(len) case len: \
365 ChewingArrayIndexLevel<len> * & array = g_array_index \
366 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
369 result |= array->search(options, keys, ranges); \
373 switch (phrase_length) {
/*
 * Finds all records that can match `keys` under `options`.
 * The lowest and highest key sequences compatible with the options are
 * computed first; lower_bound / upper_bound with
 * phrase_exact_less_than2 then narrow the chunk to that window, and
 * convert() filters the window and fills `ranges`.
 */
398 template<size_t phrase_length>
399 int ChewingArrayIndexLevel<phrase_length>::search
400 (pinyin_option_t options, /* in */ const ChewingKey keys[],
401 /* out */ PhraseIndexRanges ranges) const {
402 IndexItem * chunk_begin = NULL, * chunk_end = NULL;
403 chunk_begin = (IndexItem *) m_chunk.begin();
404 chunk_end = (IndexItem *) m_chunk.end();
/* bounds of the candidate window, expressed as key sequences */
407 ChewingKey left_keys[phrase_length], right_keys[phrase_length];
408 compute_lower_value2(options, keys, left_keys, phrase_length);
409 compute_upper_value2(options, keys, right_keys, phrase_length);
/* probe items: only the keys matter for the comparison, the token is a dummy */
411 IndexItem left(left_keys, -1), right(right_keys, -1);
413 IndexItem * begin = std_lite::lower_bound
414 (chunk_begin, chunk_end, left,
415 phrase_exact_less_than2<phrase_length>);
416 IndexItem * end = std_lite::upper_bound
417 (chunk_begin, chunk_end, right,
418 phrase_exact_less_than2<phrase_length>);
420 return convert(options, keys, begin, end, ranges);
423 /* compress consecutive tokens */
/*
 * Scans the candidate window [begin, end), tests each record against
 * `keys` with pinyin_compare_with_ambiguities2() and folds runs of
 * consecutive tokens that belong to the same phrase library into
 * half-open PhraseIndexRange values [m_range_begin, m_range_end).
 * Finished ranges are appended to the GArray selected from `ranges` by
 * PHRASE_INDEX_LIBRARY_INDEX(token).
 * NOTE(review): several statements (the action taken on a failed
 * comparison, the handling of `head`/`cursor_head`, result updates, the
 * else branch and the returns) are not visible in this excerpt.
 */
424 template<size_t phrase_length>
425 int ChewingArrayIndexLevel<phrase_length>::convert
426 (pinyin_option_t options, const ChewingKey keys[],
427 IndexItem * begin, IndexItem * end,
428 PhraseIndexRanges ranges) const {
429 IndexItem * iter = NULL;
430 PhraseIndexRange cursor;
431 GArray * head, * cursor_head = NULL;
433 int result = SEARCH_NONE;
434 /* TODO: check the below code */
/* null_token in m_range_begin marks "no range open yet" */
435 cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
436 for (iter = begin; iter != end; ++iter) {
437 if (0 != pinyin_compare_with_ambiguities2
438 (options, keys, iter->m_keys, phrase_length))
441 phrase_token_t token = iter->m_token;
442 head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
/* open the first range */
448 if (null_token == cursor.m_range_begin) {
449 cursor.m_range_begin = token;
450 cursor.m_range_end = token + 1;
/* token directly follows the open range within the same library: extend it */
452 } else if (cursor.m_range_end == token &&
453 PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
454 PHRASE_INDEX_LIBRARY_INDEX(token)) {
455 ++cursor.m_range_end;
/* otherwise: flush the open range and start a new one at this token */
457 g_array_append_val(cursor_head, cursor);
458 cursor.m_range_begin = token; cursor.m_range_end = token + 1;
/* after the loop: nothing was opened, or flush the last open range */
463 if (null_token == cursor.m_range_begin)
466 g_array_append_val(cursor_head, cursor);
471 /* add/remove index method */
/*
 * Adds `token` under the key sequence `keys`. The first key selects the
 * [initial][middle][final][tone] slot; the slot's ChewingLengthIndexLevel
 * is created on demand and receives the remaining keys
 * (keys + 1, phrase_length - 1). Returns whatever that level returns.
 */
473 int ChewingBitmapIndexLevel::add_index(int phrase_length,
474 /* in */ const ChewingKey keys[],
475 /* in */ phrase_token_t token) {
476 const ChewingKey first_key = keys[0];
/* reference to the slot, so a freshly created level is stored in the table */
477 ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
478 [first_key.m_initial][first_key.m_middle]
479 [first_key.m_final][first_key.m_tone];
481 if (NULL == length_array) {
482 length_array = new ChewingLengthIndexLevel();
485 return length_array->add_index(phrase_length - 1, keys + 1, token);
/*
 * Removes `token` from under the key sequence `keys`. Returns
 * ERROR_REMOVE_ITEM_DONOT_EXISTS when the slot chosen by the first key is
 * empty; otherwise the remaining keys are forwarded to the slot's
 * ChewingLengthIndexLevel.
 * NOTE(review): the body of the "remove empty array" branch and the final
 * return are not visible in this excerpt.
 */
488 int ChewingBitmapIndexLevel::remove_index(int phrase_length,
489 /* in */ const ChewingKey keys[],
490 /* in */ phrase_token_t token) {
491 const ChewingKey first_key = keys[0];
492 ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
493 [first_key.m_initial][first_key.m_middle]
494 [first_key.m_final][first_key.m_tone];
496 if (NULL == length_array)
497 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
499 int retval = length_array->remove_index(phrase_length - 1, keys + 1, token);
501 /* remove empty array. */
502 if (0 == length_array->get_length()) {
/*
 * Adds `token` to the sub-index for `phrase_length` remaining keys.
 * Returns ERROR_PHRASE_TOO_LONG when phrase_length + 1 is not below
 * MAX_PHRASE_LENGTH; otherwise returns the sub-index's add_index() result.
 * NOTE(review): the condition guarding the `new` inside the macro is not
 * visible in this excerpt - presumably it runs only for a null slot.
 */
510 int ChewingLengthIndexLevel::add_index(int phrase_length,
511 /* in */ const ChewingKey keys[],
512 /* in */ phrase_token_t token) {
/* phrase_length excludes the first key, hence the + 1 */
513 if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
514 return ERROR_PHRASE_TOO_LONG;
/* grow the pointer array so that slot `phrase_length` exists */
516 if ((int) m_chewing_array_indexes->len <= phrase_length)
517 g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
/* view slot `len` with its concrete template type and delegate to it */
519 #define CASE(len) case len: \
521 ChewingArrayIndexLevel<len> * & array = g_array_index \
522 (m_chewing_array_indexes, \
523 ChewingArrayIndexLevel<len> *, len); \
525 array = new ChewingArrayIndexLevel<len>; \
526 return array->add_index(keys, token); \
529 switch(phrase_length) {
/*
 * Removes `token` from the sub-index for `phrase_length` remaining keys.
 * Returns ERROR_PHRASE_TOO_LONG for over-long phrases and
 * ERROR_REMOVE_ITEM_DONOT_EXISTS when no slot exists for this length.
 * A sub-index that becomes empty is cleaned up and the pointer array is
 * resized afterwards.
 * NOTE(review): the null-slot test, the cleanup statements and the new
 * size passed to g_array_set_size() are not visible in this excerpt.
 */
553 int ChewingLengthIndexLevel::remove_index(int phrase_length,
554 /* in */ const ChewingKey keys[],
555 /* in */ phrase_token_t token) {
/* phrase_length excludes the first key, hence the + 1 */
556 if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
557 return ERROR_PHRASE_TOO_LONG;
/* no slot for this length: nothing can be stored there */
559 if ((int) m_chewing_array_indexes->len <= phrase_length)
560 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
/* view slot `len` with its concrete template type and delegate to it */
562 #define CASE(len) case len: \
564 ChewingArrayIndexLevel<len> * & array = g_array_index \
565 (m_chewing_array_indexes, \
566 ChewingArrayIndexLevel<len> *, len); \
568 return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
569 int retval = array->remove_index(keys, token); \
571 /* remove empty array. */ \
572 if (0 == array->get_length()) { \
576 /* shrink self array. */ \
577 g_array_set_size(m_chewing_array_indexes, \
583 switch (phrase_length) {
/*
 * Inserts the record (keys, token) into the chunk. equal_range() locates
 * the run of records with the same keys; within that run the scan looks
 * for the token and returns ERROR_INSERT_ITEM_EXISTS when it is already
 * present. The new record is inserted at the position where the scan
 * stopped.
 * NOTE(review): the statement under the `>` test and the final return are
 * not visible in this excerpt - presumably the scan breaks there to keep
 * tokens in ascending order; confirm.
 */
607 template<size_t phrase_length>
608 int ChewingArrayIndexLevel<phrase_length>::add_index
609 (/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
610 IndexItem * begin, * end;
612 IndexItem add_elem(keys, token);
613 begin = (IndexItem *) m_chunk.begin();
614 end = (IndexItem *) m_chunk.end();
616 std_lite::pair<IndexItem *, IndexItem *> range;
617 range = std_lite::equal_range
618 (begin, end, add_elem, phrase_exact_less_than2<phrase_length>);
620 IndexItem * cur_elem;
621 for (cur_elem = range.first;
622 cur_elem != range.second; ++cur_elem) {
623 if (cur_elem->m_token == token)
624 return ERROR_INSERT_ITEM_EXISTS;
625 if (cur_elem->m_token > token)
/* byte offset of the insertion point inside the chunk */
629 int offset = (cur_elem - begin) * sizeof(IndexItem);
630 m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
/*
 * Removes the record (keys, token) from the chunk. equal_range() locates
 * the run of records with the same keys; the scan then looks for the
 * token. When the scan reaches the end of the run the function returns
 * ERROR_REMOVE_ITEM_DONOT_EXISTS, otherwise one record is cut out of the
 * chunk at the found position.
 * NOTE(review): the statement under the token test (presumably a break)
 * and the final return are not visible in this excerpt.
 */
634 template<size_t phrase_length>
635 int ChewingArrayIndexLevel<phrase_length>::remove_index
636 (/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
637 IndexItem * begin, * end;
639 IndexItem remove_elem(keys, token);
640 begin = (IndexItem *) m_chunk.begin();
641 end = (IndexItem *) m_chunk.end();
643 std_lite::pair<IndexItem *, IndexItem *> range;
644 range = std_lite::equal_range
645 (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>);
647 IndexItem * cur_elem;
648 for (cur_elem = range.first;
649 cur_elem != range.second; ++cur_elem) {
650 if (cur_elem->m_token == token)
/* scan ran off the end of the run: the token is not stored */
654 if (cur_elem == range.second)
655 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
/* byte offset of the record to drop */
657 int offset = (cur_elem - begin) * sizeof(IndexItem);
658 m_chunk.remove_content(offset, sizeof(IndexItem));
663 /* load text method */
/*
 * Reads whitespace-separated records "pinyin phrase token freq" from
 * `infile`, parses the pinyin with FullPinyinParser2 (USE_TONE) and, when
 * the number of parsed keys equals the UTF-8 length of the phrase, adds
 * the token to the index. Mismatching records are reported on stderr.
 * NOTE(review): the "%s" conversions carry no field width and the buffer
 * declarations are not visible in this excerpt - confirm long input
 * fields cannot overflow them.
 * NOTE(review): the control flow after the fscanf() and inside the
 * mismatch branch is not visible; if that branch skips the rest of the
 * iteration, `keys` and `key_rests` must still be released.
 */
664 bool ChewingLargeTable::load_text(FILE * infile) {
667 phrase_token_t token;
670 while (!feof(infile)) {
/* num is the count of successfully converted fields (4 for a full record) */
671 int num = fscanf(infile, "%s %s %u %ld",
672 pinyin, phrase, &token, &freq);
/* number of characters (not bytes) in the phrase */
680 glong len = g_utf8_strlen(phrase, -1);
682 FullPinyinParser2 parser;
683 ChewingKeyVector keys;
684 ChewingKeyRestVector key_rests;
/* fresh per-record arrays, released at the end of the iteration */
686 keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
687 key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
689 pinyin_option_t options = USE_TONE;
690 parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
/* one key is expected per character of the phrase */
692 if (len != keys->len) {
693 fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
694 pinyin, phrase, token, freq);
698 add_index(keys->len, (ChewingKey *)keys->data, token);
700 g_array_free(keys, TRUE);
701 g_array_free(key_rests, TRUE);
708 /* load/store method */
/*
 * Rebuilds the table from the binary image in `chunk`. At `offset` the
 * image holds a table of table_offset_t values, one boundary per slot in
 * [initial][middle][final][tone] order plus one extra entry. Each slot
 * spans [phrase_begin, phrase_end); an empty span means a null slot.
 * A non-empty span ends with a c_separate byte, which is excluded from
 * the range handed to ChewingLengthIndexLevel::load().
 * NOTE(review): the initialisation and per-slot update of `phrase_end`,
 * the statement taken for empty slots and the return are not visible in
 * this excerpt.
 * NOTE(review): the `phrase_end <= end` assertion runs only after the span
 * has already been passed to load(); consider whether it should come first.
 */
710 bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
711 table_offset_t end) {
713 char * begin = (char *) chunk->begin();
714 table_offset_t phrase_begin, phrase_end;
/* start of the offset table inside the image */
715 table_offset_t * index = (table_offset_t *) (begin + offset);
718 for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
719 for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
720 for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
721 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
/* this slot starts where the previous one ended */
722 phrase_begin = phrase_end;
726 if (phrase_begin == phrase_end) /* null pointer */
729 /* after reset() all phrases are null pointer. */
730 ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
731 m_chewing_length_indexes[k][l][m][n] = phrases;
/* - 1: the trailing separator byte is not part of the slot's data */
733 phrases->load(chunk, phrase_begin, phrase_end - 1);
734 assert(phrase_end <= end);
735 assert(*(begin + phrase_end - 1) == c_separate);
/* skip the whole offset table; a separator byte must follow it */
738 offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
739 assert(c_separate == *(begin + offset));
/*
 * Serialises the table into `new_chunk` starting at `offset`.
 * Layout: a table of table_offset_t boundaries (one per slot plus one),
 * a c_separate byte, then the data of every non-null slot, each followed
 * by a c_separate byte. `index` walks the boundary table while `offset`
 * walks the data area; for a null slot the current offset is written
 * again, producing an empty span.
 * NOTE(review): the statements between the visible lines (advancing
 * `offset` after a slot is stored, assigning `end`, the return) are not
 * visible in this excerpt.
 */
743 bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
744 table_offset_t offset,
745 table_offset_t & end) {
746 table_offset_t phrase_end;
/* `index` stays at the start of the boundary table, `offset` jumps past it */
747 table_offset_t index = offset;
748 offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
/* separator after the table, then record where the first slot's data begins */
751 new_chunk->set_content(offset, &c_separate, sizeof(char));
752 offset += sizeof(char);
753 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
754 index += sizeof(table_offset_t);
756 for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
757 for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
758 for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
759 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
760 ChewingLengthIndexLevel * phrases =
761 m_chewing_length_indexes[k][l][m][n];
763 if (NULL == phrases) { /* null pointer */
764 new_chunk->set_content(index, &offset,
765 sizeof(table_offset_t));
766 index += sizeof(table_offset_t);
/* non-null slot: data, separator byte, then the slot's end boundary */
771 phrases->store(new_chunk, offset, phrase_end);
775 new_chunk->set_content(offset, &c_separate, sizeof(char));
776 offset += sizeof(char);
777 new_chunk->set_content(index, &offset,
778 sizeof(table_offset_t));
779 index += sizeof(table_offset_t);
/*
 * Rebuilds this level from the binary image in `chunk`. At `offset` the
 * image holds a guint32 count of sub-indexes followed by a table of
 * table_offset_t boundaries (count + 1 entries). Each sub-index spans
 * [phrase_begin, phrase_end); an empty span appends a null slot, a
 * non-empty span is loaded into a new ChewingArrayIndexLevel<len> and
 * must end with a c_separate byte. After the loop the header is skipped
 * and a separator byte is expected right behind it.
 * NOTE(review): the per-iteration update of `phrase_end`, the declaration
 * of `null`, the switch and the return are not visible in this excerpt.
 */
786 bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
787 table_offset_t end) {
788 char * begin = (char *) chunk->begin();
789 guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
/* boundary table starts right after the count */
790 table_offset_t * index = (table_offset_t *)
791 (begin + offset + sizeof(guint32));
793 table_offset_t phrase_begin, phrase_end = *index;
/* start from an empty array; slots are appended in index order */
794 g_array_set_size(m_chewing_array_indexes, 0);
795 for (guint32 i = 0; i < nindex; ++i) {
796 phrase_begin = phrase_end;
/* empty span: keep the slot but leave it null */
800 if (phrase_begin == phrase_end) {
802 g_array_append_val(m_chewing_array_indexes, null);
/* non-empty span: the concrete sub-index type depends on the slot number */
806 #define CASE(len) case len: \
808 ChewingArrayIndexLevel<len> * phrase = \
809 new ChewingArrayIndexLevel<len>; \
810 phrase->load(chunk, phrase_begin, phrase_end - 1); \
811 assert(*(begin + phrase_end - 1) == c_separate); \
812 assert(phrase_end <= end); \
813 g_array_append_val(m_chewing_array_indexes, phrase); \
842 offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
843 assert(c_separate == *(begin + offset));
/*
 * Serialises this level into `new_chunk` starting at `offset`.
 * Layout: a guint32 count of sub-indexes, a table of table_offset_t
 * boundaries (count + 1 entries), a c_separate byte, then the data of
 * every non-null sub-index, each followed by a c_separate byte. `index`
 * walks the boundary table while `offset` walks the data area; a null
 * sub-index writes the current offset again, producing an empty span.
 * The lines after the macro write the separator and the end boundary of
 * a stored sub-index.
 * NOTE(review): the switch, the statements closing each case, the
 * assignment of `end` and the return are not visible in this excerpt.
 */
847 bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
848 table_offset_t offset,
849 table_offset_t & end) {
850 guint32 nindex = m_chewing_array_indexes->len; /* number of index */
851 new_chunk->set_content(offset, &nindex, sizeof(guint32));
/* `index` points at the boundary table, `offset` jumps past the header */
852 table_offset_t index = offset + sizeof(guint32);
854 offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
855 new_chunk->set_content(offset, &c_separate, sizeof(char));
856 offset += sizeof(char);
/* first boundary: where the first sub-index's data begins */
857 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
858 index += sizeof(table_offset_t);
860 table_offset_t phrase_end;
861 for (guint32 i = 0; i < nindex; ++i) {
862 #define CASE(len) case len: \
864 ChewingArrayIndexLevel<len> * phrase = g_array_index \
865 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
866 if (NULL == phrase) { \
867 new_chunk->set_content \
868 (index, &offset, sizeof(table_offset_t)); \
869 index += sizeof(table_offset_t); \
872 phrase->store(new_chunk, offset, phrase_end); \
873 offset = phrase_end; \
900 new_chunk->set_content(offset, &c_separate, sizeof(char));
901 offset += sizeof(char);
902 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
903 index += sizeof(table_offset_t);
/*
 * Points m_chunk at the bytes [offset, end) of `chunk`.
 * NOTE(review): the third argument of set_chunk() is NULL - presumably
 * "no free function", i.e. the memory stays owned by `chunk`; confirm
 * against MemoryChunk. The return statement is not visible in this
 * excerpt.
 */
910 template<size_t phrase_length>
911 bool ChewingArrayIndexLevel<phrase_length>::
912 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
913 char * begin = (char *) chunk->begin();
914 m_chunk.set_chunk(begin + offset, end - offset, NULL);
/*
 * Copies the whole content of m_chunk into `new_chunk` at `offset` and
 * reports the first byte after the copy through `end`.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
918 template<size_t phrase_length>
919 bool ChewingArrayIndexLevel<phrase_length>::
920 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
921 new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
922 end = offset + m_chunk.size();
927 /* get length method */
/*
 * Computes the length of this level, starting from the number of slots
 * in m_chewing_array_indexes and scanning the slots from the last one
 * backwards.
 * NOTE(review): the loop body after the slot is read and the return are
 * not visible in this excerpt - per the existing comment, trailing null
 * slots are presumably not counted; confirm.
 */
929 int ChewingLengthIndexLevel::get_length() const {
930 int length = m_chewing_array_indexes->len;
932 /* trim trailing zero. */
933 for (int i = length - 1; i >= 0; --i) {
/* only null-ness matters here, so the slot is read as an untyped pointer */
934 void * array = g_array_index(m_chewing_array_indexes, void *, i);
/*
 * Returns the number of IndexItem records stored in m_chunk, computed as
 * the pointer distance between the chunk's begin and end.
 */
945 template<size_t phrase_length>
946 int ChewingArrayIndexLevel<phrase_length>::get_length() const {
947 IndexItem * chunk_begin = NULL, * chunk_end = NULL;
948 chunk_begin = (IndexItem *) m_chunk.begin();
949 chunk_end = (IndexItem *) m_chunk.end();
951 return chunk_end - chunk_begin;
955 /* mask out method */
/*
 * Applies mask_out(mask, value) to every non-null slot of the table and
 * then checks whether the slot's level has become empty.
 * NOTE(review): the statement taken for null slots, the body of the
 * "empty" branch and the return are not visible in this excerpt -
 * presumably an emptied level is deleted and its slot nulled; confirm.
 */
957 bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask,
958 phrase_token_t value) {
959 for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
960 for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
961 for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
962 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
/* reference, so the slot itself can be overwritten */
964 ChewingLengthIndexLevel * & length_array =
965 m_chewing_length_indexes[k][l][m][n];
967 if (NULL == length_array)
970 length_array->mask_out(mask, value);
972 if (0 == length_array->get_length()) {
/*
 * Applies mask_out(mask, value) to every sub-index of this level, checks
 * whether a sub-index has become empty, and finally resizes the pointer
 * array to get_length().
 * NOTE(review): the null-slot handling, the body of the "empty" branch,
 * the switch and the return are not visible in this excerpt.
 */
980 bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
981 phrase_token_t value) {
982 #define CASE(len) case len: \
984 ChewingArrayIndexLevel<len> * & array = g_array_index \
985 (m_chewing_array_indexes, \
986 ChewingArrayIndexLevel<len> *, len); \
991 array->mask_out(mask, value); \
993 if (0 == array->get_length()) { \
1000 for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
/* resize the pointer array to the length reported by get_length() */
1023 g_array_set_size(m_chewing_array_indexes, get_length());
/*
 * Removes from m_chunk every record whose (token & mask) equals `value`.
 * Records are removed in place while iterating, so the cached `end`
 * pointer is refreshed after each removal.
 * NOTE(review): the statement under the mask test (presumably a continue)
 * and the rest of the loop after the `end` refresh are not visible in
 * this excerpt; after a removal the next record sits at `cur`, so
 * confirm the iterator is stepped back before the loop's ++cur.
 */
1027 template<size_t phrase_length>
1028 bool ChewingArrayIndexLevel<phrase_length>::mask_out
1029 (phrase_token_t mask, phrase_token_t value) {
1030 IndexItem * begin = NULL, * end = NULL;
1031 begin = (IndexItem *) m_chunk.begin();
1032 end = (IndexItem *) m_chunk.end();
1034 for (IndexItem * cur = begin; cur != end; ++cur) {
1035 if ((cur->m_token & mask) != value)
/* byte offset of the record to drop */
1038 int offset = (cur - begin) * sizeof(IndexItem);
1039 m_chunk.remove_content(offset, sizeof(IndexItem));
1041 /* update chunk end. */
1042 end = (IndexItem *) m_chunk.end();