Allow libpinyin to build in cross compile mode.
[platform/upstream/libpinyin.git] / src / storage / chewing_large_table.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include "chewing_large_table.h"
23 #include <assert.h>
24 #include "pinyin_phrase2.h"
25 #include "pinyin_parser2.h"
26
27
28 /* internal class definition */
29
30 namespace pinyin{
31 class ChewingLengthIndexLevel{
32
33 protected:
34     GArray * m_chewing_array_indexes;
35
36 public:
37     /* constructor/destructor */
38     ChewingLengthIndexLevel();
39     ~ChewingLengthIndexLevel();
40
41     /* load/store method */
42     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
43     bool store(MemoryChunk * new_chunk, table_offset_t offset,
44                table_offset_t & end);
45
46     /* search method */
47     int search(pinyin_option_t options, int phrase_length,
48                /* in */ const ChewingKey keys[],
49                /* out */ PhraseIndexRanges ranges) const;
50
51     /* add/remove index method */
52     int add_index(int phrase_length, /* in */ const ChewingKey keys[],
53                   /* in */ phrase_token_t token);
54     int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
55                      /* in */ phrase_token_t token);
56
57     /* get length method */
58     int get_length() const;
59
60     /* mask out method */
61     bool mask_out(phrase_token_t mask, phrase_token_t value);
62 };
63
64
65 template<size_t phrase_length>
66 class ChewingArrayIndexLevel{
67 protected:
68     typedef PinyinIndexItem2<phrase_length> IndexItem;
69
70 protected:
71     MemoryChunk m_chunk;
72
73     /* compress consecutive tokens */
74     int convert(pinyin_option_t options,
75                 const ChewingKey keys[],
76                 IndexItem * begin,
77                 IndexItem * end,
78                 PhraseIndexRanges ranges) const;
79
80 public:
81     /* load/store method */
82     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
83     bool store(MemoryChunk * new_chunk, table_offset_t offset,
84                table_offset_t & end);
85
86     /* search method */
87     int search(pinyin_option_t options, /* in */const ChewingKey keys[],
88                /* out */ PhraseIndexRanges ranges) const;
89
90     /* add/remove index method */
91     int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token);
92     int remove_index(/* in */ const ChewingKey keys[],
93                      /* in */ phrase_token_t token);
94
95     /* get length method */
96     int get_length() const;
97
98     /* mask out method */
99     bool mask_out(phrase_token_t mask, phrase_token_t value);
100 };
101
102 };
103
104
105 using namespace pinyin;
106
107 /* class implementation */
108
109 ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
110     : m_options(options) {
111     memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
112 }
113
114 void ChewingBitmapIndexLevel::reset() {
115     for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
116         for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
117             for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
118                 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
119                      ++n) {
120                     ChewingLengthIndexLevel * & length_array =
121                         m_chewing_length_indexes[k][l][m][n];
122                     if (length_array)
123                         delete length_array;
124                     length_array = NULL;
125                 }
126 }
127
128
129 /* search method */
130
131 int ChewingBitmapIndexLevel::search(int phrase_length,
132                                     /* in */ const ChewingKey keys[],
133                                     /* out */ PhraseIndexRanges ranges) const {
134     assert(phrase_length > 0);
135     return initial_level_search(phrase_length, keys, ranges);
136 }
137
138 int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
139     /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
140
141 /* macros */
142 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
143     {                                                                   \
144         result |= middle_and_final_level_search(ORIGIN, phrase_length,  \
145                                                 keys, ranges);          \
146         if (m_options & AMBIGUITY) {                                    \
147             result |= middle_and_final_level_search(ANOTHER,            \
148                                                     phrase_length,      \
149                                                     keys, ranges);      \
150         }                                                               \
151         return result;                                                  \
152     }
153
154     /* deal with ambiguities */
155     int result = SEARCH_NONE;
156     const ChewingKey & first_key = keys[0];
157
158     switch(first_key.m_initial) {
159         MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
160         MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
161         MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
162         MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
163         MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
164         MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
165         MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L);
166         MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L);
167         MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H);
168         MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F);
169         MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K);
170         MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G);
171
172     case CHEWING_L:
173         {
174             result |= middle_and_final_level_search
175                 (CHEWING_L, phrase_length, keys, ranges);
176
177             if (m_options & PINYIN_AMB_L_N)
178                 result |= middle_and_final_level_search
179                     (CHEWING_N, phrase_length, keys,ranges);
180
181             if (m_options & PINYIN_AMB_L_R)
182                 result |= middle_and_final_level_search
183                     (CHEWING_R, phrase_length, keys, ranges);
184             return result;
185         }
186     default:
187         {
188             result |= middle_and_final_level_search
189                 ((ChewingInitial) first_key.m_initial,
190                  phrase_length, keys, ranges);
191             return result;
192         }
193     }
194 #undef MATCH
195     return result;
196 }
197
198
199 int ChewingBitmapIndexLevel::middle_and_final_level_search
200 (ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[],
201  /* out */ PhraseIndexRanges ranges) const {
202
203 /* macros */
204 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
205     {                                                                   \
206         result = tone_level_search                                      \
207             (initial, middle,                                           \
208              ORIGIN, phrase_length, keys, ranges);                      \
209         if (m_options & AMBIGUITY) {                                    \
210             result |= tone_level_search                                 \
211                 (initial, middle,                                       \
212                  ANOTHER, phrase_length, keys, ranges);                 \
213         }                                                               \
214         return result;                                                  \
215     }
216
217     int result = SEARCH_NONE;
218     const ChewingKey & first_key = keys[0];
219     const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
220
221     switch(first_key.m_final) {
222     case CHEWING_ZERO_FINAL:
223         {
224             if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
225                 if (!(m_options & PINYIN_INCOMPLETE))
226                     return result;
227                 for (int m = CHEWING_ZERO_MIDDLE;
228                      m < CHEWING_NUMBER_OF_MIDDLES; ++m)
229                     for (int n = CHEWING_ZERO_FINAL;
230                          n < CHEWING_NUMBER_OF_FINALS; ++n) {
231
232                         if (CHEWING_ZERO_MIDDLE == m &&
233                             CHEWING_ZERO_FINAL == n)
234                             continue;
235
236                         result |= tone_level_search
237                             (initial, (ChewingMiddle) m, (ChewingFinal) n,
238                              phrase_length, keys, ranges);
239                     }
240                 return result;
241             } else { /* normal pinyin */
242                 result |= tone_level_search
243                     (initial, middle, CHEWING_ZERO_FINAL,
244                      phrase_length, keys, ranges);
245                 return result;
246             }
247         }
248
249         MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
250         MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
251         MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
252         MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
253         MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
254         MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
255
256     default:
257         {
258             result |= tone_level_search
259                 (initial, middle, (ChewingFinal) first_key.m_final,
260                  phrase_length, keys, ranges);
261             return result;
262         }
263     }
264 #undef MATCH
265     return result;
266 }
267
268
269 int ChewingBitmapIndexLevel::tone_level_search
270 (ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
271  int phrase_length, /* in */ const ChewingKey keys[],
272  /* out */ PhraseIndexRanges ranges) const {
273
274     int result = SEARCH_NONE;
275     const ChewingKey & first_key = keys[0];
276
277     switch (first_key.m_tone) {
278     case CHEWING_ZERO_TONE:
279         {
280             /* deal with zero tone in chewing large table. */
281             for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) {
282                 ChewingLengthIndexLevel * phrases =
283                     m_chewing_length_indexes
284                     [initial][middle][final][(ChewingTone)i];
285                 if (phrases)
286                     result |= phrases->search
287                         (m_options, phrase_length - 1, keys + 1, ranges);
288             }
289             return result;
290         }
291     default:
292         {
293             ChewingLengthIndexLevel * phrases =
294                 m_chewing_length_indexes
295                 [initial][middle][final][CHEWING_ZERO_TONE];
296             if (phrases)
297                 result |= phrases->search
298                     (m_options, phrase_length - 1, keys + 1, ranges);
299
300             phrases = m_chewing_length_indexes
301                 [initial][middle][final][(ChewingTone) first_key.m_tone];
302             if (phrases)
303                 result |= phrases->search
304                     (m_options, phrase_length - 1, keys + 1, ranges);
305             return result;
306         }
307     }
308     return result;
309 }
310
311
312 ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
313     m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
314 }
315
316 ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
317 #define CASE(len) case len:                                             \
318     {                                                                   \
319         ChewingArrayIndexLevel<len> * & array = g_array_index           \
320             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
321         if (array)                                                      \
322             delete array;                                               \
323         array = NULL;                                                   \
324         break;                                                          \
325     }
326
327     for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
328         switch (i){
329             CASE(0);
330             CASE(1);
331             CASE(2);
332             CASE(3);
333             CASE(4);
334             CASE(5);
335             CASE(6);
336             CASE(7);
337             CASE(8);
338             CASE(9);
339             CASE(10);
340             CASE(11);
341             CASE(12);
342             CASE(13);
343             CASE(14);
344             CASE(15);
345         default:
346             assert(false);
347         }
348     }
349 #undef CASE
350     g_array_free(m_chewing_array_indexes, TRUE);
351 }
352
353
354 int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
355                                     /* in */ const ChewingKey keys[],
356                                     /* out */ PhraseIndexRanges ranges) const {
357     int result = SEARCH_NONE;
358     if ((int) m_chewing_array_indexes->len < phrase_length + 1)
359         return result;
360     if ((int) m_chewing_array_indexes->len > phrase_length + 1)
361         result |= SEARCH_CONTINUED;
362
363 #define CASE(len) case len:                                             \
364     {                                                                   \
365         ChewingArrayIndexLevel<len> * & array = g_array_index           \
366             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
367         if (!array)                                                     \
368             return result;                                              \
369         result |= array->search(options, keys, ranges);                 \
370         return result;                                                  \
371     }
372
373     switch (phrase_length) {
374         CASE(0);
375         CASE(1);
376         CASE(2);
377         CASE(3);
378         CASE(4);
379         CASE(5);
380         CASE(6);
381         CASE(7);
382         CASE(8);
383         CASE(9);
384         CASE(10);
385         CASE(11);
386         CASE(12);
387         CASE(13);
388         CASE(14);
389         CASE(15);
390     default:
391         assert(false);
392     }
393
394 #undef CASE
395 }
396
397
398 template<size_t phrase_length>
399 int ChewingArrayIndexLevel<phrase_length>::search
400 (pinyin_option_t options, /* in */ const ChewingKey keys[],
401  /* out */ PhraseIndexRanges ranges) const {
402     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
403     chunk_begin = (IndexItem *) m_chunk.begin();
404     chunk_end = (IndexItem *) m_chunk.end();
405
406     /* do the search */
407     ChewingKey left_keys[phrase_length], right_keys[phrase_length];
408     compute_lower_value2(options, keys, left_keys, phrase_length);
409     compute_upper_value2(options, keys, right_keys, phrase_length);
410
411     IndexItem left(left_keys, -1), right(right_keys, -1);
412
413     IndexItem * begin = std_lite::lower_bound
414         (chunk_begin, chunk_end, left,
415          phrase_exact_less_than2<phrase_length>);
416     IndexItem * end   = std_lite::upper_bound
417         (chunk_begin, chunk_end, right,
418          phrase_exact_less_than2<phrase_length>);
419
420     return convert(options, keys, begin, end, ranges);
421 }
422
423 /* compress consecutive tokens */
424 template<size_t phrase_length>
425 int ChewingArrayIndexLevel<phrase_length>::convert
426 (pinyin_option_t options, const ChewingKey keys[],
427  IndexItem * begin, IndexItem * end,
428  PhraseIndexRanges ranges) const {
429     IndexItem * iter = NULL;
430     PhraseIndexRange cursor;
431     GArray * head, * cursor_head = NULL;
432
433     int result = SEARCH_NONE;
434     /* TODO: check the below code */
435     cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
436     for (iter = begin; iter != end; ++iter) {
437         if (0 != pinyin_compare_with_ambiguities2
438             (options, keys, iter->m_keys, phrase_length))
439             continue;
440
441         phrase_token_t token = iter->m_token;
442         head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
443         if (NULL == head)
444             continue;
445
446         result |= SEARCH_OK;
447
448         if (null_token == cursor.m_range_begin) {
449             cursor.m_range_begin = token;
450             cursor.m_range_end   = token + 1;
451             cursor_head = head;
452         } else if (cursor.m_range_end == token &&
453                    PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
454                    PHRASE_INDEX_LIBRARY_INDEX(token)) {
455             ++cursor.m_range_end;
456         } else {
457             g_array_append_val(cursor_head, cursor);
458             cursor.m_range_begin = token; cursor.m_range_end = token + 1;
459             cursor_head = head;
460         }
461     }
462
463     if (null_token == cursor.m_range_begin)
464         return result;
465
466     g_array_append_val(cursor_head, cursor);
467     return result;
468 }
469
470
471 /* add/remove index method */
472
473 int ChewingBitmapIndexLevel::add_index(int phrase_length,
474                                        /* in */ const ChewingKey keys[],
475                                        /* in */ phrase_token_t token) {
476     const ChewingKey first_key = keys[0];
477     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
478         [first_key.m_initial][first_key.m_middle]
479         [first_key.m_final][first_key.m_tone];
480
481     if (NULL == length_array) {
482         length_array = new ChewingLengthIndexLevel();
483     }
484
485     return length_array->add_index(phrase_length - 1, keys + 1, token);
486 }
487
488 int ChewingBitmapIndexLevel::remove_index(int phrase_length,
489                                           /* in */ const ChewingKey keys[],
490                                           /* in */ phrase_token_t token) {
491     const ChewingKey first_key = keys[0];
492     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
493         [first_key.m_initial][first_key.m_middle]
494         [first_key.m_final][first_key.m_tone];
495
496     if (NULL == length_array)
497         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
498
499     int retval = length_array->remove_index(phrase_length - 1, keys + 1, token);
500
501     /* remove empty array. */
502     if (0 == length_array->get_length()) {
503         delete length_array;
504         length_array = NULL;
505     }
506
507     return retval;
508 }
509
510 int ChewingLengthIndexLevel::add_index(int phrase_length,
511                                        /* in */ const ChewingKey keys[],
512                                        /* in */ phrase_token_t token) {
513     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
514         return ERROR_PHRASE_TOO_LONG;
515
516     if ((int) m_chewing_array_indexes->len <= phrase_length)
517         g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
518
519 #define CASE(len) case len:                                     \
520     {                                                           \
521         ChewingArrayIndexLevel<len> * & array = g_array_index   \
522             (m_chewing_array_indexes,                           \
523              ChewingArrayIndexLevel<len> *, len);               \
524         if (NULL == array)                                      \
525             array = new ChewingArrayIndexLevel<len>;            \
526         return array->add_index(keys, token);                   \
527     }
528
529     switch(phrase_length) {
530         CASE(0);
531         CASE(1);
532         CASE(2);
533         CASE(3);
534         CASE(4);
535         CASE(5);
536         CASE(6);
537         CASE(7);
538         CASE(8);
539         CASE(9);
540         CASE(10);
541         CASE(11);
542         CASE(12);
543         CASE(13);
544         CASE(14);
545         CASE(15);
546     default:
547         assert(false);
548     }
549
550 #undef CASE
551 }
552
553 int ChewingLengthIndexLevel::remove_index(int phrase_length,
554                                           /* in */ const ChewingKey keys[],
555                                           /* in */ phrase_token_t token) {
556     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
557         return ERROR_PHRASE_TOO_LONG;
558
559     if ((int) m_chewing_array_indexes->len <= phrase_length)
560         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
561
562 #define CASE(len) case len:                                     \
563     {                                                           \
564         ChewingArrayIndexLevel<len> * & array = g_array_index   \
565             (m_chewing_array_indexes,                           \
566              ChewingArrayIndexLevel<len> *, len);               \
567         if (NULL == array)                                      \
568             return ERROR_REMOVE_ITEM_DONOT_EXISTS;              \
569         int retval = array->remove_index(keys, token);          \
570                                                                 \
571         /* remove empty array. */                               \
572         if (0 == array->get_length()) {                         \
573             delete array;                                       \
574             array = NULL;                                       \
575                                                                 \
576             /* shrink self array. */                            \
577             g_array_set_size(m_chewing_array_indexes,           \
578                              get_length());                     \
579         }                                                       \
580         return retval;                                          \
581     }
582
583     switch (phrase_length) {
584         CASE(0);
585         CASE(1);
586         CASE(2);
587         CASE(3);
588         CASE(4);
589         CASE(5);
590         CASE(6);
591         CASE(7);
592         CASE(8);
593         CASE(9);
594         CASE(10);
595         CASE(11);
596         CASE(12);
597         CASE(13);
598         CASE(14);
599         CASE(15);
600     default:
601         assert(false);
602     }
603
604 #undef CASE
605 }
606
607 template<size_t phrase_length>
608 int ChewingArrayIndexLevel<phrase_length>::add_index
609 (/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
610     IndexItem * begin, * end;
611
612     IndexItem add_elem(keys, token);
613     begin = (IndexItem *) m_chunk.begin();
614     end   = (IndexItem *) m_chunk.end();
615
616     std_lite::pair<IndexItem *, IndexItem *> range;
617     range = std_lite::equal_range
618         (begin, end, add_elem, phrase_exact_less_than2<phrase_length>);
619
620     IndexItem * cur_elem;
621     for (cur_elem = range.first;
622          cur_elem != range.second; ++cur_elem) {
623         if (cur_elem->m_token == token)
624             return ERROR_INSERT_ITEM_EXISTS;
625         if (cur_elem->m_token > token)
626             break;
627     }
628
629     int offset = (cur_elem - begin) * sizeof(IndexItem);
630     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
631     return ERROR_OK;
632 }
633
634 template<size_t phrase_length>
635 int ChewingArrayIndexLevel<phrase_length>::remove_index
636 (/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
637     IndexItem * begin, * end;
638
639     IndexItem remove_elem(keys, token);
640     begin = (IndexItem *) m_chunk.begin();
641     end   = (IndexItem *) m_chunk.end();
642
643     std_lite::pair<IndexItem *, IndexItem *> range;
644     range = std_lite::equal_range
645         (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>);
646
647     IndexItem * cur_elem;
648     for (cur_elem = range.first;
649          cur_elem != range.second; ++cur_elem) {
650         if (cur_elem->m_token == token)
651             break;
652     }
653
654     if (cur_elem == range.second)
655         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
656
657     int offset = (cur_elem - begin) * sizeof(IndexItem);
658     m_chunk.remove_content(offset, sizeof(IndexItem));
659     return ERROR_OK;
660 }
661
662
663 /* load text method */
664 bool ChewingLargeTable::load_text(FILE * infile) {
665     char pinyin[256];
666     char phrase[256];
667     phrase_token_t token;
668     size_t freq;
669
670     while (!feof(infile)) {
671         int num = fscanf(infile, "%s %s %u %ld",
672                          pinyin, phrase, &token, &freq);
673
674         if (4 != num)
675             continue;
676
677         if(feof(infile))
678             break;
679
680         glong len = g_utf8_strlen(phrase, -1);
681
682         FullPinyinParser2 parser;
683         ChewingKeyVector keys;
684         ChewingKeyRestVector key_rests;
685
686         keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
687         key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
688
689         pinyin_option_t options = USE_TONE;
690         parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
691
692         if (len != keys->len) {
693             fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
694                     pinyin, phrase, token, freq);
695             continue;
696         }
697
698         add_index(keys->len, (ChewingKey *)keys->data, token);
699
700         g_array_free(keys, TRUE);
701         g_array_free(key_rests, TRUE);
702     }
703
704     return true;
705 }
706
707
708 /* load/store method */
709
710 bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
711                                    table_offset_t end) {
712     reset();
713     char * begin = (char *) chunk->begin();
714     table_offset_t phrase_begin, phrase_end;
715     table_offset_t * index = (table_offset_t *) (begin + offset);
716     phrase_end = *index;
717
718     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
719         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
720             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
721                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
722                     phrase_begin = phrase_end;
723                     index++;
724                     phrase_end = *index;
725
726                     if (phrase_begin == phrase_end) /* null pointer */
727                         continue;
728
729                     /* after reset() all phrases are null pointer. */
730                     ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
731                     m_chewing_length_indexes[k][l][m][n] = phrases;
732
733                     phrases->load(chunk, phrase_begin, phrase_end - 1);
734                     assert(phrase_end <= end);
735                     assert(*(begin + phrase_end - 1)  == c_separate);
736                 }
737
738     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
739     assert(c_separate == *(begin + offset));
740     return true;
741 }
742
743 bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
744                                     table_offset_t offset,
745                                     table_offset_t & end) {
746     table_offset_t phrase_end;
747     table_offset_t index = offset;
748     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
749
750     /* add '#' */
751     new_chunk->set_content(offset, &c_separate, sizeof(char));
752     offset += sizeof(char);
753     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
754     index += sizeof(table_offset_t);
755
756     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
757         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
758             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
759                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
760                     ChewingLengthIndexLevel * phrases =
761                         m_chewing_length_indexes[k][l][m][n];
762
763                     if (NULL == phrases) { /* null pointer */
764                         new_chunk->set_content(index, &offset,
765                                                sizeof(table_offset_t));
766                         index += sizeof(table_offset_t);
767                         continue;
768                     }
769
770                     /* has a end '#' */
771                     phrases->store(new_chunk, offset, phrase_end);
772                     offset = phrase_end;
773
774                     /* add '#' */
775                     new_chunk->set_content(offset, &c_separate, sizeof(char));
776                     offset += sizeof(char);
777                     new_chunk->set_content(index, &offset,
778                                            sizeof(table_offset_t));
779                     index += sizeof(table_offset_t);
780                 }
781
782     end = offset;
783     return true;
784 }
785
786 bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
787                                    table_offset_t end) {
788     char * begin = (char *) chunk->begin();
789     guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
790     table_offset_t * index = (table_offset_t *)
791         (begin + offset + sizeof(guint32));
792
793     table_offset_t phrase_begin, phrase_end = *index;
794     g_array_set_size(m_chewing_array_indexes, 0);
795     for (guint32 i = 0; i < nindex; ++i) {
796         phrase_begin = phrase_end;
797         index++;
798         phrase_end = *index;
799
800         if (phrase_begin == phrase_end) {
801             void * null = NULL;
802             g_array_append_val(m_chewing_array_indexes, null);
803             continue;
804         }
805
806 #define CASE(len) case len:                                             \
807         {                                                               \
808             ChewingArrayIndexLevel<len> * phrase =                      \
809                 new ChewingArrayIndexLevel<len>;                        \
810             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
811             assert(*(begin + phrase_end - 1) == c_separate);            \
812             assert(phrase_end <= end);                                  \
813             g_array_append_val(m_chewing_array_indexes, phrase);        \
814             break;                                                      \
815         }
816
817         switch ( i ){
818             CASE(0);
819             CASE(1);
820             CASE(2);
821             CASE(3);
822             CASE(4);
823             CASE(5);
824             CASE(6);
825             CASE(7);
826             CASE(8);
827             CASE(9);
828             CASE(10);
829             CASE(11);
830             CASE(12);
831             CASE(13);
832             CASE(14);
833             CASE(15);
834         default:
835             assert(false);
836         }
837
838 #undef CASE
839     }
840
841     /* check '#' */
842     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
843     assert(c_separate == *(begin + offset));
844     return true;
845 }
846
847 bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
848                                     table_offset_t offset,
849                                     table_offset_t & end) {
850     guint32 nindex = m_chewing_array_indexes->len; /* number of index */
851     new_chunk->set_content(offset, &nindex, sizeof(guint32));
852     table_offset_t index = offset + sizeof(guint32);
853
854     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
855     new_chunk->set_content(offset, &c_separate, sizeof(char));
856     offset += sizeof(char);
857     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
858     index += sizeof(table_offset_t);
859
860     table_offset_t phrase_end;
861     for (guint32 i = 0; i < nindex; ++i) {
862 #define CASE(len) case len:                                             \
863         {                                                               \
864             ChewingArrayIndexLevel<len> * phrase = g_array_index        \
865                 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
866             if (NULL == phrase) {                                       \
867                 new_chunk->set_content                                  \
868                     (index, &offset, sizeof(table_offset_t));           \
869                 index += sizeof(table_offset_t);                        \
870                 continue;                                               \
871             }                                                           \
872             phrase->store(new_chunk, offset, phrase_end);               \
873             offset = phrase_end;                                        \
874             break;                                                      \
875         }
876
877         switch ( i ){
878             CASE(0);
879             CASE(1);
880             CASE(2);
881             CASE(3);
882             CASE(4);
883             CASE(5);
884             CASE(6);
885             CASE(7);
886             CASE(8);
887             CASE(9);
888             CASE(10);
889             CASE(11);
890             CASE(12);
891             CASE(13);
892             CASE(14);
893             CASE(15);
894         default:
895             assert(false);
896         }
897 #undef CASE
898
899         /* add '#' */
900         new_chunk->set_content(offset, &c_separate, sizeof(char));
901         offset += sizeof(char);
902         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
903         index += sizeof(table_offset_t);
904     }
905
906     end = offset;
907     return true;
908 }
909
910 template<size_t phrase_length>
911 bool ChewingArrayIndexLevel<phrase_length>::
912 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
913     char * begin = (char *) chunk->begin();
914     m_chunk.set_chunk(begin + offset, end - offset, NULL);
915     return true;
916 }
917
918 template<size_t phrase_length>
919 bool ChewingArrayIndexLevel<phrase_length>::
920 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
921     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
922     end = offset + m_chunk.size();
923     return true;
924 }
925
926
927 /* get length method */
928
929 int ChewingLengthIndexLevel::get_length() const {
930     int length = m_chewing_array_indexes->len;
931
932     /* trim trailing zero. */
933     for (int i = length - 1; i >= 0; --i) {
934         void * array = g_array_index(m_chewing_array_indexes, void *, i);
935
936         if (NULL != array)
937             break;
938
939         --length;
940     }
941
942     return length;
943 }
944
945 template<size_t phrase_length>
946 int ChewingArrayIndexLevel<phrase_length>::get_length() const {
947     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
948     chunk_begin = (IndexItem *) m_chunk.begin();
949     chunk_end = (IndexItem *) m_chunk.end();
950
951     return chunk_end - chunk_begin;
952 }
953
954
955 /* mask out method */
956
957 bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask,
958                                        phrase_token_t value) {
959     for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
960         for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
961             for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
962                 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
963                      ++n) {
964                     ChewingLengthIndexLevel * & length_array =
965                         m_chewing_length_indexes[k][l][m][n];
966
967                     if (NULL == length_array)
968                         continue;
969
970                     length_array->mask_out(mask, value);
971
972                     if (0 == length_array->get_length()) {
973                         delete length_array;
974                         length_array = NULL;
975                     }
976                 }
977     return true;
978 }
979
980 bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
981                                        phrase_token_t value) {
982 #define CASE(len) case len:                                     \
983     {                                                           \
984         ChewingArrayIndexLevel<len> * & array = g_array_index   \
985             (m_chewing_array_indexes,                           \
986              ChewingArrayIndexLevel<len> *, len);               \
987                                                                 \
988         if (NULL == array)                                      \
989             continue;                                           \
990                                                                 \
991         array->mask_out(mask, value);                           \
992                                                                 \
993         if (0 == array->get_length()) {                         \
994             delete array;                                       \
995             array = NULL;                                       \
996         }                                                       \
997         break;                                                  \
998     }
999
1000     for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
1001         switch (i){
1002             CASE(0);
1003             CASE(1);
1004             CASE(2);
1005             CASE(3);
1006             CASE(4);
1007             CASE(5);
1008             CASE(6);
1009             CASE(7);
1010             CASE(8);
1011             CASE(9);
1012             CASE(10);
1013             CASE(11);
1014             CASE(12);
1015             CASE(13);
1016             CASE(14);
1017             CASE(15);
1018         default:
1019             assert(false);
1020         }
1021     }
1022 #undef CASE
1023     g_array_set_size(m_chewing_array_indexes, get_length());
1024     return true;
1025 }
1026
1027 template<size_t phrase_length>
1028 bool ChewingArrayIndexLevel<phrase_length>::mask_out
1029 (phrase_token_t mask, phrase_token_t value) {
1030     IndexItem * begin = NULL, * end = NULL;
1031     begin = (IndexItem *) m_chunk.begin();
1032     end   = (IndexItem *) m_chunk.end();
1033
1034     for (IndexItem * cur = begin; cur != end; ++cur) {
1035         if ((cur->m_token & mask) != value)
1036             continue;
1037
1038         int offset = (cur - begin) * sizeof(IndexItem);
1039         m_chunk.remove_content(offset, sizeof(IndexItem));
1040
1041         /* update chunk end. */
1042         end = (IndexItem *) m_chunk.end();
1043         --cur;
1044     }
1045
1046     return true;
1047 }