7639dfe7425c8877807e89be95cfd62226605c39
[platform/upstream/libpinyin.git] / src / storage / chewing_large_table.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include "chewing_large_table.h"
23 #include <assert.h>
24 #include "pinyin_phrase2.h"
25 #include "pinyin_parser2.h"
26
27
28 /* internal class definition */
29
30 namespace pinyin{
31 class ChewingLengthIndexLevel{
32
33 protected:
34     GArray * m_chewing_array_indexes;
35
36 public:
37     /* constructor/destructor */
38     ChewingLengthIndexLevel();
39     ~ChewingLengthIndexLevel();
40
41     /* load/store method */
42     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
43     bool store(MemoryChunk * new_chunk, table_offset_t offset,
44                table_offset_t & end);
45
46     /* search method */
47     int search(pinyin_option_t options, int phrase_length,
48                /* in */ ChewingKey keys[],
49                /* out */ PhraseIndexRanges ranges) const;
50
51     /* add/remove index method */
52     int add_index(int phrase_length, /* in */ ChewingKey keys[],
53                   /* in */ phrase_token_t token);
54     int remove_index(int phrase_length, /* in */ ChewingKey keys[],
55                      /* in */ phrase_token_t token);
56
57     /* get length method */
58     int get_length() const;
59 };
60
61
62 template<size_t phrase_length>
63 class ChewingArrayIndexLevel{
64 protected:
65     typedef PinyinIndexItem2<phrase_length> IndexItem;
66
67 protected:
68     MemoryChunk m_chunk;
69
70     /* compress consecutive tokens */
71     int convert(pinyin_option_t options,
72                 ChewingKey keys[],
73                 IndexItem * begin,
74                 IndexItem * end,
75                 PhraseIndexRanges ranges) const;
76
77 public:
78     /* load/store method */
79     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
80     bool store(MemoryChunk * new_chunk, table_offset_t offset,
81                table_offset_t & end);
82
83     /* search method */
84     int search(pinyin_option_t options, /* in */ChewingKey keys[],
85                /* out */ PhraseIndexRanges ranges) const;
86
87     /* add/remove index method */
88     int add_index(/* in */ ChewingKey keys[], /* in */ phrase_token_t token);
89     int remove_index(/* in */ ChewingKey keys[],
90                      /* in */ phrase_token_t token);
91
92     /* get length method */
93     int get_length() const;
94 };
95
96 };
97
98
99 using namespace pinyin;
100
101 /* class implementation */
102
103 ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
104     : m_options(options) {
105     memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
106 }
107
108 void ChewingBitmapIndexLevel::reset() {
109     for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
110         for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
111             for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
112                 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
113                      ++n) {
114                     ChewingLengthIndexLevel * & length_array =
115                         m_chewing_length_indexes[k][l][m][n];
116                     if (length_array)
117                         delete length_array;
118                     length_array = NULL;
119                 }
120 }
121
122
123 /* search method */
124
125 int ChewingBitmapIndexLevel::search(int phrase_length,
126                                     /* in */ ChewingKey keys[],
127                                     /* out */ PhraseIndexRanges ranges) const {
128     assert(phrase_length > 0);
129     return initial_level_search(phrase_length, keys, ranges);
130 }
131
132 int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
133     /* in */ ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
134
135 /* macros */
136 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
137     {                                                                   \
138         result |= middle_and_final_level_search(ORIGIN, phrase_length,  \
139                                                 keys, ranges);          \
140         if (m_options & AMBIGUITY) {                                    \
141             result |= middle_and_final_level_search(ANOTHER,            \
142                                                     phrase_length,      \
143                                                     keys, ranges);      \
144         }                                                               \
145         return result;                                                  \
146     }
147
148     /* deal with ambiguities */
149     int result = SEARCH_NONE;
150     const ChewingKey & first_key = keys[0];
151
152     switch(first_key.m_initial) {
153         MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
154         MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
155         MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
156         MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
157         MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
158         MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
159         MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L);
160         MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L);
161         MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H);
162         MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F);
163         MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K);
164         MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G);
165
166     case CHEWING_L:
167         {
168             result |= middle_and_final_level_search
169                 (CHEWING_L, phrase_length, keys, ranges);
170
171             if (m_options & PINYIN_AMB_L_N)
172                 result |= middle_and_final_level_search
173                     (CHEWING_N, phrase_length, keys,ranges);
174
175             if (m_options & PINYIN_AMB_L_R)
176                 result |= middle_and_final_level_search
177                     (CHEWING_R, phrase_length, keys, ranges);
178             return result;
179         }
180     default:
181         {
182             result |= middle_and_final_level_search
183                 ((ChewingInitial) first_key.m_initial,
184                  phrase_length, keys, ranges);
185             return result;
186         }
187     }
188 #undef MATCH
189     return result;
190 }
191
192
193 int ChewingBitmapIndexLevel::middle_and_final_level_search
194 (ChewingInitial initial, int phrase_length, /* in */ ChewingKey keys[],
195  /* out */ PhraseIndexRanges ranges) const {
196
197 /* macros */
198 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
199     {                                                                   \
200         result = tone_level_search                                      \
201             (initial, middle,                                           \
202              ORIGIN, phrase_length, keys, ranges);                      \
203         if (m_options & AMBIGUITY) {                                    \
204             result |= tone_level_search                                 \
205                 (initial, middle,                                       \
206                  ANOTHER, phrase_length, keys, ranges);                 \
207         }                                                               \
208         return result;                                                  \
209     }
210
211     int result = SEARCH_NONE;
212     const ChewingKey & first_key = keys[0];
213     const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
214
215     switch(first_key.m_final) {
216     case CHEWING_ZERO_FINAL:
217         {
218             if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
219                 if (!(m_options & PINYIN_INCOMPLETE))
220                     return result;
221                 for (int m = CHEWING_ZERO_MIDDLE;
222                      m < CHEWING_NUMBER_OF_MIDDLES; ++m)
223                     for (int n = CHEWING_ZERO_FINAL;
224                          n < CHEWING_NUMBER_OF_FINALS; ++n) {
225
226                         if (CHEWING_ZERO_MIDDLE == m &&
227                             CHEWING_ZERO_FINAL == n)
228                             continue;
229
230                         result |= tone_level_search
231                             (initial, (ChewingMiddle) m, (ChewingFinal) n,
232                              phrase_length, keys, ranges);
233                     }
234                 return result;
235             } else { /* normal pinyin */
236                 result |= tone_level_search
237                     (initial, middle, CHEWING_ZERO_FINAL,
238                      phrase_length, keys, ranges);
239                 return result;
240             }
241         }
242
243         MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
244         MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
245         MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
246         MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
247         MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
248         MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
249
250     default:
251         {
252             result |= tone_level_search
253                 (initial, middle, (ChewingFinal) first_key.m_final,
254                  phrase_length, keys, ranges);
255             return result;
256         }
257     }
258 #undef MATCH
259     return result;
260 }
261
262
263 int ChewingBitmapIndexLevel::tone_level_search
264 (ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
265  int phrase_length, /* in */ ChewingKey keys[],
266  /* out */ PhraseIndexRanges ranges) const {
267
268     int result = SEARCH_NONE;
269     const ChewingKey & first_key = keys[0];
270
271     switch (first_key.m_tone) {
272     case CHEWING_ZERO_TONE:
273         {
274             /* deal with zero tone in chewing large table. */
275             for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) {
276                 ChewingLengthIndexLevel * phrases =
277                     m_chewing_length_indexes
278                     [initial][middle][final][(ChewingTone)i];
279                 if (phrases)
280                     result |= phrases->search
281                         (m_options, phrase_length - 1, keys + 1, ranges);
282             }
283             return result;
284         }
285     default:
286         {
287             ChewingLengthIndexLevel * phrases =
288                 m_chewing_length_indexes
289                 [initial][middle][final][CHEWING_ZERO_TONE];
290             if (phrases)
291                 result |= phrases->search
292                     (m_options, phrase_length - 1, keys + 1, ranges);
293
294             phrases = m_chewing_length_indexes
295                 [initial][middle][final][(ChewingTone) first_key.m_tone];
296             if (phrases)
297                 result |= phrases->search
298                     (m_options, phrase_length - 1, keys + 1, ranges);
299             return result;
300         }
301     }
302     return result;
303 }
304
305
306 ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
307     m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
308 }
309
310 ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
311 #define CASE(len) case len:                                             \
312     {                                                                   \
313         ChewingArrayIndexLevel<len> * & array = g_array_index           \
314             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
315         if (array)                                                      \
316             delete array;                                               \
317         array = NULL;                                                   \
318         break;                                                          \
319     }
320
321     for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
322         switch (i){
323             CASE(0);
324             CASE(1);
325             CASE(2);
326             CASE(3);
327             CASE(4);
328             CASE(5);
329             CASE(6);
330             CASE(7);
331             CASE(8);
332             CASE(9);
333             CASE(10);
334             CASE(11);
335             CASE(12);
336             CASE(13);
337             CASE(14);
338             CASE(15);
339         default:
340             assert(false);
341         }
342     }
343 #undef CASE
344     g_array_free(m_chewing_array_indexes, TRUE);
345     m_chewing_array_indexes = NULL;
346 }
347
348
349 int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
350                                     /* in */ ChewingKey keys[],
351                                     /* out */ PhraseIndexRanges ranges) const {
352     int result = SEARCH_NONE;
353     if (m_chewing_array_indexes->len < phrase_length + 1)
354         return result;
355     if (m_chewing_array_indexes->len > phrase_length + 1)
356         result |= SEARCH_CONTINUED;
357
358 #define CASE(len) case len:                                             \
359     {                                                                   \
360         ChewingArrayIndexLevel<len> * & array = g_array_index           \
361             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
362         if (!array)                                                     \
363             return result;                                              \
364         result |= array->search(options, keys, ranges);                 \
365         return result;                                                  \
366     }
367
368     switch (phrase_length) {
369         CASE(0);
370         CASE(1);
371         CASE(2);
372         CASE(3);
373         CASE(4);
374         CASE(5);
375         CASE(6);
376         CASE(7);
377         CASE(8);
378         CASE(9);
379         CASE(10);
380         CASE(11);
381         CASE(12);
382         CASE(13);
383         CASE(14);
384         CASE(15);
385     default:
386         assert(false);
387     }
388
389 #undef CASE
390 }
391
392
393 template<size_t phrase_length>
394 int ChewingArrayIndexLevel<phrase_length>::search
395 (pinyin_option_t options, /* in */ChewingKey keys[],
396  /* out */ PhraseIndexRanges ranges) const {
397     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
398     chunk_begin = (IndexItem *) m_chunk.begin();
399     chunk_end = (IndexItem *) m_chunk.end();
400
401     /* do the search */
402     ChewingKey left_keys[phrase_length], right_keys[phrase_length];
403     compute_lower_value2(options, keys, left_keys, phrase_length);
404     compute_upper_value2(options, keys, right_keys, phrase_length);
405
406     IndexItem left(left_keys, -1), right(right_keys, -1);
407
408     IndexItem * begin = std_lite::lower_bound
409         (chunk_begin, chunk_end, left,
410          phrase_exact_less_than2<phrase_length>);
411     IndexItem * end   = std_lite::upper_bound
412         (chunk_begin, chunk_end, right,
413          phrase_exact_less_than2<phrase_length>);
414
415     return convert(options, keys, begin, end, ranges);
416 }
417
418 /* compress consecutive tokens */
419 template<size_t phrase_length>
420 int ChewingArrayIndexLevel<phrase_length>::convert
421 (pinyin_option_t options, ChewingKey keys[],
422  IndexItem * begin, IndexItem * end,
423  PhraseIndexRanges ranges) const {
424     IndexItem * iter = NULL;
425     PhraseIndexRange cursor;
426     GArray * head, * cursor_head = NULL;
427
428     int result = SEARCH_NONE;
429     /* TODO: check the below code */
430     cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
431     for (iter = begin; iter != end; ++iter) {
432         if (0 != pinyin_compare_with_ambiguities2
433             (options, keys, iter->m_keys, phrase_length))
434             continue;
435
436         phrase_token_t token = iter->m_token;
437         head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
438         if (NULL == head)
439             continue;
440
441         result |= SEARCH_OK;
442
443         if (null_token == cursor.m_range_begin) {
444             cursor.m_range_begin = token;
445             cursor.m_range_end   = token + 1;
446             cursor_head = head;
447         } else if (cursor.m_range_end == token &&
448                    PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
449                    PHRASE_INDEX_LIBRARY_INDEX(token)) {
450             ++cursor.m_range_end;
451         } else {
452             g_array_append_val(cursor_head, cursor);
453             cursor.m_range_begin = token; cursor.m_range_end = token + 1;
454             cursor_head = head;
455         }
456     }
457
458     if (null_token == cursor.m_range_begin)
459         return result;
460
461     g_array_append_val(cursor_head, cursor);
462     return result;
463 }
464
465
466 /* add/remove index method */
467
468 int ChewingBitmapIndexLevel::add_index(int phrase_length,
469                                        /* in */ ChewingKey keys[],
470                                        /* in */ phrase_token_t token) {
471     const ChewingKey first_key = keys[0];
472     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
473         [first_key.m_initial][first_key.m_middle]
474         [first_key.m_final][first_key.m_tone];
475
476     if (NULL == length_array) {
477         length_array = new ChewingLengthIndexLevel();
478     }
479
480     return length_array->add_index(phrase_length - 1, keys + 1, token);
481 }
482
483 int ChewingBitmapIndexLevel::remove_index(int phrase_length,
484                                           /* in */ ChewingKey keys[],
485                                           /* in */ phrase_token_t token) {
486     const ChewingKey first_key = keys[0];
487     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
488         [first_key.m_initial][first_key.m_middle]
489         [first_key.m_final][first_key.m_tone];
490
491     if (NULL == length_array)
492         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
493
494     int retval = length_array->remove_index(phrase_length - 1, keys + 1, token);
495
496     /* remove empty array. */
497     if (0 == length_array->get_length()) {
498         delete length_array;
499         length_array = NULL;
500     }
501
502     return retval;
503 }
504
505 int ChewingLengthIndexLevel::add_index(int phrase_length,
506                                        /* in */ ChewingKey keys[],
507                                        /* in */ phrase_token_t token) {
508     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
509         return ERROR_PHRASE_TOO_LONG;
510
511     if (m_chewing_array_indexes->len <= phrase_length)
512         g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
513
514 #define CASE(len) case len:                                     \
515     {                                                           \
516         ChewingArrayIndexLevel<len> * & array = g_array_index   \
517             (m_chewing_array_indexes,                           \
518              ChewingArrayIndexLevel<len> *, len);               \
519         if (NULL == array)                                      \
520             array = new ChewingArrayIndexLevel<len>;            \
521         return array->add_index(keys, token);                   \
522     }
523
524     switch(phrase_length) {
525         CASE(0);
526         CASE(1);
527         CASE(2);
528         CASE(3);
529         CASE(4);
530         CASE(5);
531         CASE(6);
532         CASE(7);
533         CASE(8);
534         CASE(9);
535         CASE(10);
536         CASE(11);
537         CASE(12);
538         CASE(13);
539         CASE(14);
540         CASE(15);
541     default:
542         assert(false);
543     }
544
545 #undef CASE
546 }
547
548 int ChewingLengthIndexLevel::remove_index(int phrase_length,
549                                           /* in */ ChewingKey keys[],
550                                           /* in */ phrase_token_t token) {
551     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
552         return ERROR_PHRASE_TOO_LONG;
553
554     if (m_chewing_array_indexes->len <= phrase_length)
555         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
556
557 #define CASE(len) case len:                                     \
558     {                                                           \
559         ChewingArrayIndexLevel<len> * & array = g_array_index   \
560             (m_chewing_array_indexes,                           \
561              ChewingArrayIndexLevel<len> *, len);               \
562         if (NULL == array)                                      \
563             return ERROR_REMOVE_ITEM_DONOT_EXISTS;              \
564         int retval = array->remove_index(keys, token);          \
565                                                                 \
566         /* remove empty array. */                               \
567         if (0 == array->get_length()) {                         \
568             delete array;                                       \
569             array = NULL;                                       \
570                                                                 \
571             /* shrink self array. */                            \
572             g_array_set_size(m_chewing_array_indexes,           \
573                              get_length());                     \
574         }                                                       \
575         return retval;                                          \
576     }
577
578     switch (phrase_length) {
579         CASE(0);
580         CASE(1);
581         CASE(2);
582         CASE(3);
583         CASE(4);
584         CASE(5);
585         CASE(6);
586         CASE(7);
587         CASE(8);
588         CASE(9);
589         CASE(10);
590         CASE(11);
591         CASE(12);
592         CASE(13);
593         CASE(14);
594         CASE(15);
595     default:
596         assert(false);
597     }
598
599 #undef CASE
600 }
601
602 template<size_t phrase_length>
603 int ChewingArrayIndexLevel<phrase_length>::add_index
604 (/* in */ ChewingKey keys[], /* in */ phrase_token_t token) {
605     IndexItem * begin, * end;
606
607     IndexItem add_elem(keys, token);
608     begin = (IndexItem *) m_chunk.begin();
609     end   = (IndexItem *) m_chunk.end();
610
611     std_lite::pair<IndexItem *, IndexItem *> range;
612     range = std_lite::equal_range
613         (begin, end, add_elem, phrase_exact_less_than2<phrase_length>);
614
615     IndexItem * cur_elem;
616     for (cur_elem = range.first;
617          cur_elem != range.second; ++cur_elem) {
618         if (cur_elem->m_token == token)
619             return ERROR_INSERT_ITEM_EXISTS;
620         if (cur_elem->m_token > token)
621             break;
622     }
623
624     int offset = (cur_elem - begin) * sizeof(IndexItem);
625     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
626     return ERROR_OK;
627 }
628
629 template<size_t phrase_length>
630 int ChewingArrayIndexLevel<phrase_length>::remove_index
631 (/* in */ ChewingKey keys[], /* in */ phrase_token_t token) {
632     IndexItem * begin, * end;
633
634     IndexItem remove_elem(keys, token);
635     begin = (IndexItem *) m_chunk.begin();
636     end   = (IndexItem *) m_chunk.end();
637
638     std_lite::pair<IndexItem *, IndexItem *> range;
639     range = std_lite::equal_range
640         (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>);
641
642     IndexItem * cur_elem;
643     for (cur_elem = range.first;
644          cur_elem != range.second; ++cur_elem) {
645         if (cur_elem->m_token == token)
646             break;
647     }
648
649     if (cur_elem == range.second)
650         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
651
652     int offset = (cur_elem - begin) * sizeof(IndexItem);
653     m_chunk.remove_content(offset, sizeof(IndexItem));
654     return ERROR_OK;
655 }
656
657
658 /* load text method */
659 bool ChewingLargeTable::load_text(FILE * infile) {
660     char pinyin[256];
661     char phrase[256];
662     phrase_token_t token;
663     size_t freq;
664
665     while (!feof(infile)) {
666         fscanf(infile, "%s", pinyin);
667         fscanf(infile, "%s", phrase);
668         fscanf(infile, "%u", &token);
669         fscanf(infile, "%ld", &freq);
670
671         if(feof(infile))
672             break;
673
674         glong len = g_utf8_strlen(phrase, -1);
675
676         FullPinyinParser2 parser;
677         ChewingKeyVector keys;
678         ChewingKeyRestVector key_rests;
679
680         keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
681         key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
682
683         pinyin_option_t options = USE_TONE;
684         parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
685
686         if (len != keys->len) {
687             fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
688                     pinyin, phrase, token, freq);
689             continue;
690         }
691
692         add_index(keys->len, (ChewingKey *)keys->data, token);
693
694         g_array_free(keys, TRUE);
695         g_array_free(key_rests, TRUE);
696     }
697
698     return true;
699 }
700
701
702 /* load/store method */
703
704 bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
705                                    table_offset_t end) {
706     reset();
707     char * begin = (char *) chunk->begin();
708     table_offset_t phrase_begin, phrase_end;
709     table_offset_t * index = (table_offset_t *) (begin + offset);
710     phrase_end = *index;
711
712     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
713         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
714             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
715                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
716                     phrase_begin = phrase_end;
717                     index++;
718                     phrase_end = *index;
719
720                     if (phrase_begin == phrase_end) /* null pointer */
721                         continue;
722
723                     /* after reset() all phrases are null pointer. */
724                     ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
725                     m_chewing_length_indexes[k][l][m][n] = phrases;
726
727                     phrases->load(chunk, phrase_begin, phrase_end - 1);
728                     assert(phrase_end <= end);
729                     assert(*(begin + phrase_end - 1)  == c_separate);
730                 }
731
732     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
733     assert(c_separate == *(begin + offset));
734     return true;
735 }
736
737 bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
738                                     table_offset_t offset,
739                                     table_offset_t & end) {
740     table_offset_t phrase_end;
741     table_offset_t index = offset;
742     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
743
744     /* add '#' */
745     new_chunk->set_content(offset, &c_separate, sizeof(char));
746     offset += sizeof(char);
747     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
748     index += sizeof(table_offset_t);
749
750     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
751         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
752             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
753                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
754                     ChewingLengthIndexLevel * phrases =
755                         m_chewing_length_indexes[k][l][m][n];
756
757                     if (NULL == phrases) { /* null pointer */
758                         new_chunk->set_content(index, &offset,
759                                                sizeof(table_offset_t));
760                         index += sizeof(table_offset_t);
761                         continue;
762                     }
763
764                     /* has a end '#' */
765                     phrases->store(new_chunk, offset, phrase_end);
766                     offset = phrase_end;
767
768                     /* add '#' */
769                     new_chunk->set_content(offset, &c_separate, sizeof(char));
770                     offset += sizeof(char);
771                     new_chunk->set_content(index, &offset,
772                                            sizeof(table_offset_t));
773                     index += sizeof(table_offset_t);
774                 }
775
776     end = offset;
777     return true;
778 }
779
780 bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
781                                    table_offset_t end) {
782     char * begin = (char *) chunk->begin();
783     guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
784     table_offset_t * index = (table_offset_t *)
785         (begin + offset + sizeof(guint32));
786
787     table_offset_t phrase_begin, phrase_end = *index;
788     g_array_set_size(m_chewing_array_indexes, 0);
789     for (guint32 i = 0; i < nindex; ++i) {
790         phrase_begin = phrase_end;
791         index++;
792         phrase_end = *index;
793
794         if (phrase_begin == phrase_end) {
795             void * null = NULL;
796             g_array_append_val(m_chewing_array_indexes, null);
797             continue;
798         }
799
800 #define CASE(len) case len:                                             \
801         {                                                               \
802             ChewingArrayIndexLevel<len> * phrase =                      \
803                 new ChewingArrayIndexLevel<len>;                        \
804             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
805             assert(*(begin + phrase_end - 1) == c_separate);            \
806             assert(phrase_end <= end);                                  \
807             g_array_append_val(m_chewing_array_indexes, phrase);        \
808             break;                                                      \
809         }
810
811         switch ( i ){
812             CASE(0);
813             CASE(1);
814             CASE(2);
815             CASE(3);
816             CASE(4);
817             CASE(5);
818             CASE(6);
819             CASE(7);
820             CASE(8);
821             CASE(9);
822             CASE(10);
823             CASE(11);
824             CASE(12);
825             CASE(13);
826             CASE(14);
827             CASE(15);
828         default:
829             assert(false);
830         }
831
832 #undef CASE
833     }
834
835     /* check '#' */
836     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
837     assert(c_separate == *(begin + offset));
838     return true;
839 }
840
841 bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
842                                     table_offset_t offset,
843                                     table_offset_t & end) {
844     guint32 nindex = m_chewing_array_indexes->len; /* number of index */
845     new_chunk->set_content(offset, &nindex, sizeof(guint32));
846     table_offset_t index = offset + sizeof(guint32);
847
848     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
849     new_chunk->set_content(offset, &c_separate, sizeof(char));
850     offset += sizeof(char);
851     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
852     index += sizeof(table_offset_t);
853
854     table_offset_t phrase_end;
855     for (guint32 i = 0; i < nindex; ++i) {
856 #define CASE(len) case len:                                             \
857         {                                                               \
858             ChewingArrayIndexLevel<len> * phrase = g_array_index        \
859                 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
860             if (NULL == phrase) {                                       \
861                 new_chunk->set_content                                  \
862                     (index, &offset, sizeof(table_offset_t));           \
863                 index += sizeof(table_offset_t);                        \
864                 continue;                                               \
865             }                                                           \
866             phrase->store(new_chunk, offset, phrase_end);               \
867             offset = phrase_end;                                        \
868             break;                                                      \
869         }
870
871         switch ( i ){
872             CASE(0);
873             CASE(1);
874             CASE(2);
875             CASE(3);
876             CASE(4);
877             CASE(5);
878             CASE(6);
879             CASE(7);
880             CASE(8);
881             CASE(9);
882             CASE(10);
883             CASE(11);
884             CASE(12);
885             CASE(13);
886             CASE(14);
887             CASE(15);
888         default:
889             assert(false);
890         }
891 #undef CASE
892
893         /* add '#' */
894         new_chunk->set_content(offset, &c_separate, sizeof(char));
895         offset += sizeof(char);
896         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
897         index += sizeof(table_offset_t);
898     }
899
900     end = offset;
901     return true;
902 }
903
904 template<size_t phrase_length>
905 bool ChewingArrayIndexLevel<phrase_length>::
906 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
907     char * begin = (char *) chunk->begin();
908     m_chunk.set_chunk(begin + offset, end - offset, NULL);
909     return true;
910 }
911
912 template<size_t phrase_length>
913 bool ChewingArrayIndexLevel<phrase_length>::
914 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
915     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
916     end = offset + m_chunk.size();
917     return true;
918 }
919
920
921 /* get length method */
922
923 int ChewingLengthIndexLevel::get_length() const {
924     int length = m_chewing_array_indexes->len;
925
926     /* trim trailing zero. */
927     for (int i = length - 1; i >= 0; --i) {
928         void * array = g_array_index(m_chewing_array_indexes, void *, i);
929
930         if (NULL != array)
931             break;
932
933         --length;
934     }
935
936     return length;
937 }
938
939 template<size_t phrase_length>
940 int ChewingArrayIndexLevel<phrase_length>::get_length() const {
941     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
942     chunk_begin = (IndexItem *) m_chunk.begin();
943     chunk_end = (IndexItem *) m_chunk.end();
944
945     return chunk_end - chunk_begin;
946 }