begin to write import iterator
[platform/upstream/libpinyin.git] / src / storage / chewing_large_table.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include "chewing_large_table.h"
23 #include <assert.h>
24 #include "pinyin_phrase2.h"
25 #include "pinyin_parser2.h"
26
27
28 /* internal class definition */
29
30 namespace pinyin{
31 class ChewingLengthIndexLevel{
32
33 protected:
34     GArray * m_chewing_array_indexes;
35
36 public:
37     /* constructor/destructor */
38     ChewingLengthIndexLevel();
39     ~ChewingLengthIndexLevel();
40
41     /* load/store method */
42     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
43     bool store(MemoryChunk * new_chunk, table_offset_t offset,
44                table_offset_t & end);
45
46     /* search method */
47     int search(pinyin_option_t options, int phrase_length,
48                /* in */ ChewingKey keys[],
49                /* out */ PhraseIndexRanges ranges) const;
50
51     /* add/remove index method */
52     int add_index(int phrase_length, /* in */ ChewingKey keys[],
53                   /* in */ phrase_token_t token);
54     int remove_index(int phrase_length, /* in */ ChewingKey keys[],
55                      /* in */ phrase_token_t token);
56 };
57
58
59 template<int phrase_length>
60 class ChewingArrayIndexLevel{
61 protected:
62     typedef PinyinIndexItem2<phrase_length> IndexItem;
63
64 protected:
65     MemoryChunk m_chunk;
66
67     /* compress consecutive tokens */
68     int convert(pinyin_option_t options,
69                 ChewingKey keys[],
70                 IndexItem * begin,
71                 IndexItem * end,
72                 PhraseIndexRanges ranges) const;
73
74 public:
75     /* load/store method */
76     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
77     bool store(MemoryChunk * new_chunk, table_offset_t offset,
78                table_offset_t & end);
79
80     /* search method */
81     int search(pinyin_option_t options, /* in */ChewingKey keys[],
82                /* out */ PhraseIndexRanges ranges) const;
83
84     /* add/remove index method */
85     int add_index(/* in */ ChewingKey keys[], /* in */ phrase_token_t token);
86     int remove_index(/* in */ ChewingKey keys[],
87                      /* in */ phrase_token_t token);
88 };
89
90 };
91
92
93 using namespace pinyin;
94
95 /* class implementation */
96
97 ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
98     : m_options(options) {
99     memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
100 }
101
102 void ChewingBitmapIndexLevel::reset() {
103     for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
104         for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
105             for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
106                 for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
107                      ++n) {
108                     ChewingLengthIndexLevel * & length_array =
109                         m_chewing_length_indexes[k][l][m][n];
110                     if (length_array)
111                         delete length_array;
112                     length_array = NULL;
113                 }
114 }
115
116
117 /* search methods */
118
119 int ChewingBitmapIndexLevel::search(int phrase_length,
120                                     /* in */ ChewingKey keys[],
121                                     /* out */ PhraseIndexRanges ranges) const {
122     assert(phrase_length > 0);
123     return initial_level_search(phrase_length, keys, ranges);
124 }
125
126 int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
127     /* in */ ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
128
129 /* macros */
130 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
131     {                                                                   \
132         result |= middle_and_final_level_search(ORIGIN, phrase_length,  \
133                                                 keys, ranges);          \
134         if (m_options & AMBIGUITY) {                                    \
135             result |= middle_and_final_level_search(ANOTHER,            \
136                                                     phrase_length,      \
137                                                     keys, ranges);      \
138         }                                                               \
139         return result;                                                  \
140     }
141
142     /* deal with ambiguities */
143     int result = SEARCH_NONE;
144     const ChewingKey & first_key = keys[0];
145
146     switch(first_key.m_initial) {
147         MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
148         MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
149         MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
150         MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
151         MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
152         MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
153         MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L);
154         MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L);
155         MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H);
156         MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F);
157         MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K);
158         MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G);
159
160     case CHEWING_L:
161         {
162             result |= middle_and_final_level_search
163                 (CHEWING_L, phrase_length, keys, ranges);
164
165             if (m_options & PINYIN_AMB_L_N)
166                 result |= middle_and_final_level_search
167                     (CHEWING_N, phrase_length, keys,ranges);
168
169             if (m_options & PINYIN_AMB_L_R)
170                 result |= middle_and_final_level_search
171                     (CHEWING_R, phrase_length, keys, ranges);
172             return result;
173         }
174     default:
175         {
176             result |= middle_and_final_level_search
177                 ((ChewingInitial) first_key.m_initial,
178                  phrase_length, keys, ranges);
179             return result;
180         }
181     }
182 #undef MATCH
183     return result;
184 }
185
186
187 int ChewingBitmapIndexLevel::middle_and_final_level_search
188 (ChewingInitial initial, int phrase_length, /* in */ ChewingKey keys[],
189  /* out */ PhraseIndexRanges ranges) const {
190
191 /* macros */
192 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
193     {                                                                   \
194         result = tone_level_search                                      \
195             (initial, middle,                                           \
196              ORIGIN, phrase_length, keys, ranges);                      \
197         if (m_options & AMBIGUITY) {                                    \
198             result |= tone_level_search                                 \
199                 (initial, middle,                                       \
200                  ANOTHER, phrase_length, keys, ranges);                 \
201         }                                                               \
202         return result;                                                  \
203     }
204
205     int result = SEARCH_NONE;
206     const ChewingKey & first_key = keys[0];
207     const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
208
209     switch(first_key.m_final) {
210     case CHEWING_ZERO_FINAL:
211         {
212             if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
213                 if (!(m_options & PINYIN_INCOMPLETE))
214                     return result;
215                 for (int m = CHEWING_ZERO_MIDDLE;
216                      m < CHEWING_NUMBER_OF_MIDDLES; ++m)
217                     for (int n = CHEWING_ZERO_FINAL;
218                          n < CHEWING_NUMBER_OF_FINALS; ++n) {
219
220                         if (CHEWING_ZERO_MIDDLE == m &&
221                             CHEWING_ZERO_FINAL == n)
222                             continue;
223
224                         result |= tone_level_search
225                             (initial, (ChewingMiddle) m, (ChewingFinal) n,
226                              phrase_length, keys, ranges);
227                     }
228                 return result;
229             } else { /* normal pinyin */
230                 result |= tone_level_search
231                     (initial, middle, CHEWING_ZERO_FINAL,
232                      phrase_length, keys, ranges);
233                 return result;
234             }
235         }
236
237         MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
238         MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
239         MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
240         MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
241         MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
242         MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
243
244     default:
245         {
246             result |= tone_level_search
247                 (initial, middle, (ChewingFinal) first_key.m_final,
248                  phrase_length, keys, ranges);
249             return result;
250         }
251     }
252 #undef MATCH
253     return result;
254 }
255
256
257 int ChewingBitmapIndexLevel::tone_level_search
258 (ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
259  int phrase_length, /* in */ ChewingKey keys[],
260  /* out */ PhraseIndexRanges ranges) const {
261
262     int result = SEARCH_NONE;
263     const ChewingKey & first_key = keys[0];
264
265     switch (first_key.m_tone) {
266     case CHEWING_ZERO_TONE:
267         {
268             /* deal with zero tone in chewing large table. */
269             for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) {
270                 ChewingLengthIndexLevel * phrases =
271                     m_chewing_length_indexes
272                     [initial][middle][final][(ChewingTone)i];
273                 if (phrases)
274                     result |= phrases->search
275                         (m_options, phrase_length - 1, keys + 1, ranges);
276             }
277             return result;
278         }
279     default:
280         {
281             ChewingLengthIndexLevel * phrases =
282                 m_chewing_length_indexes
283                 [initial][middle][final][CHEWING_ZERO_TONE];
284             if (phrases)
285                 result |= phrases->search
286                     (m_options, phrase_length - 1, keys + 1, ranges);
287
288             phrases = m_chewing_length_indexes
289                 [initial][middle][final][(ChewingTone) first_key.m_tone];
290             if (phrases)
291                 result |= phrases->search
292                     (m_options, phrase_length - 1, keys + 1, ranges);
293             return result;
294         }
295     }
296     return result;
297 }
298
299
300 ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
301     m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
302 }
303
304 ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
305 #define CASE(len) case len:                                             \
306     {                                                                   \
307         ChewingArrayIndexLevel<len> * & array = g_array_index           \
308             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
309         if (array)                                                      \
310             delete array;                                               \
311         array = NULL;                                                   \
312         break;                                                          \
313     }
314
315     for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
316         switch (i){
317             CASE(0);
318             CASE(1);
319             CASE(2);
320             CASE(3);
321             CASE(4);
322             CASE(5);
323             CASE(6);
324             CASE(7);
325             CASE(8);
326             CASE(9);
327             CASE(10);
328             CASE(11);
329             CASE(12);
330             CASE(13);
331             CASE(14);
332             CASE(15);
333         default:
334             assert(false);
335         }
336     }
337 #undef CASE
338     g_array_free(m_chewing_array_indexes, TRUE);
339     m_chewing_array_indexes = NULL;
340 }
341
342
343 int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
344                                     /* in */ ChewingKey keys[],
345                                     /* out */ PhraseIndexRanges ranges) const {
346     int result = SEARCH_NONE;
347     if (m_chewing_array_indexes->len < phrase_length + 1)
348         return result;
349     if (m_chewing_array_indexes->len > phrase_length + 1)
350         result |= SEARCH_CONTINUED;
351
352 #define CASE(len) case len:                                             \
353     {                                                                   \
354         ChewingArrayIndexLevel<len> * & array = g_array_index           \
355             (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
356         if (!array)                                                     \
357             return result;                                              \
358         result |= array->search(options, keys, ranges);                 \
359         return result;                                                  \
360     }
361
362     switch (phrase_length) {
363         CASE(0);
364         CASE(1);
365         CASE(2);
366         CASE(3);
367         CASE(4);
368         CASE(5);
369         CASE(6);
370         CASE(7);
371         CASE(8);
372         CASE(9);
373         CASE(10);
374         CASE(11);
375         CASE(12);
376         CASE(13);
377         CASE(14);
378         CASE(15);
379     default:
380         assert(false);
381     }
382
383 #undef CASE
384 }
385
386
387 template<int phrase_length>
388 int ChewingArrayIndexLevel<phrase_length>::search
389 (pinyin_option_t options, /* in */ChewingKey keys[],
390  /* out */ PhraseIndexRanges ranges) const {
391     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
392     chunk_begin = (IndexItem *) m_chunk.begin();
393     chunk_end = (IndexItem *) m_chunk.end();
394
395     /* do the search */
396     ChewingKey left_keys[phrase_length], right_keys[phrase_length];
397     compute_lower_value2(options, keys, left_keys, phrase_length);
398     compute_upper_value2(options, keys, right_keys, phrase_length);
399
400     IndexItem left(left_keys, -1), right(right_keys, -1);
401
402     IndexItem * begin = std_lite::lower_bound
403         (chunk_begin, chunk_end, left,
404          phrase_exact_less_than2<phrase_length>);
405     IndexItem * end   = std_lite::upper_bound
406         (chunk_begin, chunk_end, right,
407          phrase_exact_less_than2<phrase_length>);
408
409     return convert(options, keys, begin, end, ranges);
410 }
411
412 /* compress consecutive tokens */
413 template<int phrase_length>
414 int ChewingArrayIndexLevel<phrase_length>::convert
415 (pinyin_option_t options, ChewingKey keys[],
416  IndexItem * begin, IndexItem * end,
417  PhraseIndexRanges ranges) const {
418     IndexItem * iter = NULL;
419     PhraseIndexRange cursor;
420     GArray * head, * cursor_head = NULL;
421
422     int result = SEARCH_NONE;
423     /* TODO: check the below code */
424     cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
425     for (iter = begin; iter != end; ++iter) {
426         if (0 != pinyin_compare_with_ambiguities2
427             (options, keys, iter->m_keys, phrase_length))
428             continue;
429
430         phrase_token_t token = iter->m_token;
431         head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
432         if (NULL == head)
433             continue;
434
435         result |= SEARCH_OK;
436
437         if (null_token == cursor.m_range_begin) {
438             cursor.m_range_begin = token;
439             cursor.m_range_end   = token + 1;
440             cursor_head = head;
441         } else if (cursor.m_range_end == token &&
442                    PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
443                    PHRASE_INDEX_LIBRARY_INDEX(token)) {
444             ++cursor.m_range_end;
445         } else {
446             g_array_append_val(cursor_head, cursor);
447             cursor.m_range_begin = token; cursor.m_range_end = token + 1;
448             cursor_head = head;
449         }
450     }
451
452     if (null_token == cursor.m_range_begin)
453         return result;
454
455     g_array_append_val(cursor_head, cursor);
456     return result;
457 }
458
459
460 /* add/remove index method */
461
462 int ChewingBitmapIndexLevel::add_index(int phrase_length,
463                                        /* in */ ChewingKey keys[],
464                                        /* in */ phrase_token_t token) {
465     const ChewingKey first_key = keys[0];
466     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
467         [first_key.m_initial][first_key.m_middle]
468         [first_key.m_final][first_key.m_tone];
469
470     if (NULL == length_array) {
471         length_array = new ChewingLengthIndexLevel();
472     }
473
474     return length_array->add_index(phrase_length - 1, keys + 1, token);
475 }
476
477 int ChewingBitmapIndexLevel::remove_index(int phrase_length,
478                                           /* in */ ChewingKey keys[],
479                                           /* in */ phrase_token_t token) {
480     const ChewingKey first_key = keys[0];
481     ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
482         [first_key.m_initial][first_key.m_middle]
483         [first_key.m_final][first_key.m_tone];
484
485     if (length_array)
486         return length_array->remove_index(phrase_length - 1, keys + 1, token);
487     return ERROR_REMOVE_ITEM_DONOT_EXISTS;
488 }
489
490 int ChewingLengthIndexLevel::add_index(int phrase_length,
491                                        /* in */ ChewingKey keys[],
492                                        /* in */ phrase_token_t token) {
493     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
494         return ERROR_PHRASE_TOO_LONG;
495
496     if (m_chewing_array_indexes->len <= phrase_length)
497         g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
498
499 #define CASE(len) case len:                                     \
500     {                                                           \
501         ChewingArrayIndexLevel<len> * & array = g_array_index   \
502             (m_chewing_array_indexes,                           \
503              ChewingArrayIndexLevel<len> *, len);               \
504         if (NULL == array)                                      \
505             array = new ChewingArrayIndexLevel<len>;            \
506         return array->add_index(keys, token);                   \
507     }
508
509     switch(phrase_length) {
510         CASE(0);
511         CASE(1);
512         CASE(2);
513         CASE(3);
514         CASE(4);
515         CASE(5);
516         CASE(6);
517         CASE(7);
518         CASE(8);
519         CASE(9);
520         CASE(10);
521         CASE(11);
522         CASE(12);
523         CASE(13);
524         CASE(14);
525         CASE(15);
526     default:
527         assert(false);
528     }
529
530 #undef CASE
531 }
532
533 int ChewingLengthIndexLevel::remove_index(int phrase_length,
534                                           /* in */ ChewingKey keys[],
535                                           /* in */ phrase_token_t token) {
536     if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
537         return ERROR_PHRASE_TOO_LONG;
538
539     if (m_chewing_array_indexes->len <= phrase_length)
540         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
541
542 #define CASE(len) case len:                                     \
543     {                                                           \
544         ChewingArrayIndexLevel<len> * & array = g_array_index   \
545             (m_chewing_array_indexes,                           \
546              ChewingArrayIndexLevel<len> *, len);               \
547         if (NULL == array)                                      \
548             return ERROR_REMOVE_ITEM_DONOT_EXISTS;                    \
549         return array->remove_index(keys, token);                \
550     }
551
552     switch (phrase_length) {
553         CASE(0);
554         CASE(1);
555         CASE(2);
556         CASE(3);
557         CASE(4);
558         CASE(5);
559         CASE(6);
560         CASE(7);
561         CASE(8);
562         CASE(9);
563         CASE(10);
564         CASE(11);
565         CASE(12);
566         CASE(13);
567         CASE(14);
568         CASE(15);
569     default:
570         assert(false);
571     }
572
573 #undef CASE
574 }
575
576 template<int phrase_length>
577 int ChewingArrayIndexLevel<phrase_length>::add_index
578 (/* in */ ChewingKey keys[], /* in */ phrase_token_t token) {
579     IndexItem * begin, * end;
580
581     IndexItem add_elem(keys, token);
582     begin = (IndexItem *) m_chunk.begin();
583     end   = (IndexItem *) m_chunk.end();
584
585     std_lite::pair<IndexItem *, IndexItem *> range;
586     range = std_lite::equal_range
587         (begin, end, add_elem, phrase_exact_less_than2<phrase_length>);
588
589     IndexItem * cur_elem;
590     for (cur_elem = range.first;
591          cur_elem != range.second; ++cur_elem) {
592         if (cur_elem->m_token == token)
593             return ERROR_INSERT_ITEM_EXISTS;
594         if (cur_elem->m_token > token)
595             break;
596     }
597
598     int offset = (cur_elem - begin) * sizeof(IndexItem);
599     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
600     return ERROR_OK;
601 }
602
603 template<int phrase_length>
604 int ChewingArrayIndexLevel<phrase_length>::remove_index
605 (/* in */ ChewingKey keys[], /* in */ phrase_token_t token) {
606     IndexItem * begin, * end;
607
608     IndexItem remove_elem(keys, token);
609     begin = (IndexItem *) m_chunk.begin();
610     end   = (IndexItem *) m_chunk.end();
611
612     std_lite::pair<IndexItem *, IndexItem *> range;
613     range = std_lite::equal_range
614         (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>);
615
616     IndexItem * cur_elem;
617     for (cur_elem = range.first;
618          cur_elem != range.second; ++cur_elem) {
619         if (cur_elem->m_token == token)
620             break;
621     }
622
623     if (cur_elem == range.second)
624         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
625
626     int offset = (cur_elem - begin) * sizeof(IndexItem);
627     m_chunk.remove_content(offset, sizeof(IndexItem));
628     return ERROR_OK;
629 }
630
631
632 /* load text method */
633 bool ChewingLargeTable::load_text(FILE * infile) {
634     char pinyin[256];
635     char phrase[256];
636     phrase_token_t token;
637     size_t freq;
638
639     while (!feof(infile)) {
640         fscanf(infile, "%s", pinyin);
641         fscanf(infile, "%s", phrase);
642         fscanf(infile, "%u", &token);
643         fscanf(infile, "%ld", &freq);
644
645         if(feof(infile))
646             break;
647
648         glong len = g_utf8_strlen(phrase, -1);
649
650         FullPinyinParser2 parser;
651         ChewingKeyVector keys;
652         ChewingKeyRestVector key_rests;
653
654         keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
655         key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
656
657         pinyin_option_t options = USE_TONE;
658         parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
659
660         if (len != keys->len) {
661             fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
662                     pinyin, phrase, token, freq);
663             continue;
664         }
665
666         add_index(keys->len, (ChewingKey *)keys->data, token);
667
668         g_array_free(keys, TRUE);
669         g_array_free(key_rests, TRUE);
670     }
671
672     return true;
673 }
674
675
676 /* load/store method */
677
678 bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
679                                    table_offset_t end) {
680     reset();
681     char * begin = (char *) chunk->begin();
682     table_offset_t phrase_begin, phrase_end;
683     table_offset_t * index = (table_offset_t *) (begin + offset);
684     phrase_end = *index;
685
686     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
687         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
688             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
689                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
690                     phrase_begin = phrase_end;
691                     index++;
692                     phrase_end = *index;
693
694                     if (phrase_begin == phrase_end) /* null pointer */
695                         continue;
696
697                     ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
698                     phrases->load(chunk, phrase_begin, phrase_end - 1);
699                     m_chewing_length_indexes[k][l][m][n] = phrases;
700
701                     assert(phrase_end <= end);
702                     assert(*(begin + phrase_end - 1)  == c_separate);
703                 }
704
705     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
706     assert(c_separate == *(begin + offset));
707     return true;
708 }
709
710 bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
711                                     table_offset_t offset,
712                                     table_offset_t & end) {
713     table_offset_t phrase_end;
714     table_offset_t index = offset;
715     offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
716
717     /* add '#' */
718     new_chunk->set_content(offset, &c_separate, sizeof(char));
719     offset += sizeof(char);
720     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
721     index += sizeof(table_offset_t);
722
723     for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
724         for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
725             for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
726                 for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
727                     ChewingLengthIndexLevel * phrases =
728                         m_chewing_length_indexes[k][l][m][n];
729
730                     if (NULL == phrases) { /* null pointer */
731                         new_chunk->set_content(index, &offset,
732                                                sizeof(table_offset_t));
733                         index += sizeof(table_offset_t);
734                         continue;
735                     }
736
737                     /* has a end '#' */
738                     phrases->store(new_chunk, offset, phrase_end);
739                     offset = phrase_end;
740
741                     /* add '#' */
742                     new_chunk->set_content(offset, &c_separate, sizeof(char));
743                     offset += sizeof(char);
744                     new_chunk->set_content(index, &offset,
745                                            sizeof(table_offset_t));
746                     index += sizeof(table_offset_t);
747                 }
748
749     end = offset;
750     return true;
751 }
752
753 bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
754                                    table_offset_t end) {
755     char * begin = (char *) chunk->begin();
756     guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
757     table_offset_t * index = (table_offset_t *)
758         (begin + offset + sizeof(guint32));
759
760     table_offset_t phrase_begin, phrase_end = *index;
761     m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
762     for (guint32 i = 0; i < nindex; ++i) {
763         phrase_begin = phrase_end;
764         index++;
765         phrase_end = *index;
766
767         if (phrase_begin == phrase_end) {
768             void * null = NULL;
769             g_array_append_val(m_chewing_array_indexes, null);
770             continue;
771         }
772
773 #define CASE(len) case len:                                             \
774         {                                                               \
775             ChewingArrayIndexLevel<len> * phrase =                      \
776                 new ChewingArrayIndexLevel<len>;                        \
777             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
778             assert(*(begin + phrase_end - 1) == c_separate);            \
779             assert(phrase_end <= end);                                  \
780             g_array_append_val(m_chewing_array_indexes, phrase);        \
781             break;                                                      \
782         }
783
784         switch ( i ){
785             CASE(0);
786             CASE(1);
787             CASE(2);
788             CASE(3);
789             CASE(4);
790             CASE(5);
791             CASE(6);
792             CASE(7);
793             CASE(8);
794             CASE(9);
795             CASE(10);
796             CASE(11);
797             CASE(12);
798             CASE(13);
799             CASE(14);
800             CASE(15);
801         default:
802             assert(false);
803         }
804
805 #undef CASE
806     }
807
808     /* check '#' */
809     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
810     assert(c_separate == *(begin + offset));
811     return true;
812 }
813
814 bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
815                                     table_offset_t offset,
816                                     table_offset_t & end) {
817     guint32 nindex = m_chewing_array_indexes->len; /* number of index */
818     new_chunk->set_content(offset, &nindex, sizeof(guint32));
819     table_offset_t index = offset + sizeof(guint32);
820
821     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
822     new_chunk->set_content(offset, &c_separate, sizeof(char));
823     offset += sizeof(char);
824     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
825     index += sizeof(table_offset_t);
826
827     table_offset_t phrase_end;
828     for (guint32 i = 0; i < nindex; ++i) {
829 #define CASE(len) case len:                                             \
830         {                                                               \
831             ChewingArrayIndexLevel<len> * phrase = g_array_index        \
832                 (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, i); \
833             if (NULL == phrase) {                                       \
834                 new_chunk->set_content                                  \
835                     (index, &offset, sizeof(table_offset_t));           \
836                 index += sizeof(table_offset_t);                        \
837                 continue;                                               \
838             }                                                           \
839             phrase->store(new_chunk, offset, phrase_end);               \
840             offset = phrase_end;                                        \
841             break;                                                      \
842         }
843
844         switch ( i ){
845             CASE(0);
846             CASE(1);
847             CASE(2);
848             CASE(3);
849             CASE(4);
850             CASE(5);
851             CASE(6);
852             CASE(7);
853             CASE(8);
854             CASE(9);
855             CASE(10);
856             CASE(11);
857             CASE(12);
858             CASE(13);
859             CASE(14);
860             CASE(15);
861         default:
862             assert(false);
863         }
864 #undef CASE
865
866         /* add '#' */
867         new_chunk->set_content(offset, &c_separate, sizeof(char));
868         offset += sizeof(char);
869         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
870         index += sizeof(table_offset_t);
871     }
872
873     end = offset;
874     return true;
875 }
876
877 template<int phrase_length>
878 bool ChewingArrayIndexLevel<phrase_length>::
879 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
880     char * begin = (char *) chunk->begin();
881     m_chunk.set_chunk(begin + offset, end - offset, NULL);
882     return true;
883 }
884
885 template<int phrase_length>
886 bool ChewingArrayIndexLevel<phrase_length>::
887 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
888     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
889     end = offset + m_chunk.size();
890     return true;
891 }