d9094a5f8a2b9a3f2cbbec66f3a9173c52faa32b
[platform/upstream/libpinyin.git] / src / storage / pinyin_large_table.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2006-2007 Peng Wu
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  */
21
22 #include <assert.h>
23 #include <string.h>
24 #include "novel_types.h"
25 #include "pinyin_base.h"
26 #include "pinyin_phrase.h"
27 #include "pinyin_large_table.h"
28
29
30 /* class definition */
31
32 namespace pinyin{
33
34 class PinyinLengthIndexLevel{
35 protected:
36     GArray* m_pinyin_array_indexes;
37 public:
38     PinyinLengthIndexLevel();
39     ~PinyinLengthIndexLevel();
40     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
41     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
42
43     /*search/add_index method */
44     int search( int phrase_length, /* in */ PinyinCustomSettings * custom,
45                 /* in */ PinyinKey keys[],
46                 /* out */ PhraseIndexRanges ranges);
47     int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
48     int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
49 };
50
51 template<size_t phrase_length>
52 class PinyinArrayIndexLevel{
53 protected:
54     MemoryChunk m_chunk;
55     int convert(PinyinCustomSettings * custom,
56                 PinyinKey keys[],
57                 PinyinIndexItem<phrase_length> * begin,
58                 PinyinIndexItem<phrase_length> * end,
59                 PhraseIndexRanges ranges);
60 public:
61     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
62     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
63
64     /*search/add_index method */
65     int search(/* in */ PinyinCustomSettings * custom,
66                /* in */ PinyinKey keys[],
67                /* out */ PhraseIndexRanges ranges);
68     int add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token);
69     int remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token);
70 };
71
72 };
73
74 using namespace pinyin;
75
76 /* class implementation */
77
78 PinyinBitmapIndexLevel::PinyinBitmapIndexLevel(PinyinCustomSettings * custom)
79     :m_custom(custom){
80     memset(m_pinyin_length_indexes, 0, sizeof(m_pinyin_length_indexes));
81 }
82
83 void PinyinBitmapIndexLevel::reset(){
84     for ( int k = PINYIN_ZeroInitial; k < PINYIN_Number_Of_Initials; k++)
85         for ( int m = PINYIN_ZeroFinal; m < PINYIN_Number_Of_Finals; m++)
86             for ( int n = PINYIN_ZeroTone; n < PINYIN_Number_Of_Tones; n++){
87                 PinyinLengthIndexLevel * length_array = 
88                     m_pinyin_length_indexes[k][m][n];
89                 if ( length_array )
90                     delete length_array;
91             }
92 }
93
94 int PinyinBitmapIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[],
95             /* out */ PhraseIndexRanges ranges) const{
96     assert(phrase_length > 0);
97     return initial_level_search(phrase_length, keys, ranges);
98 }
99
100 int PinyinBitmapIndexLevel::initial_level_search(int phrase_length, 
101                                                  /* in */PinyinKey keys[],
102                                                  /* out */ PhraseIndexRanges ranges) const{
103
104 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER)  case ORIGIN:                 \
105     {                                                                   \
106         result |= final_level_search((PinyinInitial)first_key.m_initial,\
107                                     phrase_length, keys, ranges);               \
108         if ( custom.use_ambiguities [AMBIGUITY] ){                      \
109             result |= final_level_search(ANOTHER,                       \
110                                          phrase_length, keys, ranges);  \
111         }                                                               \
112         return result;                                                  \
113     }
114     
115     //deal with the ambiguities
116
117     int result = SEARCH_NONE;
118     PinyinKey& first_key = keys[0];
119     PinyinCustomSettings &  custom= *m_custom;
120     
121     switch(first_key.m_initial){
122         
123         MATCH(PINYIN_AmbZhiZi, PINYIN_Zi, PINYIN_Zhi);
124         MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
125         MATCH(PINYIN_AmbChiCi, PINYIN_Ci, PINYIN_Chi);
126         MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci);
127         MATCH(PINYIN_AmbShiSi, PINYIN_Si, PINYIN_Shi);
128         MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si);
129         MATCH(PINYIN_AmbLeRi, PINYIN_Ri, PINYIN_Le);
130         MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le);
131         MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He);
132         MATCH(PINYIN_AmbFoHe, PINYIN_He, PINYIN_Fo);
133         MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke);
134         MATCH(PINYIN_AmbGeKe, PINYIN_Ke, PINYIN_Ge);
135
136     case PINYIN_Le:
137         {
138             result |= final_level_search((PinyinInitial)first_key.m_initial, 
139                                         phrase_length, keys, ranges);  
140             if ( custom.use_ambiguities [PINYIN_AmbLeRi] )              
141                 result |= final_level_search(PINYIN_Ri, phrase_length,
142                                              keys, ranges);     
143             if ( custom.use_ambiguities [PINYIN_AmbNeLe] )              
144                 result |= final_level_search(PINYIN_Ne, phrase_length, 
145                                              keys, ranges);
146             return result;
147         }
148     default:
149         {
150             return final_level_search((PinyinInitial)first_key.m_initial,
151                                       phrase_length, 
152                                       keys, ranges);
153         }
154   }
155 #undef MATCH 
156 }
157
158 int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
159                                                int phrase_length, 
160                                                /* in */PinyinKey keys[],
161                                                /* out */ PhraseIndexRanges ranges) const{
162 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                  \
163     {                                                                   \
164         result = tone_level_search(initial,(PinyinFinal) first_key.m_final,\
165                                    phrase_length, keys, ranges);                \
166         if ( custom.use_ambiguities [AMBIGUITY] ){                      \
167             result |= tone_level_search(initial, ANOTHER,               \
168                                         phrase_length, keys, ranges);   \
169         }                                                               \
170         return result;                                                  \
171     }
172     
173     int result = SEARCH_NONE;
174     PinyinKey& first_key = keys[0];
175     PinyinCustomSettings &  custom= *m_custom;
176
177     switch(first_key.m_final){
178     case PINYIN_ZeroFinal:
179         {
180             if (!custom.use_incomplete )
181                 return result;
182             for ( int i  = PINYIN_A; i < PINYIN_Number_Of_Finals; ++i){
183                 result |= tone_level_search(initial,(PinyinFinal)i , 
184                                             phrase_length, keys, ranges);
185             }
186             return result;
187         }
188         
189         MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang);
190         MATCH(PINYIN_AmbAnAng, PINYIN_Ang, PINYIN_An);
191         MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng);
192         MATCH(PINYIN_AmbEnEng, PINYIN_Eng, PINYIN_En);
193         MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing);
194         MATCH(PINYIN_AmbInIng, PINYIN_Ing, PINYIN_In);
195         
196     default:
197         {
198             return tone_level_search(initial,(PinyinFinal)first_key.m_final, 
199                                      phrase_length, keys, ranges);
200         }
201     }
202 #undef MATCH
203 }
204
205 int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial, 
206                                               PinyinFinal final,
207                                               int phrase_length, 
208                                               /* in */PinyinKey keys[],
209                                               /* out */ PhraseIndexRanges ranges) const{
210     int result = SEARCH_NONE;
211     PinyinKey& first_key = keys[0];
212     PinyinCustomSettings &  custom= *m_custom;
213
214     switch ( first_key.m_tone ){
215     case PINYIN_ZeroTone:
216         {
217                 //deal with ZeroTone in pinyin table files.
218             for ( int i = PINYIN_ZeroTone; i < PINYIN_Number_Of_Tones; ++i){
219                 PinyinLengthIndexLevel * phrases = 
220                     m_pinyin_length_indexes[initial][final][(PinyinTone)i];
221                 if ( phrases )
222                     result |= phrases->search(phrase_length - 1, &custom,
223                                               keys + 1, ranges);
224             }
225             return result;
226         }
227     default:
228         {
229             PinyinLengthIndexLevel * phrases = 
230                 m_pinyin_length_indexes[initial][final]
231                 [PINYIN_ZeroTone];
232             if ( phrases )
233                 result = phrases->search(phrase_length - 1, &custom,
234                                          keys + 1, ranges);
235             phrases = m_pinyin_length_indexes[initial][final]
236                 [(PinyinTone) first_key.m_tone];
237             if ( phrases )
238                 result |= phrases->search(phrase_length - 1, &custom, 
239                                           keys + 1, ranges);
240             return result;
241         }
242     }
243     return result;
244 }
245
246 PinyinLengthIndexLevel::PinyinLengthIndexLevel(){
247     m_pinyin_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
248 }
249
250 PinyinLengthIndexLevel::~PinyinLengthIndexLevel(){
251 #define CASE(len) case len:                                             \
252     {                                                                   \
253         PinyinArrayIndexLevel<len> * array = g_array_index              \
254             (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
255         if (array)                                                      \
256             delete array;                                               \
257         break;                                                          \
258     }
259     for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i){
260         switch (i){
261             CASE(0);
262             CASE(1);
263             CASE(2);
264             CASE(3);
265             CASE(4);
266             CASE(5);
267             CASE(6);
268             CASE(7);
269             CASE(8);
270             CASE(9);
271             CASE(10);
272             CASE(11);
273             CASE(12);
274             CASE(13);
275             CASE(14);
276             CASE(15);
277         default:
278             assert(false);
279         }
280     }
281     g_array_free(m_pinyin_array_indexes, TRUE);
282 #undef CASE
283 }
284
285 int PinyinLengthIndexLevel::search( int phrase_length,
286                                     /* in */ PinyinCustomSettings * custom,
287                                     /* in */ PinyinKey keys[],
288                                     /* out */ PhraseIndexRanges ranges){
289     int result = SEARCH_NONE;
290     if (m_pinyin_array_indexes->len < phrase_length + 1)
291         return result;
292     if (m_pinyin_array_indexes->len > phrase_length + 1)
293         result |= SEARCH_CONTINUED;
294     
295 #define CASE(len) case len:                                             \
296     {                                                                   \
297         PinyinArrayIndexLevel<len> * array = g_array_index              \
298             (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
299         if ( !array )                                                   \
300             return result;                                              \
301         result |= array->search(custom, keys, ranges);                  \
302         return result;                                                  \
303     }
304
305     switch ( phrase_length ){
306         CASE(0);
307         CASE(1);
308         CASE(2);
309         CASE(3);
310         CASE(4);
311         CASE(5);
312         CASE(6);
313         CASE(7);
314         CASE(8);
315         CASE(9);
316         CASE(10);
317         CASE(11);
318         CASE(12);
319         CASE(13);
320         CASE(14);
321         CASE(15);
322     default:
323         assert(false);
324     }
325 #undef CASE
326 }
327
328 template<size_t phrase_length>
329 int PinyinArrayIndexLevel<phrase_length>::search(/* in */ PinyinCustomSettings * custom, /* in */ PinyinKey keys[], /* out */ PhraseIndexRanges ranges){
330   PhraseExactLessThan<phrase_length> m_lessthan;
331   PinyinIndexItem<phrase_length> * chunk_begin, * chunk_end;
332   chunk_begin = (PinyinIndexItem<phrase_length> *)m_chunk.begin();
333   chunk_end = (PinyinIndexItem<phrase_length> *)m_chunk.end();
334   //do the search
335   PinyinKey left_keys[phrase_length], right_keys[phrase_length];
336   compute_lower_value(*custom, keys, left_keys, phrase_length);
337   compute_upper_value(*custom, keys, right_keys, phrase_length);
338   PinyinIndexItem<phrase_length> left(left_keys, -1), right(right_keys, -1);
339
340   PinyinIndexItem<phrase_length> * begin = std_lite::lower_bound(chunk_begin, chunk_end, left, m_lessthan);
341   PinyinIndexItem<phrase_length> * end = std_lite::upper_bound(chunk_begin, chunk_end, right, m_lessthan);
342
343   return convert(custom, keys, begin, end, ranges);
344 }
345
346 template<size_t phrase_length>
347 int PinyinArrayIndexLevel<phrase_length>::convert(PinyinCustomSettings * custom, PinyinKey keys[], PinyinIndexItem<phrase_length> * begin, PinyinIndexItem<phrase_length> * end, PhraseIndexRanges ranges){
348     PinyinIndexItem<phrase_length> * iter;
349     PhraseIndexRange cursor;
350     GArray * head, *cursor_head = NULL;
351     int result = SEARCH_NONE;
352     cursor.m_range_begin = -1; cursor.m_range_end = -1;
353     for ( iter = begin; iter != end; ++iter){
354         if ( ! 0 == 
355              pinyin_compare_with_ambiguities
356              (*custom, keys, iter->m_keys, phrase_length))
357             continue;
358         phrase_token_t token = iter->m_token;
359         head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
360         if ( NULL == head )
361             continue;
362
363         result |= SEARCH_OK;
364
365         if ( cursor.m_range_begin == (phrase_token_t) -1 ){
366             cursor.m_range_begin = token;
367             cursor.m_range_end = token + 1;
368             cursor_head = head;
369         }else if (cursor.m_range_end == token && 
370                   PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_end) == 
371                   PHRASE_INDEX_LIBRARY_INDEX(token) ){
372             cursor.m_range_end++;
373         }else {
374             g_array_append_val(cursor_head, cursor);
375             cursor.m_range_begin = token; cursor.m_range_end = token + 1;
376             cursor_head = head;
377         }
378     }
379     if ( cursor.m_range_begin == (phrase_token_t) -1 )
380         return result;
381
382     g_array_append_val(cursor_head, cursor);
383     return result;
384 }
385
386 int PinyinBitmapIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
387     PinyinKey first_key = keys[0];
388     PinyinLengthIndexLevel * &length_array = 
389         m_pinyin_length_indexes[first_key.m_initial][first_key.m_final][first_key.m_tone];
390     if ( !length_array ){
391         length_array = new PinyinLengthIndexLevel();
392     }
393     return length_array->add_index(phrase_length - 1, keys + 1, token);
394 }
395
396 int PinyinBitmapIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
397     PinyinKey first_key = keys[0];
398     PinyinLengthIndexLevel * &length_array = 
399         m_pinyin_length_indexes[first_key.m_initial][first_key.m_final][first_key.m_tone];
400     if ( length_array )
401         return length_array->remove_index(phrase_length - 1, keys + 1, token);
402     return REMOVE_ITEM_DONOT_EXISTS;
403 }
404
405 int PinyinLengthIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
406     assert(phrase_length + 1 < MAX_PHRASE_LENGTH);
407     if ( m_pinyin_array_indexes -> len <= phrase_length )
408         g_array_set_size(m_pinyin_array_indexes, phrase_length + 1);
409 #define CASE(len)       case len:                                       \
410     {                                                                   \
411         PinyinArrayIndexLevel<len> * &array = g_array_index             \
412             (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
413         if ( !array )                                                   \
414             array = new PinyinArrayIndexLevel<len>;                     \
415         return array->add_index(keys, token);                           \
416     }
417     switch(phrase_length){
418         CASE(0);
419         CASE(1);
420         CASE(2);
421         CASE(3);
422         CASE(4);
423         CASE(5);
424         CASE(6);
425         CASE(7);
426         CASE(8);
427         CASE(9);
428         CASE(10);
429         CASE(11);
430         CASE(12);
431         CASE(13);
432         CASE(14);
433         CASE(15);
434     default:
435         assert(false);
436     }
437 #undef CASE
438 }
439
440 int PinyinLengthIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
441     assert(phrase_length + 1 < MAX_PHRASE_LENGTH);
442     if ( m_pinyin_array_indexes -> len <= phrase_length )
443         return REMOVE_ITEM_DONOT_EXISTS;
444 #define CASE(len)       case len:                                       \
445     {                                                                   \
446         PinyinArrayIndexLevel<len> * &array = g_array_index             \
447             (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
448         if ( !array )                                                   \
449             return REMOVE_ITEM_DONOT_EXISTS;                            \
450         return array->remove_index(keys, token);                        \
451     }
452     switch(phrase_length){
453         CASE(0);
454         CASE(1);
455         CASE(2);
456         CASE(3);
457         CASE(4);
458         CASE(5);
459         CASE(6);
460         CASE(7);
461         CASE(8);
462         CASE(9);
463         CASE(10);
464         CASE(11);
465         CASE(12);
466         CASE(13);
467         CASE(14);
468         CASE(15);
469     default:
470         assert(false);
471     }
472 #undef CASE
473 }
474
475 template<size_t phrase_length>
476 int PinyinArrayIndexLevel<phrase_length>::add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){
477     PhraseExactLessThan<phrase_length> m_lessthan;
478     PinyinIndexItem<phrase_length> * buf_begin, * buf_end;
479
480     PinyinIndexItem<phrase_length> new_elem(keys, token);
481     buf_begin = (PinyinIndexItem<phrase_length> *) m_chunk.begin();
482     buf_end = (PinyinIndexItem<phrase_length> *) m_chunk.end();
483
484     std_lite::pair<PinyinIndexItem<phrase_length> *, PinyinIndexItem<phrase_length> *> range;
485     range = std_lite::equal_range(buf_begin, buf_end, new_elem, m_lessthan);
486
487     PinyinIndexItem<phrase_length> * cur_elem;
488     for ( cur_elem = range.first; 
489           cur_elem != range.second; ++cur_elem){
490         if ( cur_elem->m_token == token )
491             return INSERT_ITEM_EXISTS;
492         if ( cur_elem->m_token > token )
493             break;
494     }
495
496     int offset = (cur_elem - buf_begin) *
497         sizeof(PinyinIndexItem<phrase_length>);
498     m_chunk.insert_content(offset, &new_elem, 
499                            sizeof ( PinyinIndexItem<phrase_length> ));
500     return INSERT_OK;
501 }
502
503 template<size_t phrase_length>
504 int PinyinArrayIndexLevel<phrase_length>::remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){
505     PhraseExactLessThan<phrase_length> m_lessthan;
506     PinyinIndexItem<phrase_length> * buf_begin, * buf_end;
507
508     PinyinIndexItem<phrase_length> remove_elem(keys, token);
509     buf_begin = (PinyinIndexItem<phrase_length> *) m_chunk.begin();
510     buf_end = (PinyinIndexItem<phrase_length> *) m_chunk.end();
511
512     std_lite::pair<PinyinIndexItem<phrase_length> *, PinyinIndexItem<phrase_length> *> range;
513     range = std_lite::equal_range(buf_begin, buf_end, remove_elem, m_lessthan);
514
515     PinyinIndexItem<phrase_length> * cur_elem;
516     for ( cur_elem = range.first; 
517           cur_elem != range.second; ++cur_elem){
518         if ( cur_elem->m_token == token )
519             break;
520     }
521     if (cur_elem->m_token != token )
522         return REMOVE_ITEM_DONOT_EXISTS;
523
524     int offset = (cur_elem - buf_begin) *
525         sizeof(PinyinIndexItem<phrase_length>);
526     m_chunk.remove_content(offset, sizeof (PinyinIndexItem<phrase_length>));
527     return REMOVE_OK;
528 }
529
530 bool PinyinLargeTable::load_text(FILE * infile){
531     char pinyin[256];
532     char phrase[256];
533     phrase_token_t token;
534     size_t freq;
535
536     while ( !feof(infile) ) {
537         fscanf(infile, "%s", pinyin);
538         fscanf(infile, "%s", phrase);
539         fscanf(infile, "%u", &token);
540         fscanf(infile, "%ld", &freq);
541
542         if ( feof(infile) )
543             break;
544
545         PinyinDefaultParser parser;
546         NullPinyinValidator validator;
547         PinyinKeyVector keys;
548         PinyinKeyPosVector poses;
549         
550         keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
551         poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
552         parser.parse(validator, keys, poses, pinyin);
553         
554         add_index( keys->len, (PinyinKey *)keys->data, token);
555
556         g_array_free(keys, true);
557         g_array_free(poses, true);
558     }
559     return true;
560 }
561
562 bool PinyinBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
563                                   table_offset_t end){
564     reset();
565     char * buf_begin = (char *) chunk->begin();
566     table_offset_t phrase_begin, phrase_end;
567     table_offset_t * index = (table_offset_t *) (buf_begin + offset);
568     phrase_end = *index;
569     for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m )
570         for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n)
571             for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k){
572                 phrase_begin = phrase_end;
573                 index++;
574                 phrase_end = *index;
575                 if ( phrase_begin == phrase_end ) //null pointer
576                     continue;
577                 PinyinLengthIndexLevel * phrases = new PinyinLengthIndexLevel;
578                 m_pinyin_length_indexes[m][n][k] = phrases;
579                 phrases->load(chunk, phrase_begin, phrase_end - 1);
580                 assert( phrase_end <= end );
581                 assert( *(buf_begin + phrase_end - 1) == c_separate);
582             }
583     offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof (table_offset_t);
584     assert( c_separate == *(buf_begin + offset) );
585     return true;
586 }
587
588 bool PinyinBitmapIndexLevel::store(MemoryChunk * new_chunk, 
589                                    table_offset_t offset,
590                                    table_offset_t & end){
591     table_offset_t phrase_end;
592     table_offset_t index = offset;
593     offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof ( table_offset_t);
594     //add '#'
595     new_chunk->set_content(offset, &c_separate, sizeof(char));
596     offset += sizeof(char);
597     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
598     index += sizeof(table_offset_t);
599     for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m)
600         for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n)
601             for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k) {
602                 PinyinLengthIndexLevel * phrases = m_pinyin_length_indexes[m][n][k];
603                 if ( !phrases ) { //null pointer
604                     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
605                     index += sizeof(table_offset_t);
606                     continue;
607                 }
608                 phrases->store(new_chunk, offset, phrase_end); //has a end '#'
609                 offset = phrase_end;
610                 //add '#'
611                 new_chunk->set_content(offset, &c_separate, sizeof(char));
612                 offset += sizeof(char);
613                 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
614                 index += sizeof(table_offset_t);
615             }
616     end = offset;
617     return true;
618 }
619
620 bool PinyinLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
621     char * buf_begin = (char *) chunk->begin();
622     guint32 nindex = *((guint32 *)(buf_begin + offset));
623     table_offset_t * index = (table_offset_t *)
624         (buf_begin + offset + sizeof(guint32));
625
626     table_offset_t phrase_begin, phrase_end = *index;
627     m_pinyin_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
628     for ( size_t i = 0; i < nindex; ++i) {
629         phrase_begin = phrase_end;
630         index++;
631         phrase_end = *index;
632         if ( phrase_begin == phrase_end ){
633             void * null = NULL;
634             g_array_append_val(m_pinyin_array_indexes, null);
635             continue;
636         }
637
638 #define CASE(len) case len:                                             \
639         {                                                               \
640             PinyinArrayIndexLevel<len> * phrase = new PinyinArrayIndexLevel<len>; \
641             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
642             assert( *(buf_begin + phrase_end - 1) == c_separate);       \
643             assert( phrase_end <= end );                                \
644             g_array_append_val(m_pinyin_array_indexes, phrase);         \
645             break;                                                      \
646         }
647         switch ( i ){
648             CASE(0);
649             CASE(1);
650             CASE(2);
651             CASE(3);
652             CASE(4);
653             CASE(5);
654             CASE(6);
655             CASE(7);
656             CASE(8);
657             CASE(9);
658             CASE(10);
659             CASE(11);
660             CASE(12);
661             CASE(13);
662             CASE(14);
663             CASE(15);
664         default:
665             assert(false);
666         }
667
668 #undef CASE
669     }
670     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
671     assert ( c_separate == * (buf_begin + offset) );
672     return true;
673 }
674
675 bool PinyinLengthIndexLevel::store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
676     guint32 nindex = m_pinyin_array_indexes->len;
677     new_chunk->set_content(offset, &nindex, sizeof(guint32));
678     table_offset_t index = offset + sizeof(guint32);
679
680     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
681     new_chunk->set_content(offset, &c_separate, sizeof(char));
682     offset += sizeof(char);
683     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
684     index += sizeof(table_offset_t);
685
686     table_offset_t phrase_end;
687     for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i) {
688 #define CASE(len) case len:                                             \
689         {                                                               \
690             PinyinArrayIndexLevel<len> * phrase = g_array_index         \
691                 (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> * , i); \
692             if ( !phrase ){                                             \
693                 new_chunk->set_content                                  \
694                     (index, &offset, sizeof(table_offset_t));           \
695                 index += sizeof(table_offset_t);                        \
696                 continue;                                               \
697             }                                                           \
698             phrase->store(new_chunk, offset, phrase_end);               \
699             offset = phrase_end;                                        \
700             break;                                                      \
701         }
702         switch ( i ){
703             CASE(0);
704             CASE(1);
705             CASE(2);
706             CASE(3);
707             CASE(4);
708             CASE(5);
709             CASE(6);
710             CASE(7);
711             CASE(8);
712             CASE(9);
713             CASE(10);
714             CASE(11);
715             CASE(12);
716             CASE(13);
717             CASE(14);
718             CASE(15);
719         default:
720             assert(false);
721         }
722         //add '#'
723         new_chunk->set_content(offset, &c_separate, sizeof(char));
724         offset += sizeof(char);
725         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
726         index += sizeof(table_offset_t);
727
728 #undef CASE                                                     
729     }
730     end = offset;
731     return true;
732 }
733
734 template<size_t phrase_length>
735 bool PinyinArrayIndexLevel<phrase_length>::
736 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
737     char * buf_begin = (char *) chunk->begin();
738     m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
739     return true;
740 }
741
742 template<size_t phrase_length>
743 bool PinyinArrayIndexLevel<phrase_length>::
744 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
745     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
746     end = offset + m_chunk.size();
747     return true;
748 }