add get_shengmu/yunmu_string
[platform/upstream/libpinyin.git] / src / storage / phrase_large_table2.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include <assert.h>
23 #include <string.h>
24 #include "phrase_large_table2.h"
25
26
27 /* class definition */
28
29 namespace pinyin{
30
31 class PhraseLengthIndexLevel2{
32 protected:
33     GArray * m_phrase_array_indexes;
34 public:
35     PhraseLengthIndexLevel2();
36     ~PhraseLengthIndexLevel2();
37
38     /* load/store method */
39     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
40     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
41
42     /* search method */
43     int search(int phrase_length, /* in */ ucs4_t phrase[],
44                /* out */ PhraseTokens tokens) const;
45
46     /* add_index/remove_index method */
47     int add_index(int phrase_length, /* in */ ucs4_t phrase[],
48                   /* in */ phrase_token_t token);
49     int remove_index(int phrase_length, /* in */ ucs4_t phrase[],
50                      /* in */ phrase_token_t token);
51
52     /* get length method */
53     int get_length() const;
54
55     /* mask out method */
56     bool mask_out(phrase_token_t mask, phrase_token_t value);
57 };
58
59
60 template<size_t phrase_length>
61 struct PhraseIndexItem2{
62     phrase_token_t m_token;
63     ucs4_t m_phrase[phrase_length];
64 public:
65     PhraseIndexItem2<phrase_length>(ucs4_t phrase[], phrase_token_t token){
66         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
67         m_token = token;
68     }
69 };
70
71
72 template<size_t phrase_length>
73 class PhraseArrayIndexLevel2{
74 protected:
75     typedef PhraseIndexItem2<phrase_length> IndexItem;
76
77 protected:
78     MemoryChunk m_chunk;
79 public:
80     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
81     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
82
83     /* search method */
84     int search(/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
85
86     /* add_index/remove_index method */
87     int add_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
88     int remove_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
89
90     /* get length method */
91     int get_length() const;
92
93     /* mask out method */
94     bool mask_out(phrase_token_t mask, phrase_token_t value);
95 };
96
97 };
98
99 using namespace pinyin;
100
101 /* class implementation */
102
103 template<size_t phrase_length>
104 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
105                            const PhraseIndexItem2<phrase_length> &rhs){
106     ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
107     ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
108
109     return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
110 }
111
112 template<size_t phrase_length>
113 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
114                               const PhraseIndexItem2<phrase_length> & rhs){
115     return 0 > phrase_compare2(lhs, rhs);
116 }
117
118 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
119     memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
120 }
121
122 void PhraseBitmapIndexLevel2::reset(){
123     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
124         PhraseLengthIndexLevel2 * & length_array =
125             m_phrase_length_indexes[i];
126         if ( length_array )
127             delete length_array;
128         length_array = NULL;
129     }
130 }
131
132
133 /* search method */
134
135 int PhraseBitmapIndexLevel2::search(int phrase_length,
136                                     /* in */ ucs4_t phrase[],
137                                     /* out */ PhraseTokens tokens) const {
138     assert(phrase_length > 0);
139
140     int result = SEARCH_NONE;
141     /* use the first 8-bit of the lower 16-bit for bitmap index,
142      * as most the higher 16-bit are zero.
143      */
144     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
145
146     PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
147     if ( phrase_array )
148         return phrase_array->search(phrase_length, phrase, tokens);
149     return result;
150 }
151
152 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
153     m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
154 }
155
156 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
157 #define CASE(len) case len:                                             \
158     {                                                                   \
159         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
160             (m_phrase_array_indexes,                                    \
161              PhraseArrayIndexLevel2<len> *, len - 1);                   \
162         if ( array ) {                                                  \
163             delete array;                                               \
164             array = NULL;                                               \
165         }                                                               \
166         break;                                                          \
167     }
168
169     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
170         switch (i){
171             CASE(1);
172             CASE(2);
173             CASE(3);
174             CASE(4);
175             CASE(5);
176             CASE(6);
177             CASE(7);
178             CASE(8);
179             CASE(9);
180             CASE(10);
181             CASE(11);
182             CASE(12);
183             CASE(13);
184             CASE(14);
185             CASE(15);
186             CASE(16);
187         default:
188             assert(false);
189         }
190     }
191     g_array_free(m_phrase_array_indexes, TRUE);
192 #undef CASE
193 }
194
195 int PhraseLengthIndexLevel2::search(int phrase_length,
196                                     /* in */ ucs4_t phrase[],
197                                     /* out */ PhraseTokens tokens) const {
198     int result = SEARCH_NONE;
199     if(m_phrase_array_indexes->len < phrase_length)
200         return result;
201     if (m_phrase_array_indexes->len > phrase_length)
202         result |= SEARCH_CONTINUED;
203
204 #define CASE(len) case len:                                             \
205     {                                                                   \
206         PhraseArrayIndexLevel2<len> * array = g_array_index             \
207             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
208         if ( !array )                                                   \
209             return result;                                              \
210         result |= array->search(phrase, tokens);                        \
211         return result;                                                  \
212     }
213
214     switch ( phrase_length ){
215         CASE(1);
216         CASE(2);
217         CASE(3);
218         CASE(4);
219         CASE(5);
220         CASE(6);
221         CASE(7);
222         CASE(8);
223         CASE(9);
224         CASE(10);
225         CASE(11);
226         CASE(12);
227         CASE(13);
228         CASE(14);
229         CASE(15);
230         CASE(16);
231     default:
232         assert(false);
233     }
234 #undef CASE
235 }
236
237 template<size_t phrase_length>
238 int PhraseArrayIndexLevel2<phrase_length>::search
239 (/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
240     int result = SEARCH_NONE;
241
242     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
243     chunk_begin = (IndexItem *) m_chunk.begin();
244     chunk_end = (IndexItem *) m_chunk.end();
245
246     /* do the search */
247     IndexItem search_elem(phrase, -1);
248     std_lite::pair<IndexItem *, IndexItem *> range;
249     range = std_lite::equal_range
250         (chunk_begin, chunk_end, search_elem,
251          phrase_less_than2<phrase_length>);
252
253     const IndexItem * const begin = range.first;
254     const IndexItem * const end = range.second;
255     if (begin == end)
256         return result;
257
258     const IndexItem * iter = NULL;
259     GArray * array = NULL;
260
261     for (iter = begin; iter != end; ++iter) {
262         phrase_token_t token = iter->m_token;
263
264         /* filter out disabled sub phrase indices. */
265         array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
266         if (NULL == array)
267             continue;
268
269         result |= SEARCH_OK;
270
271         g_array_append_val(array, token);
272     }
273
274     return result;
275 }
276
277
278 /* add/remove index method */
279
280 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
281                                        /* in */ ucs4_t phrase[],
282                                        /* in */ phrase_token_t token){
283     guint8 first_key =  (phrase[0] & 0xFF00) >> 8;
284
285     PhraseLengthIndexLevel2 * & length_array =
286         m_phrase_length_indexes[first_key];
287
288     if ( !length_array ){
289         length_array = new PhraseLengthIndexLevel2();
290     }
291     return length_array->add_index(phrase_length, phrase, token);
292 }
293
294 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
295                                          /* in */ ucs4_t phrase[],
296                                          /* in */ phrase_token_t token){
297     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
298
299     PhraseLengthIndexLevel2 * & length_array =
300         m_phrase_length_indexes[first_key];
301
302     if (NULL == length_array)
303         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
304
305     int retval = length_array->remove_index(phrase_length, phrase, token);
306
307     /* remove empty array. */
308     if (0 == length_array->get_length()) {
309         delete length_array;
310         length_array = NULL;
311     }
312
313     return retval;
314 }
315
316 int PhraseLengthIndexLevel2::add_index(int phrase_length,
317                                        /* in */ ucs4_t phrase[],
318                                        /* in */ phrase_token_t token) {
319     if (phrase_length >= MAX_PHRASE_LENGTH)
320         return ERROR_PHRASE_TOO_LONG;
321
322     if (m_phrase_array_indexes->len < phrase_length)
323         g_array_set_size(m_phrase_array_indexes, phrase_length);
324
325 #define CASE(len) case len:                                             \
326     {                                                                   \
327         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
328             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
329         if ( !array )                                                   \
330             array = new PhraseArrayIndexLevel2<len>;                    \
331         return array->add_index(phrase, token);                         \
332     }
333
334     switch(phrase_length){
335         CASE(1);
336         CASE(2);
337         CASE(3);
338         CASE(4);
339         CASE(5);
340         CASE(6);
341         CASE(7);
342         CASE(8);
343         CASE(9);
344         CASE(10);
345         CASE(11);
346         CASE(12);
347         CASE(13);
348         CASE(14);
349         CASE(15);
350         CASE(16);
351     default:
352         assert(false);
353     }
354
355 #undef CASE
356 }
357
358 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
359                                           /* in */ ucs4_t phrase[],
360                                           /* in */ phrase_token_t token) {
361     if (phrase_length >= MAX_PHRASE_LENGTH)
362         return ERROR_PHRASE_TOO_LONG;
363
364     if (m_phrase_array_indexes->len < phrase_length)
365         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
366
367 #define CASE(len) case len:                                             \
368     {                                                                   \
369         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
370             (m_phrase_array_indexes,                                    \
371              PhraseArrayIndexLevel2<len> *, len - 1);                   \
372         if (NULL == array)                                              \
373             return ERROR_REMOVE_ITEM_DONOT_EXISTS;                      \
374         int retval = array->remove_index(phrase, token);                \
375                                                                         \
376         /* remove empty array. */                                       \
377         if (0 == array->get_length()) {                                 \
378             delete array;                                               \
379             array = NULL;                                               \
380                                                                         \
381             /* shrink self array. */                                    \
382             g_array_set_size(m_phrase_array_indexes,                    \
383                              get_length());                             \
384         }                                                               \
385         return retval;                                                  \
386     }
387
388     switch(phrase_length){
389         CASE(1);
390         CASE(2);
391         CASE(3);
392         CASE(4);
393         CASE(5);
394         CASE(6);
395         CASE(7);
396         CASE(8);
397         CASE(9);
398         CASE(10);
399         CASE(11);
400         CASE(12);
401         CASE(13);
402         CASE(14);
403         CASE(15);
404         CASE(16);
405     default:
406         assert(false);
407     }
408 #undef CASE
409 }
410
411 template<size_t phrase_length>
412 int PhraseArrayIndexLevel2<phrase_length>::add_index
413 (/* in */ ucs4_t phrase[], /* in */ phrase_token_t token){
414     IndexItem * begin, * end;
415
416     IndexItem add_elem(phrase, token);
417     begin = (IndexItem *) m_chunk.begin();
418     end   = (IndexItem *) m_chunk.end();
419
420     std_lite::pair<IndexItem *, IndexItem *> range;
421     range = std_lite::equal_range
422         (begin, end, add_elem, phrase_less_than2<phrase_length>);
423
424     IndexItem * cur_elem;
425     for (cur_elem = range.first;
426          cur_elem != range.second; ++cur_elem) {
427         if (cur_elem->m_token == token)
428             return ERROR_INSERT_ITEM_EXISTS;
429         if (cur_elem->m_token > token)
430             break;
431     }
432
433     int offset = (cur_elem - begin) * sizeof(IndexItem);
434     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
435     return ERROR_OK;
436 }
437
438 template<size_t phrase_length>
439 int PhraseArrayIndexLevel2<phrase_length>::remove_index
440 (/* in */ ucs4_t phrase[], /* in */ phrase_token_t token) {
441     IndexItem * begin, * end;
442
443     IndexItem remove_elem(phrase, token);
444     begin = (IndexItem *) m_chunk.begin();
445     end   = (IndexItem *) m_chunk.end();
446
447     std_lite::pair<IndexItem *, IndexItem *> range;
448     range = std_lite::equal_range
449         (begin, end, remove_elem, phrase_less_than2<phrase_length>);
450
451     IndexItem * cur_elem;
452     for (cur_elem = range.first;
453          cur_elem != range.second; ++cur_elem) {
454         if (cur_elem->m_token == token)
455             break;
456     }
457
458     if (cur_elem == range.second)
459         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
460
461     int offset = (cur_elem - begin) * sizeof(IndexItem);
462     m_chunk.remove_content(offset, sizeof(IndexItem));
463     return ERROR_OK;
464 }
465
466
467 /* load text method */
468
469 bool PhraseLargeTable2::load_text(FILE * infile){
470     char pinyin[256];
471     char phrase[256];
472     phrase_token_t token;
473     size_t freq;
474
475     while ( !feof(infile) ) {
476         fscanf(infile, "%s", pinyin);
477         fscanf(infile, "%s", phrase);
478         fscanf(infile, "%u", &token);
479         fscanf(infile, "%ld", &freq);
480
481         if ( feof(infile) )
482             break;
483
484         glong phrase_len = g_utf8_strlen(phrase, -1);
485         ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
486         add_index(phrase_len, new_phrase, token);
487
488         g_free(new_phrase);
489     }
490     return true;
491 }
492
493
494 /* load/store method */
495
496 bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
497                                    table_offset_t offset,
498                                    table_offset_t end){
499     reset();
500     char * buf_begin = (char *) chunk->begin();
501     table_offset_t phrase_begin, phrase_end;
502     table_offset_t * index = (table_offset_t *) (buf_begin + offset);
503     phrase_end = *index;
504
505     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
506         phrase_begin = phrase_end;
507         index++;
508         phrase_end = *index;
509         if ( phrase_begin == phrase_end ) //null pointer
510             continue;
511
512         /* after reset() all phrases are null pointer. */
513         PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
514         m_phrase_length_indexes[i] = phrases;
515
516         phrases->load(chunk, phrase_begin, phrase_end - 1);
517         assert( phrase_end <= end );
518         assert( *(buf_begin + phrase_end - 1) == c_separate);
519     }
520     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
521     assert( c_separate == *(buf_begin + offset) );
522     return true;
523 }
524
525 bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
526                                     table_offset_t offset,
527                                     table_offset_t & end){
528     table_offset_t phrase_end;
529     table_offset_t index = offset;
530     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
531     //add '#'
532     new_chunk->set_content(offset, &c_separate, sizeof(char));
533     offset +=sizeof(char);
534     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
535     index += sizeof(table_offset_t);
536     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
537         PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
538         if ( !phrases ) { //null pointer
539             new_chunk->set_content(index, &offset, sizeof(table_offset_t));
540             index += sizeof(table_offset_t);
541             continue;
542         }
543         phrases->store(new_chunk, offset, phrase_end); //has a end '#'
544         offset = phrase_end;
545         //add '#'
546         new_chunk->set_content(offset, &c_separate, sizeof(char));
547         offset += sizeof(char);
548         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
549         index += sizeof(table_offset_t);
550     }
551     end = offset;
552     return true;
553 }
554
555 bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
556                                    table_offset_t offset,
557                                    table_offset_t end) {
558     char * buf_begin = (char *) chunk->begin();
559     guint32 nindex = *((guint32 *)(buf_begin + offset));
560     table_offset_t * index = (table_offset_t *)
561         (buf_begin + offset + sizeof(guint32));
562
563     table_offset_t phrase_begin, phrase_end = *index;
564     g_array_set_size(m_phrase_array_indexes, 0);
565     for (size_t i = 1; i <= nindex; ++i) {
566         phrase_begin = phrase_end;
567         index++;
568         phrase_end = *index;
569         if ( phrase_begin == phrase_end ){
570             void * null = NULL;
571             g_array_append_val(m_phrase_array_indexes, null);
572             continue;
573         }
574
575 #define CASE(len) case len:                                             \
576         {                                                               \
577             PhraseArrayIndexLevel2<len> * phrase =                      \
578                 new PhraseArrayIndexLevel2<len>;                        \
579             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
580             assert( *(buf_begin + phrase_end - 1) == c_separate );      \
581             assert( phrase_end <= end );                                \
582             g_array_append_val(m_phrase_array_indexes, phrase);         \
583             break;                                                      \
584         }
585         switch ( i ){
586             CASE(1);
587             CASE(2);
588             CASE(3);
589             CASE(4);
590             CASE(5);
591             CASE(6);
592             CASE(7);
593             CASE(8);
594             CASE(9);
595             CASE(10);
596             CASE(11);
597             CASE(12);
598             CASE(13);
599             CASE(14);
600             CASE(15);
601             CASE(16);
602         default:
603             assert(false);
604         }
605 #undef CASE
606     }
607     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
608     assert ( c_separate == * (buf_begin + offset) );
609     return true;
610 }
611
612 bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
613                                     table_offset_t offset,
614                                     table_offset_t & end) {
615     guint32 nindex = m_phrase_array_indexes->len;
616     new_chunk->set_content(offset, &nindex, sizeof(guint32));
617     table_offset_t index = offset + sizeof(guint32);
618
619     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
620     new_chunk->set_content(offset, &c_separate, sizeof(char));
621     offset += sizeof(char);
622     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
623     index += sizeof(table_offset_t);
624
625     table_offset_t phrase_end;
626     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
627 #define CASE(len) case len:                                             \
628         {                                                               \
629             PhraseArrayIndexLevel2<len> * phrase = g_array_index        \
630                 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
631             if ( !phrase ){                                             \
632                 new_chunk->set_content                                  \
633                     (index, &offset, sizeof(table_offset_t));           \
634                 index += sizeof(table_offset_t);                        \
635                 continue;                                               \
636             }                                                           \
637             phrase->store(new_chunk, offset, phrase_end);               \
638             offset = phrase_end;                                        \
639             break;                                                      \
640         }
641         switch ( i ){
642             CASE(1);
643             CASE(2);
644             CASE(3);
645             CASE(4);
646             CASE(5);
647             CASE(6);
648             CASE(7);
649             CASE(8);
650             CASE(9);
651             CASE(10);
652             CASE(11);
653             CASE(12);
654             CASE(13);
655             CASE(14);
656             CASE(15);
657             CASE(16);
658         default:
659             assert(false);
660         }
661         //add '#'
662         new_chunk->set_content(offset, &c_separate, sizeof(char));
663         offset += sizeof(char);
664         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
665         index += sizeof(table_offset_t);
666
667 #undef CASE
668     }
669     end = offset;
670     return true;
671 }
672
673 template<size_t phrase_length>
674 bool PhraseArrayIndexLevel2<phrase_length>::
675 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
676     char * buf_begin = (char *) chunk->begin();
677     m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
678     return true;
679 }
680
681 template<size_t phrase_length>
682 bool PhraseArrayIndexLevel2<phrase_length>::
683 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
684     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
685     end = offset + m_chunk.size();
686     return true;
687 }
688
689
690 /* get length method */
691
692 int PhraseLengthIndexLevel2::get_length() const {
693     int length = m_phrase_array_indexes->len;
694
695     /* trim trailing zero. */
696     for (int i = length - 1; i >= 0; --i) {
697         void * array = g_array_index(m_phrase_array_indexes, void *, i);
698
699         if (NULL != array)
700             break;
701
702         --length;
703     }
704
705     return length;
706 }
707
708 template<size_t phrase_length>
709 int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
710     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
711     chunk_begin = (IndexItem *) m_chunk.begin();
712     chunk_end = (IndexItem *) m_chunk.end();
713
714     return chunk_end - chunk_begin;
715 }
716
717
718 /* mask out method */
719
720 bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
721                                        phrase_token_t value){
722     for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
723         PhraseLengthIndexLevel2 * & length_array =
724             m_phrase_length_indexes[i];
725
726         if (NULL == length_array)
727             continue;
728
729         length_array->mask_out(mask, value);
730
731         if (0 == length_array->get_length()) {
732             delete length_array;
733             length_array = NULL;
734         }
735     }
736
737     return true;
738 }
739
740 bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
741                                        phrase_token_t value){
742 #define CASE(len) case len:                                     \
743     {                                                           \
744         PhraseArrayIndexLevel2<len> * & array = g_array_index   \
745             (m_phrase_array_indexes,                            \
746              PhraseArrayIndexLevel2<len> *, len - 1);           \
747                                                                 \
748         if (NULL == array)                                      \
749             continue;                                           \
750                                                                 \
751         array->mask_out(mask, value);                           \
752                                                                 \
753         if (0 == array->get_length()) {                         \
754             delete array;                                       \
755             array = NULL;                                       \
756         }                                                       \
757         break;                                                  \
758     }
759
760     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
761         switch (i) {
762             CASE(1);
763             CASE(2);
764             CASE(3);
765             CASE(4);
766             CASE(5);
767             CASE(6);
768             CASE(7);
769             CASE(8);
770             CASE(9);
771             CASE(10);
772             CASE(11);
773             CASE(12);
774             CASE(13);
775             CASE(14);
776             CASE(15);
777             CASE(16);
778         default:
779             assert(false);
780         }
781     }
782     /* shrink self array. */
783     g_array_set_size(m_phrase_array_indexes, get_length());
784 #undef CASE
785     return true;
786 }
787
788 template<size_t phrase_length>
789 bool PhraseArrayIndexLevel2<phrase_length>::mask_out
790 (phrase_token_t mask, phrase_token_t value) {
791     IndexItem * begin = NULL, * end = NULL;
792     begin = (IndexItem *) m_chunk.begin();
793     end = (IndexItem *) m_chunk.end();
794
795     for (IndexItem * cur = begin; cur != end; ++cur) {
796         if ((cur->m_token & mask) != value)
797             continue;
798
799         int offset = (cur - begin) * sizeof(IndexItem);
800         m_chunk.remove_content(offset, sizeof(IndexItem));
801
802         /* update chunk end. */
803         end = (IndexItem *) m_chunk.end();
804         --cur;
805     }
806
807     return true;
808 }