Allow libpinyin to build in cross compile mode.
[platform/upstream/libpinyin.git] / src / storage / phrase_large_table2.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include <assert.h>
23 #include <string.h>
24 #include "phrase_large_table2.h"
25
26
27 /* class definition */
28
29 namespace pinyin{
30
31 class PhraseLengthIndexLevel2{
32 protected:
33     GArray * m_phrase_array_indexes;
34 public:
35     PhraseLengthIndexLevel2();
36     ~PhraseLengthIndexLevel2();
37
38     /* load/store method */
39     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
40     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
41
42     /* search method */
43     int search(int phrase_length, /* in */ const ucs4_t phrase[],
44                /* out */ PhraseTokens tokens) const;
45
46     /* add_index/remove_index method */
47     int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
48                   /* in */ phrase_token_t token);
49     int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
50                      /* in */ phrase_token_t token);
51
52     /* get length method */
53     int get_length() const;
54
55     /* mask out method */
56     bool mask_out(phrase_token_t mask, phrase_token_t value);
57 };
58
59
60 template<size_t phrase_length>
61 struct PhraseIndexItem2{
62     phrase_token_t m_token;
63     ucs4_t m_phrase[phrase_length];
64 public:
65     PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){
66         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
67         m_token = token;
68     }
69 };
70
71
72 template<size_t phrase_length>
73 class PhraseArrayIndexLevel2{
74 protected:
75     typedef PhraseIndexItem2<phrase_length> IndexItem;
76
77 protected:
78     MemoryChunk m_chunk;
79 public:
80     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
81     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
82
83     /* search method */
84     int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
85
86     /* add_index/remove_index method */
87     int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
88     int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
89
90     /* get length method */
91     int get_length() const;
92
93     /* mask out method */
94     bool mask_out(phrase_token_t mask, phrase_token_t value);
95 };
96
97 };
98
99 using namespace pinyin;
100
101 /* class implementation */
102
103 template<size_t phrase_length>
104 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
105                            const PhraseIndexItem2<phrase_length> &rhs){
106     ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
107     ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
108
109     return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
110 }
111
112 template<size_t phrase_length>
113 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
114                               const PhraseIndexItem2<phrase_length> & rhs){
115     return 0 > phrase_compare2(lhs, rhs);
116 }
117
118 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
119     memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
120 }
121
122 void PhraseBitmapIndexLevel2::reset(){
123     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
124         PhraseLengthIndexLevel2 * & length_array =
125             m_phrase_length_indexes[i];
126         if ( length_array )
127             delete length_array;
128         length_array = NULL;
129     }
130 }
131
132
133 /* search method */
134
135 int PhraseBitmapIndexLevel2::search(int phrase_length,
136                                     /* in */ const ucs4_t phrase[],
137                                     /* out */ PhraseTokens tokens) const {
138     assert(phrase_length > 0);
139
140     int result = SEARCH_NONE;
141     /* use the first 8-bit of the lower 16-bit for bitmap index,
142      * as most the higher 16-bit are zero.
143      */
144     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
145
146     PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
147     if ( phrase_array )
148         return phrase_array->search(phrase_length, phrase, tokens);
149     return result;
150 }
151
152 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
153     m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
154 }
155
156 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
157 #define CASE(len) case len:                                             \
158     {                                                                   \
159         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
160             (m_phrase_array_indexes,                                    \
161              PhraseArrayIndexLevel2<len> *, len - 1);                   \
162         if ( array ) {                                                  \
163             delete array;                                               \
164             array = NULL;                                               \
165         }                                                               \
166         break;                                                          \
167     }
168
169     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
170         switch (i){
171             CASE(1);
172             CASE(2);
173             CASE(3);
174             CASE(4);
175             CASE(5);
176             CASE(6);
177             CASE(7);
178             CASE(8);
179             CASE(9);
180             CASE(10);
181             CASE(11);
182             CASE(12);
183             CASE(13);
184             CASE(14);
185             CASE(15);
186             CASE(16);
187         default:
188             assert(false);
189         }
190     }
191     g_array_free(m_phrase_array_indexes, TRUE);
192 #undef CASE
193 }
194
195 int PhraseLengthIndexLevel2::search(int phrase_length,
196                                     /* in */ const ucs4_t phrase[],
197                                     /* out */ PhraseTokens tokens) const {
198     int result = SEARCH_NONE;
199     if(m_phrase_array_indexes->len < phrase_length)
200         return result;
201     if (m_phrase_array_indexes->len > phrase_length)
202         result |= SEARCH_CONTINUED;
203
204 #define CASE(len) case len:                                             \
205     {                                                                   \
206         PhraseArrayIndexLevel2<len> * array = g_array_index             \
207             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
208         if ( !array )                                                   \
209             return result;                                              \
210         result |= array->search(phrase, tokens);                        \
211         return result;                                                  \
212     }
213
214     switch ( phrase_length ){
215         CASE(1);
216         CASE(2);
217         CASE(3);
218         CASE(4);
219         CASE(5);
220         CASE(6);
221         CASE(7);
222         CASE(8);
223         CASE(9);
224         CASE(10);
225         CASE(11);
226         CASE(12);
227         CASE(13);
228         CASE(14);
229         CASE(15);
230         CASE(16);
231     default:
232         assert(false);
233     }
234 #undef CASE
235 }
236
237 template<size_t phrase_length>
238 int PhraseArrayIndexLevel2<phrase_length>::search
239 (/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
240     int result = SEARCH_NONE;
241
242     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
243     chunk_begin = (IndexItem *) m_chunk.begin();
244     chunk_end = (IndexItem *) m_chunk.end();
245
246     /* do the search */
247     IndexItem search_elem(phrase, -1);
248     std_lite::pair<IndexItem *, IndexItem *> range;
249     range = std_lite::equal_range
250         (chunk_begin, chunk_end, search_elem,
251          phrase_less_than2<phrase_length>);
252
253     const IndexItem * const begin = range.first;
254     const IndexItem * const end = range.second;
255     if (begin == end)
256         return result;
257
258     const IndexItem * iter = NULL;
259     GArray * array = NULL;
260
261     for (iter = begin; iter != end; ++iter) {
262         phrase_token_t token = iter->m_token;
263
264         /* filter out disabled sub phrase indices. */
265         array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
266         if (NULL == array)
267             continue;
268
269         result |= SEARCH_OK;
270
271         g_array_append_val(array, token);
272     }
273
274     return result;
275 }
276
277
278 /* add/remove index method */
279
280 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
281                                        /* in */ const ucs4_t phrase[],
282                                        /* in */ phrase_token_t token){
283     guint8 first_key =  (phrase[0] & 0xFF00) >> 8;
284
285     PhraseLengthIndexLevel2 * & length_array =
286         m_phrase_length_indexes[first_key];
287
288     if ( !length_array ){
289         length_array = new PhraseLengthIndexLevel2();
290     }
291     return length_array->add_index(phrase_length, phrase, token);
292 }
293
294 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
295                                          /* in */ const ucs4_t phrase[],
296                                          /* in */ phrase_token_t token){
297     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
298
299     PhraseLengthIndexLevel2 * & length_array =
300         m_phrase_length_indexes[first_key];
301
302     if (NULL == length_array)
303         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
304
305     int retval = length_array->remove_index(phrase_length, phrase, token);
306
307     /* remove empty array. */
308     if (0 == length_array->get_length()) {
309         delete length_array;
310         length_array = NULL;
311     }
312
313     return retval;
314 }
315
316 int PhraseLengthIndexLevel2::add_index(int phrase_length,
317                                        /* in */ const ucs4_t phrase[],
318                                        /* in */ phrase_token_t token) {
319     if (phrase_length >= MAX_PHRASE_LENGTH)
320         return ERROR_PHRASE_TOO_LONG;
321
322     if (m_phrase_array_indexes->len < phrase_length)
323         g_array_set_size(m_phrase_array_indexes, phrase_length);
324
325 #define CASE(len) case len:                                             \
326     {                                                                   \
327         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
328             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
329         if ( !array )                                                   \
330             array = new PhraseArrayIndexLevel2<len>;                    \
331         return array->add_index(phrase, token);                         \
332     }
333
334     switch(phrase_length){
335         CASE(1);
336         CASE(2);
337         CASE(3);
338         CASE(4);
339         CASE(5);
340         CASE(6);
341         CASE(7);
342         CASE(8);
343         CASE(9);
344         CASE(10);
345         CASE(11);
346         CASE(12);
347         CASE(13);
348         CASE(14);
349         CASE(15);
350         CASE(16);
351     default:
352         assert(false);
353     }
354
355 #undef CASE
356 }
357
358 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
359                                           /* in */ const ucs4_t phrase[],
360                                           /* in */ phrase_token_t token) {
361     if (phrase_length >= MAX_PHRASE_LENGTH)
362         return ERROR_PHRASE_TOO_LONG;
363
364     if (m_phrase_array_indexes->len < phrase_length)
365         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
366
367 #define CASE(len) case len:                                             \
368     {                                                                   \
369         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
370             (m_phrase_array_indexes,                                    \
371              PhraseArrayIndexLevel2<len> *, len - 1);                   \
372         if (NULL == array)                                              \
373             return ERROR_REMOVE_ITEM_DONOT_EXISTS;                      \
374         int retval = array->remove_index(phrase, token);                \
375                                                                         \
376         /* remove empty array. */                                       \
377         if (0 == array->get_length()) {                                 \
378             delete array;                                               \
379             array = NULL;                                               \
380                                                                         \
381             /* shrink self array. */                                    \
382             g_array_set_size(m_phrase_array_indexes,                    \
383                              get_length());                             \
384         }                                                               \
385         return retval;                                                  \
386     }
387
388     switch(phrase_length){
389         CASE(1);
390         CASE(2);
391         CASE(3);
392         CASE(4);
393         CASE(5);
394         CASE(6);
395         CASE(7);
396         CASE(8);
397         CASE(9);
398         CASE(10);
399         CASE(11);
400         CASE(12);
401         CASE(13);
402         CASE(14);
403         CASE(15);
404         CASE(16);
405     default:
406         assert(false);
407     }
408 #undef CASE
409 }
410
411 template<size_t phrase_length>
412 int PhraseArrayIndexLevel2<phrase_length>::add_index
413 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){
414     IndexItem * begin, * end;
415
416     IndexItem add_elem(phrase, token);
417     begin = (IndexItem *) m_chunk.begin();
418     end   = (IndexItem *) m_chunk.end();
419
420     std_lite::pair<IndexItem *, IndexItem *> range;
421     range = std_lite::equal_range
422         (begin, end, add_elem, phrase_less_than2<phrase_length>);
423
424     IndexItem * cur_elem;
425     for (cur_elem = range.first;
426          cur_elem != range.second; ++cur_elem) {
427         if (cur_elem->m_token == token)
428             return ERROR_INSERT_ITEM_EXISTS;
429         if (cur_elem->m_token > token)
430             break;
431     }
432
433     int offset = (cur_elem - begin) * sizeof(IndexItem);
434     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
435     return ERROR_OK;
436 }
437
438 template<size_t phrase_length>
439 int PhraseArrayIndexLevel2<phrase_length>::remove_index
440 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
441     IndexItem * begin, * end;
442
443     IndexItem remove_elem(phrase, token);
444     begin = (IndexItem *) m_chunk.begin();
445     end   = (IndexItem *) m_chunk.end();
446
447     std_lite::pair<IndexItem *, IndexItem *> range;
448     range = std_lite::equal_range
449         (begin, end, remove_elem, phrase_less_than2<phrase_length>);
450
451     IndexItem * cur_elem;
452     for (cur_elem = range.first;
453          cur_elem != range.second; ++cur_elem) {
454         if (cur_elem->m_token == token)
455             break;
456     }
457
458     if (cur_elem == range.second)
459         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
460
461     int offset = (cur_elem - begin) * sizeof(IndexItem);
462     m_chunk.remove_content(offset, sizeof(IndexItem));
463     return ERROR_OK;
464 }
465
466
467 /* load text method */
468
469 bool PhraseLargeTable2::load_text(FILE * infile){
470     char pinyin[256];
471     char phrase[256];
472     phrase_token_t token;
473     size_t freq;
474
475     while (!feof(infile)) {
476         int num = fscanf(infile, "%s %s %u %ld",
477                          pinyin, phrase, &token, &freq);
478
479         if (4 != num)
480             continue;
481
482         if (feof(infile))
483             break;
484
485         glong phrase_len = g_utf8_strlen(phrase, -1);
486         ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
487         add_index(phrase_len, new_phrase, token);
488
489         g_free(new_phrase);
490     }
491     return true;
492 }
493
494
495 /* load/store method */
496
497 bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
498                                    table_offset_t offset,
499                                    table_offset_t end){
500     reset();
501     char * buf_begin = (char *) chunk->begin();
502     table_offset_t phrase_begin, phrase_end;
503     table_offset_t * index = (table_offset_t *) (buf_begin + offset);
504     phrase_end = *index;
505
506     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
507         phrase_begin = phrase_end;
508         index++;
509         phrase_end = *index;
510         if ( phrase_begin == phrase_end ) //null pointer
511             continue;
512
513         /* after reset() all phrases are null pointer. */
514         PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
515         m_phrase_length_indexes[i] = phrases;
516
517         phrases->load(chunk, phrase_begin, phrase_end - 1);
518         assert( phrase_end <= end );
519         assert( *(buf_begin + phrase_end - 1) == c_separate);
520     }
521     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
522     assert( c_separate == *(buf_begin + offset) );
523     return true;
524 }
525
526 bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
527                                     table_offset_t offset,
528                                     table_offset_t & end){
529     table_offset_t phrase_end;
530     table_offset_t index = offset;
531     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
532     //add '#'
533     new_chunk->set_content(offset, &c_separate, sizeof(char));
534     offset +=sizeof(char);
535     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
536     index += sizeof(table_offset_t);
537     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
538         PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
539         if ( !phrases ) { //null pointer
540             new_chunk->set_content(index, &offset, sizeof(table_offset_t));
541             index += sizeof(table_offset_t);
542             continue;
543         }
544         phrases->store(new_chunk, offset, phrase_end); //has a end '#'
545         offset = phrase_end;
546         //add '#'
547         new_chunk->set_content(offset, &c_separate, sizeof(char));
548         offset += sizeof(char);
549         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
550         index += sizeof(table_offset_t);
551     }
552     end = offset;
553     return true;
554 }
555
556 bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
557                                    table_offset_t offset,
558                                    table_offset_t end) {
559     char * buf_begin = (char *) chunk->begin();
560     guint32 nindex = *((guint32 *)(buf_begin + offset));
561     table_offset_t * index = (table_offset_t *)
562         (buf_begin + offset + sizeof(guint32));
563
564     table_offset_t phrase_begin, phrase_end = *index;
565     g_array_set_size(m_phrase_array_indexes, 0);
566     for (size_t i = 1; i <= nindex; ++i) {
567         phrase_begin = phrase_end;
568         index++;
569         phrase_end = *index;
570         if ( phrase_begin == phrase_end ){
571             void * null = NULL;
572             g_array_append_val(m_phrase_array_indexes, null);
573             continue;
574         }
575
576 #define CASE(len) case len:                                             \
577         {                                                               \
578             PhraseArrayIndexLevel2<len> * phrase =                      \
579                 new PhraseArrayIndexLevel2<len>;                        \
580             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
581             assert( *(buf_begin + phrase_end - 1) == c_separate );      \
582             assert( phrase_end <= end );                                \
583             g_array_append_val(m_phrase_array_indexes, phrase);         \
584             break;                                                      \
585         }
586         switch ( i ){
587             CASE(1);
588             CASE(2);
589             CASE(3);
590             CASE(4);
591             CASE(5);
592             CASE(6);
593             CASE(7);
594             CASE(8);
595             CASE(9);
596             CASE(10);
597             CASE(11);
598             CASE(12);
599             CASE(13);
600             CASE(14);
601             CASE(15);
602             CASE(16);
603         default:
604             assert(false);
605         }
606 #undef CASE
607     }
608     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
609     assert ( c_separate == * (buf_begin + offset) );
610     return true;
611 }
612
613 bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
614                                     table_offset_t offset,
615                                     table_offset_t & end) {
616     guint32 nindex = m_phrase_array_indexes->len;
617     new_chunk->set_content(offset, &nindex, sizeof(guint32));
618     table_offset_t index = offset + sizeof(guint32);
619
620     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
621     new_chunk->set_content(offset, &c_separate, sizeof(char));
622     offset += sizeof(char);
623     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
624     index += sizeof(table_offset_t);
625
626     table_offset_t phrase_end;
627     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
628 #define CASE(len) case len:                                             \
629         {                                                               \
630             PhraseArrayIndexLevel2<len> * phrase = g_array_index        \
631                 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
632             if ( !phrase ){                                             \
633                 new_chunk->set_content                                  \
634                     (index, &offset, sizeof(table_offset_t));           \
635                 index += sizeof(table_offset_t);                        \
636                 continue;                                               \
637             }                                                           \
638             phrase->store(new_chunk, offset, phrase_end);               \
639             offset = phrase_end;                                        \
640             break;                                                      \
641         }
642         switch ( i ){
643             CASE(1);
644             CASE(2);
645             CASE(3);
646             CASE(4);
647             CASE(5);
648             CASE(6);
649             CASE(7);
650             CASE(8);
651             CASE(9);
652             CASE(10);
653             CASE(11);
654             CASE(12);
655             CASE(13);
656             CASE(14);
657             CASE(15);
658             CASE(16);
659         default:
660             assert(false);
661         }
662         //add '#'
663         new_chunk->set_content(offset, &c_separate, sizeof(char));
664         offset += sizeof(char);
665         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
666         index += sizeof(table_offset_t);
667
668 #undef CASE
669     }
670     end = offset;
671     return true;
672 }
673
674 template<size_t phrase_length>
675 bool PhraseArrayIndexLevel2<phrase_length>::
676 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
677     char * buf_begin = (char *) chunk->begin();
678     m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
679     return true;
680 }
681
682 template<size_t phrase_length>
683 bool PhraseArrayIndexLevel2<phrase_length>::
684 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
685     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
686     end = offset + m_chunk.size();
687     return true;
688 }
689
690
691 /* get length method */
692
693 int PhraseLengthIndexLevel2::get_length() const {
694     int length = m_phrase_array_indexes->len;
695
696     /* trim trailing zero. */
697     for (int i = length - 1; i >= 0; --i) {
698         void * array = g_array_index(m_phrase_array_indexes, void *, i);
699
700         if (NULL != array)
701             break;
702
703         --length;
704     }
705
706     return length;
707 }
708
709 template<size_t phrase_length>
710 int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
711     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
712     chunk_begin = (IndexItem *) m_chunk.begin();
713     chunk_end = (IndexItem *) m_chunk.end();
714
715     return chunk_end - chunk_begin;
716 }
717
718
719 /* mask out method */
720
721 bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
722                                        phrase_token_t value){
723     for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
724         PhraseLengthIndexLevel2 * & length_array =
725             m_phrase_length_indexes[i];
726
727         if (NULL == length_array)
728             continue;
729
730         length_array->mask_out(mask, value);
731
732         if (0 == length_array->get_length()) {
733             delete length_array;
734             length_array = NULL;
735         }
736     }
737
738     return true;
739 }
740
741 bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
742                                        phrase_token_t value){
743 #define CASE(len) case len:                                     \
744     {                                                           \
745         PhraseArrayIndexLevel2<len> * & array = g_array_index   \
746             (m_phrase_array_indexes,                            \
747              PhraseArrayIndexLevel2<len> *, len - 1);           \
748                                                                 \
749         if (NULL == array)                                      \
750             continue;                                           \
751                                                                 \
752         array->mask_out(mask, value);                           \
753                                                                 \
754         if (0 == array->get_length()) {                         \
755             delete array;                                       \
756             array = NULL;                                       \
757         }                                                       \
758         break;                                                  \
759     }
760
761     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
762         switch (i) {
763             CASE(1);
764             CASE(2);
765             CASE(3);
766             CASE(4);
767             CASE(5);
768             CASE(6);
769             CASE(7);
770             CASE(8);
771             CASE(9);
772             CASE(10);
773             CASE(11);
774             CASE(12);
775             CASE(13);
776             CASE(14);
777             CASE(15);
778             CASE(16);
779         default:
780             assert(false);
781         }
782     }
783     /* shrink self array. */
784     g_array_set_size(m_phrase_array_indexes, get_length());
785 #undef CASE
786     return true;
787 }
788
789 template<size_t phrase_length>
790 bool PhraseArrayIndexLevel2<phrase_length>::mask_out
791 (phrase_token_t mask, phrase_token_t value) {
792     IndexItem * begin = NULL, * end = NULL;
793     begin = (IndexItem *) m_chunk.begin();
794     end = (IndexItem *) m_chunk.end();
795
796     for (IndexItem * cur = begin; cur != end; ++cur) {
797         if ((cur->m_token & mask) != value)
798             continue;
799
800         int offset = (cur - begin) * sizeof(IndexItem);
801         m_chunk.remove_content(offset, sizeof(IndexItem));
802
803         /* update chunk end. */
804         end = (IndexItem *) m_chunk.end();
805         --cur;
806     }
807
808     return true;
809 }