rename phrase_compare and phrase_less_than
[platform/upstream/libpinyin.git] / src / storage / phrase_large_table2.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include <assert.h>
23 #include <string.h>
24 #include "phrase_large_table2.h"
25
26
27 /* class definition */
28
29 namespace pinyin{
30
31 class PhraseLengthIndexLevel2{
32 protected:
33     GArray * m_phrase_array_indexes;
34 public:
35     PhraseLengthIndexLevel2();
36     ~PhraseLengthIndexLevel2();
37
38     /* load/store method */
39     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
40     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
41
42     /* search method */
43     int search(int phrase_length, /* in */ ucs4_t phrase[],
44                /* out */ PhraseTokens tokens) const;
45
46     /* add_index/remove_index method */
47     int add_index(int phrase_length, /* in */ ucs4_t phrase[],
48                   /* in */ phrase_token_t token);
49     int remove_index(int phrase_length, /* in */ ucs4_t phrase[],
50                      /* in */ phrase_token_t token);
51 };
52
53
54 template<size_t phrase_length>
55 struct PhraseIndexItem2{
56     phrase_token_t m_token;
57     ucs4_t m_phrase[phrase_length];
58 public:
59     PhraseIndexItem2<phrase_length>(ucs4_t phrase[], phrase_token_t token){
60         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
61         m_token = token;
62     }
63 };
64
65
66 template<size_t phrase_length>
67 class PhraseArrayIndexLevel2{
68 protected:
69     typedef PhraseIndexItem2<phrase_length> IndexItem;
70
71 protected:
72     MemoryChunk m_chunk;
73 public:
74     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
75     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
76
77     /* search method */
78     int search(/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
79
80     /* add_index/remove_index method */
81     int add_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
82     int remove_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
83 };
84
85 };
86
87 using namespace pinyin;
88
89 /* class implementation */
90
91 template<size_t phrase_length>
92 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
93                            const PhraseIndexItem2<phrase_length> &rhs){
94     ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
95     ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
96
97     return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
98 }
99
100 template<size_t phrase_length>
101 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
102                               const PhraseIndexItem2<phrase_length> & rhs){
103     return 0 > phrase_compare2(lhs, rhs);
104 }
105
106 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
107     memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
108 }
109
110 void PhraseBitmapIndexLevel2::reset(){
111     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
112         PhraseLengthIndexLevel2 * length_array =
113             m_phrase_length_indexes[i];
114         if ( length_array )
115             delete length_array;
116     }
117 }
118
119 int PhraseBitmapIndexLevel2::search(int phrase_length,
120                                     /* in */ ucs4_t phrase[],
121                                     /* out */ PhraseTokens tokens) const {
122     assert(phrase_length > 0);
123
124     int result = SEARCH_NONE;
125     /* use the first 8-bit of the lower 16-bit for bitmap index,
126      * as most the higher 16-bit are zero.
127      */
128     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
129
130     PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
131     if ( phrase_array )
132         return phrase_array->search(phrase_length, phrase, tokens);
133     return result;
134 }
135
136 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
137     m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
138 }
139
140 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
141 #define CASE(len) case len:                                             \
142     {                                                                   \
143         PhraseArrayIndexLevel2<len> * & array =  g_array_index          \
144             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
145         if ( array ) {                                                  \
146             delete array;                                               \
147             array = NULL;                                               \
148         }                                                               \
149         break;                                                          \
150     }
151
152     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
153         switch (i){
154             CASE(1);
155             CASE(2);
156             CASE(3);
157             CASE(4);
158             CASE(5);
159             CASE(6);
160             CASE(7);
161             CASE(8);
162             CASE(9);
163             CASE(10);
164             CASE(11);
165             CASE(12);
166             CASE(13);
167             CASE(14);
168             CASE(15);
169             CASE(16);
170         default:
171             assert(false);
172         }
173     }
174     g_array_free(m_phrase_array_indexes, TRUE);
175 #undef CASE
176 }
177
178 int PhraseLengthIndexLevel2::search(int phrase_length,
179                                     /* in */ ucs4_t phrase[],
180                                     /* out */ PhraseTokens tokens) const {
181     int result = SEARCH_NONE;
182     if(m_phrase_array_indexes->len < phrase_length)
183         return result;
184     if (m_phrase_array_indexes->len > phrase_length)
185         result |= SEARCH_CONTINUED;
186
187 #define CASE(len) case len:                                             \
188     {                                                                   \
189         PhraseArrayIndexLevel2<len> * array = g_array_index             \
190             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
191         if ( !array )                                                   \
192             return result;                                              \
193         result |= array->search(phrase, tokens);                        \
194         return result;                                                  \
195     }
196
197     switch ( phrase_length ){
198         CASE(1);
199         CASE(2);
200         CASE(3);
201         CASE(4);
202         CASE(5);
203         CASE(6);
204         CASE(7);
205         CASE(8);
206         CASE(9);
207         CASE(10);
208         CASE(11);
209         CASE(12);
210         CASE(13);
211         CASE(14);
212         CASE(15);
213         CASE(16);
214     default:
215         assert(false);
216     }
217 #undef CASE
218 }
219
220 template<size_t phrase_length>
221 int PhraseArrayIndexLevel2<phrase_length>::search
222 (/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
223     int result = SEARCH_NONE;
224
225     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
226     chunk_begin = (IndexItem *) m_chunk.begin();
227     chunk_end = (IndexItem *) m_chunk.end();
228
229     /* do the search */
230     IndexItem item(phrase, -1);
231     std_lite::pair<IndexItem *, IndexItem *> range;
232     range = std_lite::equal_range
233         (chunk_begin, chunk_end, item,
234          phrase_less_than2<phrase_length>);
235
236     const IndexItem * const begin = range.first;
237     const IndexItem * const end = range.second;
238     if (begin == end)
239         return result;
240
241     const IndexItem * iter = NULL;
242     GArray * array = NULL;
243
244     for (iter = begin; iter != end; ++iter) {
245         phrase_token_t token = iter->m_token;
246
247         /* filter out disabled sub phrase indices. */
248         array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
249         if (NULL == array)
250             continue;
251
252         result |= SEARCH_OK;
253
254         g_array_append_val(array, token);
255     }
256
257     return result;
258 }
259
260 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
261                                        /* in */ ucs4_t phrase[],
262                                        /* in */ phrase_token_t token){
263     guint8 first_key =  (phrase[0] & 0xFF00) >> 8;
264
265     PhraseLengthIndexLevel2 * & length_array =
266         m_phrase_length_indexes[first_key];
267
268     if ( !length_array ){
269         length_array = new PhraseLengthIndexLevel2();
270     }
271     return length_array->add_index(phrase_length, phrase, token);
272 }
273
274 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
275                                          /* in */ ucs4_t phrase[],
276                                          /* in */ phrase_token_t token){
277     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
278
279     PhraseLengthIndexLevel2 * & length_array =
280         m_phrase_length_indexes[first_key];
281
282     if ( length_array )
283         return length_array->remove_index(phrase_length, phrase, token);
284
285     return ERROR_REMOVE_ITEM_DONOT_EXISTS;
286 }
287
288 int PhraseLengthIndexLevel2::add_index(int phrase_length,
289                                        /* in */ ucs4_t phrase[],
290                                        /* in */ phrase_token_t token) {
291     if (phrase_length >= MAX_PHRASE_LENGTH)
292         return ERROR_PHRASE_TOO_LONG;
293
294     if (m_phrase_array_indexes->len < phrase_length)
295         g_array_set_size(m_phrase_array_indexes, phrase_length);
296
297 #define CASE(len) case len:                                             \
298     {                                                                   \
299         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
300             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
301         if ( !array )                                                   \
302             array = new PhraseArrayIndexLevel2<len>;                    \
303         return array->add_index(phrase, token);                         \
304     }
305
306     switch(phrase_length){
307         CASE(1);
308         CASE(2);
309         CASE(3);
310         CASE(4);
311         CASE(5);
312         CASE(6);
313         CASE(7);
314         CASE(8);
315         CASE(9);
316         CASE(10);
317         CASE(11);
318         CASE(12);
319         CASE(13);
320         CASE(14);
321         CASE(15);
322         CASE(16);
323     default:
324         assert(false);
325     }
326
327 #undef CASE
328 }
329
330 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
331                                           /* in */ ucs4_t phrase[],
332                                           /* in */ phrase_token_t token) {
333     if (phrase_length >= MAX_PHRASE_LENGTH)
334         return ERROR_PHRASE_TOO_LONG;
335
336     if (m_phrase_array_indexes->len < phrase_length)
337         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
338
339 #define CASE(len) case len:                                             \
340     {                                                                   \
341         PhraseArrayIndexLevel2<len> * & array =  g_array_index          \
342             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
343         if ( !array )                                                   \
344             return ERROR_REMOVE_ITEM_DONOT_EXISTS;                      \
345         return array->remove_index(phrase, token);                      \
346     }
347
348     switch(phrase_length){
349         CASE(1);
350         CASE(2);
351         CASE(3);
352         CASE(4);
353         CASE(5);
354         CASE(6);
355         CASE(7);
356         CASE(8);
357         CASE(9);
358         CASE(10);
359         CASE(11);
360         CASE(12);
361         CASE(13);
362         CASE(14);
363         CASE(15);
364         CASE(16);
365     default:
366         assert(false);
367     }
368 #undef CASE
369 }