3 * Library to deal with pinyin.
5 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 #include "phrase_large_table2.h"
27 /* class definition */
/*
 * PhraseLengthIndexLevel2: second-level index that partitions entries by
 * phrase length.  The methods implemented in this file access slot (len - 1)
 * of m_phrase_array_indexes as a PhraseArrayIndexLevel2<len> * and forward
 * the per-length work to that object.
 */
31 class PhraseLengthIndexLevel2{
/* GArray of pointer-sized slots; slot (len - 1) serves phrases of length len */
33 GArray * m_phrase_array_indexes;
/* the constructor creates the empty slot array; the destructor frees it */
35 PhraseLengthIndexLevel2();
36 ~PhraseLengthIndexLevel2();
38 /* load/store method */
/* NOTE(review): load presumably reads the [offset, end) region of chunk and
 * store presumably reports where it finished writing through end -
 * confirm against the implementations. */
39 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
40 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
/* search method: looks up phrase (phrase_length characters), collects the
 * matching tokens into tokens and returns a bit mask of SEARCH_* flags */
43 int search(int phrase_length, /* in */ ucs4_t phrase[],
44 /* out */ PhraseTokens tokens) const;
46 /* add_index/remove_index method */
/* both take the phrase, its length and the token to add or remove, and
 * return an int status (e.g. ERROR_PHRASE_TOO_LONG for over-long phrases) */
47 int add_index(int phrase_length, /* in */ ucs4_t phrase[],
48 /* in */ phrase_token_t token);
49 int remove_index(int phrase_length, /* in */ ucs4_t phrase[],
50 /* in */ phrase_token_t token);
/*
 * PhraseIndexItem2: one fixed-width index record holding a phrase token
 * and an inline copy of a phrase of exactly phrase_length ucs4_t characters.
 */
54 template<size_t phrase_length>
55 struct PhraseIndexItem2{
/* token associated with the phrase stored in m_phrase */
56 phrase_token_t m_token;
/* the phrase characters, stored inline (no pointer indirection) */
57 ucs4_t m_phrase[phrase_length];
/* Copies phrase_length characters from phrase into m_phrase.
 * NOTE(review): m_token is presumably initialised from token as well -
 * confirm in the rest of the constructor body. */
59 PhraseIndexItem2<phrase_length>(ucs4_t phrase[], phrase_token_t token){
60 memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
/*
 * PhraseArrayIndexLevel2: index of all phrases that share one compile-time
 * length (phrase_length).  Its search() implementation in this file treats
 * m_chunk as a packed array of IndexItem and runs equal_range over it, so
 * the records are expected to be ordered by phrase_less_than2
 * (TODO confirm that add_index keeps them ordered).
 */
66 template<size_t phrase_length>
67 class PhraseArrayIndexLevel2{
/* record type stored by this index: token plus phrase_length characters */
69 typedef PhraseIndexItem2<phrase_length> IndexItem;
/* load/store method; NOTE(review): the offset/end semantics are not visible
 * at this declaration - confirm against the implementations. */
74 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
75 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
/* search method: appends the tokens of every record whose phrase equals
 * phrase to the per-library arrays in tokens */
78 int search(/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
80 /* add_index/remove_index method */
/* the phrase length is implied by the template parameter, so only the
 * phrase and the token are passed */
81 int add_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
82 int remove_index(/* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
87 using namespace pinyin;
89 /* class implementation */
/*
 * Three-way comparison of two index records by phrase only; m_token is
 * ignored.  Returns the memcmp() result over the phrase_length characters:
 * negative, zero or positive when lhs orders before, equal to or after rhs.
 * The order is the raw byte order of the ucs4_t arrays, which is not
 * necessarily numeric code-point order.
 */
91 template<size_t phrase_length>
92 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
93 const PhraseIndexItem2<phrase_length> &rhs){
/* the casts drop the const qualifier of the members; memcmp only reads */
94 ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
95 ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
97 return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
/*
 * Strict "less than" predicate built on phrase_compare2: true when lhs
 * orders before rhs by phrase.  This is the comparator handed to
 * std_lite::equal_range by PhraseArrayIndexLevel2::search.
 */
100 template<size_t phrase_length>
101 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
102 const PhraseIndexItem2<phrase_length> & rhs){
103 return 0 > phrase_compare2(lhs, rhs);
/*
 * Constructs an empty bitmap index: every slot of m_phrase_length_indexes
 * is zero-filled, so no per-key length index exists yet (add_index creates
 * them on demand).
 */
106 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
107 memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
/*
 * Visits each of the PHRASE_NUMBER_OF_BITMAP_INDEX slots and fetches the
 * per-key length index stored there.
 * NOTE(review): presumably each non-NULL entry is released here - confirm
 * against the remainder of the loop body.
 */
110 void PhraseBitmapIndexLevel2::reset(){
111 for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
112 PhraseLengthIndexLevel2 * length_array =
113 m_phrase_length_indexes[i];
/*
 * Looks up a phrase in the bitmap-indexed table.
 *
 * phrase_length: number of characters in phrase; asserted to be positive.
 * phrase:        the characters to look up.
 * tokens:        receives the matching tokens.
 *
 * The slot is selected by bits 8..15 of the first character
 * ((phrase[0] & 0xFF00) >> 8); the search is then forwarded to the
 * PhraseLengthIndexLevel2 stored in that slot and its SEARCH_* result is
 * returned.
 * NOTE(review): a slot may be NULL (the constructor zero-fills the table) -
 * confirm that this is checked before the dereference.
 */
119 int PhraseBitmapIndexLevel2::search(int phrase_length,
120 /* in */ ucs4_t phrase[],
121 /* out */ PhraseTokens tokens) const {
122 assert(phrase_length > 0);
124 int result = SEARCH_NONE;
125 /* use the first 8-bit of the lower 16-bit for bitmap index,
126 * as most the higher 16-bit are zero.
128 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
130 PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
132 return phrase_array->search(phrase_length, phrase, tokens);
/*
 * Creates the empty slot array.  g_array_new is called with
 * zero_terminated = FALSE and clear = TRUE, and the element size is that of
 * a pointer, so slots added later by g_array_set_size start out as NULL.
 */
136 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
137 m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
/*
 * Tears down the length index.  The loop walks the phrase lengths
 * 1..m_phrase_array_indexes->len; for each length the CASE(len) helper
 * macro binds a reference to slot (len - 1), typed as
 * PhraseArrayIndexLevel2<len> *.  Finally the GArray is freed together with
 * its element storage (free_segment = TRUE).
 * NOTE(review): presumably each non-NULL per-length index is deleted inside
 * CASE - confirm against the full macro body.
 */
140 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
141 #define CASE(len) case len: \
143 PhraseArrayIndexLevel2<len> * & array = g_array_index \
144 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
152 for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
174 g_array_free(m_phrase_array_indexes, TRUE);
/*
 * Searches the per-length index that matches phrase_length.
 *
 * phrase_length: number of characters in phrase.
 * phrase:        the characters to look up.
 * tokens:        receives the matching tokens.
 * Returns a bit mask of SEARCH_* flags, starting from SEARCH_NONE.
 *
 * SEARCH_CONTINUED is set when the slot array is longer than phrase_length,
 * i.e. when slots for longer phrases exist.  The switch dispatches on
 * phrase_length to CASE(len), which fetches slot (len - 1) as a
 * PhraseArrayIndexLevel2<len> * and ORs the result of its search() into the
 * return value.
 * NOTE(review): when the slot array is shorter than phrase_length the
 * function presumably returns early - confirm.
 */
178 int PhraseLengthIndexLevel2::search(int phrase_length,
179 /* in */ ucs4_t phrase[],
180 /* out */ PhraseTokens tokens) const {
181 int result = SEARCH_NONE;
182 if(m_phrase_array_indexes->len < phrase_length)
184 if (m_phrase_array_indexes->len > phrase_length)
185 result |= SEARCH_CONTINUED;
187 #define CASE(len) case len: \
189 PhraseArrayIndexLevel2<len> * array = g_array_index \
190 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
193 result |= array->search(phrase, tokens); \
197 switch ( phrase_length ){
/*
 * Finds every record whose phrase equals the given phrase and appends its
 * token to tokens.
 *
 * phrase: phrase_length characters to look up.
 * tokens: per-library token arrays, indexed by
 *         PHRASE_INDEX_LIBRARY_INDEX(token).
 * Returns an int that starts as SEARCH_NONE.
 * NOTE(review): presumably a "found" flag is set once a token has been
 * appended - confirm against the rest of the function.
 */
220 template<size_t phrase_length>
221 int PhraseArrayIndexLevel2<phrase_length>::search
222 (/* in */ ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
223 int result = SEARCH_NONE;
/* view the raw bytes of m_chunk as a packed array of IndexItem records */
225 IndexItem * chunk_begin = NULL, * chunk_end = NULL;
226 chunk_begin = (IndexItem *) m_chunk.begin();
227 chunk_end = (IndexItem *) m_chunk.end();
/* probe record: the token (-1) is a placeholder, because phrase_less_than2
 * compares the phrase characters only */
230 IndexItem item(phrase, -1);
231 std_lite::pair<IndexItem *, IndexItem *> range;
232 range = std_lite::equal_range
233 (chunk_begin, chunk_end, item,
234 phrase_less_than2<phrase_length>);
/* [begin, end) is the run of records that compare equal to the probe */
236 const IndexItem * const begin = range.first;
237 const IndexItem * const end = range.second;
241 const IndexItem * iter = NULL;
242 GArray * array = NULL;
244 for (iter = begin; iter != end; ++iter) {
245 phrase_token_t token = iter->m_token;
247 /* filter out disabled sub phrase indices. */
/* pick the output array of the library this token belongs to;
 * NOTE(review): a NULL array presumably marks a disabled library and is
 * skipped before the append - confirm. */
248 array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
254 g_array_append_val(array, token);
/*
 * Adds a (phrase, token) entry to the table.
 *
 * phrase_length: number of characters in phrase.
 * phrase:        the characters of the phrase.
 * token:         the token to register for the phrase.
 * Returns the status reported by PhraseLengthIndexLevel2::add_index.
 *
 * The slot is selected by bits 8..15 of the first character, as in search;
 * the per-key length index is created on first use.
 */
260 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
261 /* in */ ucs4_t phrase[],
262 /* in */ phrase_token_t token){
263 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
/* a reference, so that the allocation below is stored back into the table */
265 PhraseLengthIndexLevel2 * & length_array =
266 m_phrase_length_indexes[first_key];
268 if ( !length_array ){
269 length_array = new PhraseLengthIndexLevel2();
271 return length_array->add_index(phrase_length, phrase, token);
/*
 * Removes a (phrase, token) entry from the table.
 *
 * phrase_length: number of characters in phrase.
 * phrase:        the characters of the phrase.
 * token:         the token to remove.
 * Returns the status of PhraseLengthIndexLevel2::remove_index when the
 * removal is forwarded, ERROR_REMOVE_ITEM_DONOT_EXISTS otherwise.
 * NOTE(review): the forwarding is presumably guarded by a non-NULL check on
 * the slot - confirm.
 */
274 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
275 /* in */ ucs4_t phrase[],
276 /* in */ phrase_token_t token){
/* same slot selection as search and add_index: bits 8..15 of phrase[0] */
277 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
279 PhraseLengthIndexLevel2 * & length_array =
280 m_phrase_length_indexes[first_key];
283 return length_array->remove_index(phrase_length, phrase, token);
285 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
/*
 * Adds a (phrase, token) entry to the per-length index for phrase_length.
 *
 * phrase_length: number of characters in phrase.
 * phrase:        the characters of the phrase.
 * token:         the token to register for the phrase.
 * Returns ERROR_PHRASE_TOO_LONG when phrase_length >= MAX_PHRASE_LENGTH,
 * otherwise the status of PhraseArrayIndexLevel2<len>::add_index.
 *
 * The slot array is grown with g_array_set_size when it has fewer than
 * phrase_length slots.  The switch dispatches on phrase_length to
 * CASE(len), which takes a reference to slot (len - 1), stores a newly
 * allocated PhraseArrayIndexLevel2<len> there and forwards the call.
 * NOTE(review): the allocation is presumably guarded by a NULL check on the
 * slot - confirm against the full macro body.
 */
288 int PhraseLengthIndexLevel2::add_index(int phrase_length,
289 /* in */ ucs4_t phrase[],
290 /* in */ phrase_token_t token) {
291 if (phrase_length >= MAX_PHRASE_LENGTH)
292 return ERROR_PHRASE_TOO_LONG;
294 if (m_phrase_array_indexes->len < phrase_length)
295 g_array_set_size(m_phrase_array_indexes, phrase_length);
297 #define CASE(len) case len: \
299 PhraseArrayIndexLevel2<len> * & array = g_array_index \
300 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
302 array = new PhraseArrayIndexLevel2<len>; \
303 return array->add_index(phrase, token); \
306 switch(phrase_length){
330 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
331 /* in */ ucs4_t phrase[],
332 /* in */ phrase_token_t token) {
333 if (phrase_length >= MAX_PHRASE_LENGTH)
334 return ERROR_PHRASE_TOO_LONG;
336 if (m_phrase_array_indexes->len < phrase_length)
337 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
339 #define CASE(len) case len: \
341 PhraseArrayIndexLevel2<len> * & array = g_array_index \
342 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
344 return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
345 return array->remove_index(phrase, token); \
348 switch(phrase_length){