3 * Library to deal with pinyin.
5 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #ifndef PHRASE_LARGE_TABLE2_H
23 #define PHRASE_LARGE_TABLE2_H
26 #include "novel_types.h"
27 #include "memory_chunk.h"
/* Element count of the m_phrase_length_indexes array declared in
 * PhraseBitmapIndexLevel2, computed as 1 << (sizeof(ucs4_t) / 4 * 8).
 * NOTE(review): this evaluates to 256 if ucs4_t is 4 bytes wide -- confirm
 * against the definition of ucs4_t in novel_types.h. */
31 const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
/* Forward declaration: the declarations visible below only hold pointers to
 * this class, so the full definition is not needed at this point. */
33 class PhraseLengthIndexLevel2;
/* First-level index of the phrase table: a fixed-size array of
 * PHRASE_NUMBER_OF_BITMAP_INDEX pointers to PhraseLengthIndexLevel2 objects.
 * NOTE(review): access specifiers, the destructor body and the closing brace
 * of this class are not visible in this excerpt. */
35 class PhraseBitmapIndexLevel2{
/* One PhraseLengthIndexLevel2 pointer per slot of the bitmap index. */
37 PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX];
38 /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */
/* Constructor; defined outside this header excerpt. */
41 PhraseBitmapIndexLevel2();
/* Destructor, defined inline.
 * NOTE(review): presumably releases the second-level indexes -- the body is
 * not shown here, confirm. */
42 ~PhraseBitmapIndexLevel2(){
46 /* load/store method */
/* Loads this index from `chunk`; returns a bool status.
 * NOTE(review): `offset` and `end` presumably delimit the region of the chunk
 * to read -- confirm in the implementation. */
47 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
/* Stores this index into `new_chunk` starting at `offset`; returns a bool
 * status. `end` is passed by reference, so the callee can write to it.
 * NOTE(review): presumably it receives the offset just past the written data
 * -- confirm in the implementation. */
48 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
/* Looks up `phrase` (an array of `phrase_length` ucs4_t code points) and
 * reports matches through `tokens`, which is marked as an output argument.
 * NOTE(review): the meaning of the int return value is not visible here. */
51 int search(int phrase_length, /* in */ ucs4_t phrase[],
52 /* out */ PhraseTokens tokens) const;
54 /* add_index/remove_index method */
/* Registers `token` for `phrase` (length `phrase_length`).
 * NOTE(review): the meaning of the int return value is not visible here. */
55 int add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
/* Unregisters `token` for `phrase` (length `phrase_length`).
 * NOTE(review): the meaning of the int return value is not visible here. */
57 int remove_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
/* Phrase table facade: owns a PhraseBitmapIndexLevel2 and forwards load,
 * store, search, add_index and remove_index to it.
 * NOTE(review): access specifiers, the constructor/destructor and several
 * body lines of this class are not visible in this excerpt. */
61 class PhraseLargeTable2{
/* The first-level index that every method below delegates to. */
63 PhraseBitmapIndexLevel2 m_bitmap_table;
/* NOTE(review): presumably the memory chunk backing the loaded table --
 * the code that assigns or frees it is not shown here, confirm. */
64 MemoryChunk * m_chunk;
81 /* load/store method */
/* Loads the whole table from `chunk`: the bitmap index is asked to load the
 * range from offset 0 up to chunk->size(). Returns the index's bool status.
 * NOTE(review): statements preceding the return are not visible here. */
82 bool load(MemoryChunk * chunk){
85 return m_bitmap_table.load(chunk, 0, chunk->size());
/* Stores the whole table into `new_chunk`, starting at offset 0. Returns the
 * index's bool status.
 * NOTE(review): the declaration of `end` is not visible in this excerpt. */
88 bool store(MemoryChunk * new_chunk){
90 return m_bitmap_table.store(new_chunk, 0, end);
/* Populates the table from a text file; defined outside this header. */
93 bool load_text(FILE * file);
/* Forwards the lookup to the bitmap index and returns its result unchanged;
 * `tokens` is the output argument. */
96 int search(int phrase_length, /* in */ ucs4_t phrase[],
97 /* out */ PhraseTokens tokens) const {
98 return m_bitmap_table.search(phrase_length, phrase, tokens);
101 /* add_index/remove_index method */
/* Forwards the insertion to the bitmap index and returns its result. */
102 int add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token) {
103 return m_bitmap_table.add_index(phrase_length, phrase, token);
/* Forwards the removal to the bitmap index and returns its result. */
106 int remove_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token) {
107 return m_bitmap_table.remove_index(phrase_length, phrase, token);
/* Flattens the per-library token arrays in `tokens` into the single GArray
 * `tokenarray`.
 * NOTE(review): the declaration and update of `num`, the skip of NULL
 * entries and the return statement are not visible in this excerpt;
 * presumably the function returns the total number of tokens -- confirm. */
112 static inline int reduce_tokens(PhraseTokens tokens,
113 GArray * tokenarray) {
/* Discard any previous contents of the output array. */
115 g_array_set_size(tokenarray, 0);
/* Visit one token array per phrase index library. */
117 for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
118 GArray * array = tokens[i];
/* Append all elements of this library's array to the output. */
124 g_array_append_vals(tokenarray, array->data, array->len);
127 /* the following line will be removed in the future, once the code has been verified. */
/* Sanity check: at most one token is expected overall. */
128 assert(0 == num || 1 == num);
133 /* for compatibility. */
/* Writes the first token found in `tokens` to `token`; `token` stays
 * null_token when no non-empty array is found.
 * NOTE(review): the statement taken for empty entries, the update of `num`
 * and the return statement are not visible in this excerpt; presumably the
 * function returns the number of tokens seen -- confirm. */
134 static inline int get_first_token(PhraseTokens tokens,
135 /* out */ phrase_token_t & token){
/* Start with no tokens counted and no token selected. */
136 int num = 0; token = null_token;
/* Visit one token array per phrase index library. */
138 for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
139 GArray * array = tokens[i];
/* Entries that are missing or empty are tested for here. */
140 if (NULL == array || 0 == array->len)
/* Only the first token encountered is kept; later ones do not overwrite it. */
145 if (null_token == token) {
146 token = g_array_index(array, phrase_token_t, 0);
150 /* the following line will be removed in the future, once the code has been verified. */
/* Sanity check: at most one token is expected overall. */
151 assert(0 == num || 1 == num);