3 * Library to deal with pinyin.
5 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #ifndef PHRASE_LARGE_TABLE2_H
23 #define PHRASE_LARGE_TABLE2_H
26 #include "novel_types.h"
27 #include "memory_chunk.h"
/* Number of slots in the first-level bitmap index (used as an array length
 * by PhraseBitmapIndexLevel2). The expression evaluates to 1 << 8 (256)
 * when sizeof(ucs4_t) is 4; ucs4_t is declared outside this file
 * (presumably in novel_types.h -- TODO confirm). */
31 const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
/* Forward declaration; the full definition is not part of this header. */
33 class PhraseLengthIndexLevel2;
/* First-level index of the phrase table: a fixed-size array of pointers to
 * PhraseLengthIndexLevel2 objects.
 * NOTE(review): access specifiers, the destructor body and the closing
 * brace of this class are not visible in this view of the file. */
35 class PhraseBitmapIndexLevel2{
/* One pointer slot per bitmap index value. */
37 PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX];
38 /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */
/* Constructor; defined outside this header. */
41 PhraseBitmapIndexLevel2();
/* Destructor; its body is not visible here. */
42 ~PhraseBitmapIndexLevel2(){
46 /* load/store method */
/* load: takes a memory chunk plus an offset and an end offset.
 * store: takes a destination chunk and a start offset; `end` is passed by
 * non-const reference (presumably receives the offset just past the written
 * data -- TODO confirm against the implementation).
 * Both return bool; the meaning of the result is defined in the
 * implementation file. */
47 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
48 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
/* Look up `phrase` (phrase_length elements) and report matches via `tokens`.
 * The meaning of the int return value is defined in the implementation. */
51 int search(int phrase_length, /* in */ ucs4_t phrase[],
52 /* out */ PhraseTokens tokens) const;
54 /* add_index/remove_index method */
/* Both take a phrase (phrase_length elements) and one token; the int result
 * code is defined in the implementation. */
55 int add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
57 int remove_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token);
/* NOTE(review): presumably drops entries whose token matches `value` under
 * `mask` -- confirm against the implementation. */
60 bool mask_out(phrase_token_t mask, phrase_token_t value);
/* Phrase table facade: every visible method forwards to the embedded
 * PhraseBitmapIndexLevel2 instance.
 * NOTE(review): access specifiers, constructor/destructor, the closing
 * braces of the inline methods and the end of the class are not visible in
 * this view of the file. */
64 class PhraseLargeTable2{
/* Root of the index; all operations below delegate to it. */
66 PhraseBitmapIndexLevel2 m_bitmap_table;
/* Memory chunk pointer; how it is assigned and released is not visible
 * here (presumably the chunk handed to load() -- TODO confirm). */
67 MemoryChunk * m_chunk;
84 /* load/store method */
/* Load the whole chunk: offsets 0 .. chunk->size() are passed to the
 * bitmap table. Statements preceding the return are not visible here. */
85 bool load(MemoryChunk * chunk){
88 return m_bitmap_table.load(chunk, 0, chunk->size());
/* Store the table into new_chunk starting at offset 0. `end` is declared on
 * a line that is not visible in this view. */
91 bool store(MemoryChunk * new_chunk){
93 return m_bitmap_table.store(new_chunk, 0, end);
/* Load the table from a text file; defined outside this header. */
96 bool load_text(FILE * file);
/* Forwards to PhraseBitmapIndexLevel2::search and returns its result. */
99 int search(int phrase_length, /* in */ ucs4_t phrase[],
100 /* out */ PhraseTokens tokens) const {
101 return m_bitmap_table.search(phrase_length, phrase, tokens);
104 /* add_index/remove_index method */
/* Forwards to PhraseBitmapIndexLevel2::add_index and returns its result. */
105 int add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token) {
106 return m_bitmap_table.add_index(phrase_length, phrase, token);
/* Forwards to PhraseBitmapIndexLevel2::remove_index and returns its result. */
109 int remove_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token) {
110 return m_bitmap_table.remove_index(phrase_length, phrase, token);
113 /* mask out method */
/* Forwards to PhraseBitmapIndexLevel2::mask_out and returns its result. */
114 bool mask_out(phrase_token_t mask, phrase_token_t value) {
115 return m_bitmap_table.mask_out(mask, value);
/* Flatten the per-library token arrays in `tokens` into `tokenarray`.
 * `tokenarray` is first truncated to length 0, then the contents of each
 * tokens[i] (i < PHRASE_INDEX_LIBRARY_COUNT) are appended to it.
 * NOTE(review): the declaration and update of `num`, any guard on a NULL
 * tokens[i], and the return statement are not visible in this view;
 * presumably `num` is the total number of tokens appended and is the
 * return value -- confirm. */
120 static inline int reduce_tokens(const PhraseTokens tokens,
121 GArray * tokenarray) {
/* Discard whatever the caller left in the output array. */
123 g_array_set_size(tokenarray, 0);
125 for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
126 GArray * array = tokens[i];
/* Append all elements of this library's array to the output. */
132 g_array_append_vals(tokenarray, array->data, array->len);
135 /* the following line will be removed in the future, once the code is verified. */
/* Currently enforces that `num` is either 0 or 1. */
136 assert(0 == num || 1 == num);
141 /* for compatibility. */
/* Collect the tokens via reduce_tokens() into a temporary GArray and copy
 * element 0 into `token`; the temporary array is freed before returning.
 * NOTE(review): lines between the reduce_tokens() call and the element
 * read, the initial value given to `token`, and the return statement are
 * not visible in this view; presumably the read is guarded by `num` and
 * `num` is returned -- confirm. */
142 static inline int get_first_token(const PhraseTokens tokens,
143 /* out */ phrase_token_t & token){
/* Temporary array of phrase_token_t, not zero-terminated and not cleared. */
146 GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
147 int num = reduce_tokens(tokens, tokenarray);
149 token = g_array_index(tokenarray, phrase_token_t, 0);
/* TRUE: release the element storage together with the GArray wrapper. */
150 g_array_free(tokenarray, TRUE);