3 * Library to deal with pinyin.
5 * Copyright (C) 2006-2007 Peng Wu
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
32 * The system single gram contains the trained freqs.
33 * The user single gram contains the delta freqs.
34 * During the Viterbi beam search, use merge_single_gram to merge the system
35 * single gram and the user single gram.
42 * The single gram in the bi-gram.
/* Friend declaration: gives the free function merge_single_gram() access to
 * the non-public members of the class in which this line appears. */
47 friend bool merge_single_gram(SingleGram * merged,
48 const SingleGram * system,
49 const SingleGram * user);
/* Constructs a SingleGram from a caller-supplied buffer and its length.
 * NOTE(review): whether the buffer is copied or merely referenced, and the
 * unit of `length`, are not visible in this declaration -- confirm in the
 * definition. */
53 SingleGram(void * buffer, size_t length);
56 * SingleGram::SingleGram:
58 * The constructor of the SingleGram.
63 * SingleGram::retrieve_all:
64 * @array: the GArray to store the retrieved bi-gram phrase items.
65 * @returns: whether the retrieve operation is successful.
67 * Retrieve all bi-gram phrase items in this single gram.
/* Declared const. `array` is the caller-supplied output GArray that receives
 * the retrieved items; the return value reports whether retrieval succeeded. */
70 bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const;
74 * @range: the token range.
75 * @array: the GArray to store the matched bi-gram phrase items.
76 * @returns: whether the search operation is successful.
78 * Search the bi-gram phrase items according to the token range.
80 * Note: the result array may contain many items.
/* Declared const. `range` is the input token range and `array` is the
 * caller-supplied output GArray that receives the matching items. */
83 bool search(/* in */ PhraseIndexRange * range,
84 /* out */ BigramPhraseArray array) const;
87 * SingleGram::insert_freq:
88 * @token: the phrase token.
89 * @freq: the freq of this token.
90 * @returns: whether the insert operation is successful.
92 * Insert the token with the freq.
/* Both parameters are inputs; the return value reports whether the
 * insertion succeeded. */
95 bool insert_freq(/* in */ phrase_token_t token,
96 /* in */ guint32 freq);
99 * SingleGram::remove_freq:
100 * @token: the phrase token.
101 * @freq: the freq of the removed token.
102 * @returns: whether the remove operation is successful.
/* `freq` is an output reference that receives the freq of the removed token;
 * the return value reports whether the removal succeeded. */
107 bool remove_freq(/* in */ phrase_token_t token,
108 /* out */ guint32 & freq);
111 * SingleGram::get_freq:
112 * @token: the phrase token.
113 * @freq: the freq of the token.
114 * @returns: whether the get operation is successful.
116 * Get the freq of the token.
/* Declared const. `freq` is an output reference that receives the freq of
 * `token`; the return value reports whether the lookup succeeded. */
119 bool get_freq(/* in */ phrase_token_t token,
120 /* out */ guint32 & freq) const;
123 * SingleGram::set_freq:
124 * @token: the phrase token.
125 * @freq: the freq of the token.
126 * @returns: whether the set operation is successful.
128 * Set the freq of the token.
/* Both parameters are inputs; the return value reports whether the set
 * operation succeeded. */
131 bool set_freq(/* in */ phrase_token_t token,
132 /* in */ guint32 freq);
135 * SingleGram::get_total_freq:
136 * @total: the total freq of this single gram.
137 * @returns: whether the get operation is successful.
139 * Get the total freq of this single gram.
/* Declared const. The total freq is reported through the non-const reference
 * `total`; the return value reports whether the get operation succeeded. */
142 bool get_total_freq(guint32 & total) const;
145 * SingleGram::set_total_freq:
146 * @total: the total freq of this single gram.
147 * @returns: whether the set operation is successful.
149 * Set the total freq of this single gram.
/* Takes the new total freq by value; the return value reports whether the
 * set operation succeeded. */
152 bool set_total_freq(guint32 total);
155 * SingleGram::get_length:
156 * @returns: the number of items in this single gram.
158 * Get the number of items in this single gram.
/* Returns the number of items in this single gram.
 * NOTE(review): unlike get_freq() and get_total_freq(), this accessor is not
 * declared const -- confirm whether that is intentional. */
161 guint32 get_length();
164 * SingleGram::mask_out:
167 * @returns: the number of removed items.
169 * Mask out the matched items in this single gram.
/* Returns the number of removed items.
 * NOTE(review): presumably an item matches when (token & mask) == value; the
 * matching rule is not visible in this declaration -- confirm in the
 * definition. */
172 guint32 mask_out(phrase_token_t mask, phrase_token_t value);
176 * @returns: whether the prune operation is successful.
178 * Obsoleted by Katz k mixture model pruning.
198 m_db->close(m_db, 0);
207 * The constructor of the Bigram.
217 * The destructor of the Bigram.
226 * @dbfile: the Berkeley DB file name.
227 * @returns: whether the load operation is successful.
229 * Load the Berkeley DB into memory.
/* `dbfile` is the name of the Berkeley DB file to load from; the return
 * value reports whether the load succeeded. */
232 bool load_db(const char * dbfile);
236 * @dbfile: the Berkeley DB file name.
237 * @returns: whether the save operation is successful.
239 * Save the in-memory Berkeley DB to disk.
/* `dbfile` is the name of the Berkeley DB file to write; the return value
 * reports whether the save succeeded. */
242 bool save_db(const char * dbfile);
246 * @dbfile: the Berkeley DB file name.
247 * @flags: the flags of enum ATTACH_FLAG.
248 * @returns: whether the attach operation is successful.
250 * Attach this Bigram to the Berkeley DB.
/* `dbfile` is the Berkeley DB file name and `flags` takes values of enum
 * ATTACH_FLAG; the return value reports whether the attach succeeded. */
253 bool attach(const char * dbfile, guint32 flags);
257 * @index: the previous token in the bi-gram.
258 * @single_gram: the single gram of the previous token.
259 * @returns: whether the load operation is successful.
261 * Load the single gram of the previous token.
/* `index` is the previous token of the bi-gram; `single_gram` is an output
 * reference-to-pointer that receives that token's single gram.
 * NOTE(review): ownership of the returned SingleGram is not stated here --
 * presumably the caller must release it; confirm in the definition. */
264 bool load(/* in */ phrase_token_t index,
265 /* out */ SingleGram * & single_gram);
269 * @index: the previous token in the bi-gram.
270 * @single_gram: the single gram of the previous token.
271 * @returns: whether the store operation is successful.
273 * Store the single gram of the previous token.
/* Both parameters are inputs: `index` is the previous token and
 * `single_gram` is the single gram to store for it. The return value reports
 * whether the store succeeded. */
276 bool store(/* in */ phrase_token_t index,
277 /* in */ SingleGram * single_gram);
281 * @index: the previous token in the bi-gram.
282 * @returns: whether the remove operation is successful.
284 * Remove the single gram of the previous token.
/* `index` is the previous token whose single gram is removed; the return
 * value reports whether the removal succeeded. */
287 bool remove(/* in */ phrase_token_t index);
290 * Bigram::get_all_items:
291 * @items: the GArray to store all previous tokens.
292 * @returns: whether the get operation is successful.
294 * Get the array of all previous tokens for parameter estimation.
/* `items` is the caller-supplied output GArray that receives all previous
 * tokens.
 * NOTE(review): the element type is presumably phrase_token_t -- confirm in
 * the definition. */
297 bool get_all_items(/* out */ GArray * items);
303 * @returns: whether the mask out operation is successful.
305 * Mask out the matched items.
/* Returns only a success flag, unlike the guint32 mask_out() declared earlier
 * in this file, which returns the number of removed items. */
308 bool mask_out(phrase_token_t mask, phrase_token_t value);
313 * @merged: the merged single gram of system and user single gram.
314 * @system: the system single gram to be merged.
315 * @user: the user single gram to be merged.
316 * @returns: whether the merge operation is successful.
318 * Merge the system and user single gram into one merged single gram.
320 * Note: Please keep the system and user single grams
321 * while the merged single gram is in use.
/* Free function, also named in a friend declaration earlier in this file.
 * `merged` is the output single gram; `system` and `user` are read-only
 * inputs (pointers to const). */
324 bool merge_single_gram(SingleGram * merged, const SingleGram * system,
325 const SingleGram * user);