3 * Library to deal with pinyin.
5 * Copyright (C) 2006-2007 Peng Wu
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
32 * The system single gram contains the trained freqs.
33 * The user single gram contains the delta freqs.
34 * During the Viterbi beam search, use merge_single_gram to merge the system
35 * single gram and the user single gram.
42 * The single gram in the bi-gram.
/* Friend declaration: grants the free function merge_single_gram() access to
 * the non-public members of the enclosing class (the class header is outside
 * this excerpt -- presumably SingleGram; confirm).  @system and @user are
 * taken as pointers to const; only @merged is non-const. */
47 friend bool merge_single_gram(SingleGram * merged,
48 const SingleGram * system,
49 const SingleGram * user);
/* Construct a SingleGram from a raw buffer of the given length.
 * NOTE(review): whether @length is in bytes and whether @buffer is copied or
 * merely referenced is not visible from this declaration -- confirm against
 * the implementation. */
53 SingleGram(void * buffer, size_t length);
56 * SingleGram::SingleGram:
58 * The constructor of the SingleGram.
63 * SingleGram::retrieve_all:
64 * @array: the GArray to store the retrieved bi-gram phrase items.
65 * @returns: whether the retrieve operation is successful.
67 * Retrieve all bi-gram phrase items in this single gram.
/* Const member: does not modify this single gram.  @array is an output even
 * though it is passed by value; the documentation calls it a GArray, so
 * BigramPhraseWithCountArray is presumably a GArray pointer typedef
 * (TODO confirm). */
70 bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const;
74 * @range: the token range.
75 * @array: the GArray to store the matched bi-gram phrase item.
76 * @returns: whether the search operation is successful.
78 * Search the bi-gram phrase items according to the token range.
80 * Note: the array result may contain many items.
/* Const member: @range is the input token range and @array receives the
 * matched items.  NOTE(review): whether the bounds of @range are inclusive
 * or half-open is not visible here -- confirm before relying on it. */
83 bool search(/* in */ PhraseIndexRange * range,
84 /* out */ BigramPhraseArray array) const;
87 * SingleGram::insert_freq:
88 * @token: the phrase token.
89 * @freq: the freq of this token.
90 * @returns: whether the insert operation is successful.
92 * Insert the token with the freq.
/* Both arguments are inputs.  NOTE(review): the behaviour when @token is
 * already present is not visible from this declaration -- presumably the
 * call fails and returns false; confirm against the implementation. */
95 bool insert_freq(/* in */ phrase_token_t token,
96 /* in */ guint32 freq);
99 * SingleGram::remove_freq:
100 * @token: the phrase token.
101 * @freq: the freq of the removed token.
102 * @returns: whether the remove operation is successful.
/* Remove @token from this single gram.  @freq is an out-parameter (passed by
 * reference) that receives the freq of the removed token.
 * NOTE(review): the value of @freq when the call fails is not visible
 * here -- do not rely on it. */
107 bool remove_freq(/* in */ phrase_token_t token,
108 /* out */ guint32 & freq);
111 * SingleGram::get_freq:
112 * @token: the phrase token.
113 * @freq: the freq of the token.
114 * @returns: whether the get operation is successful.
116 * Get the freq of the token.
/* Const member: reads the freq of @token into the by-reference out-parameter
 * @freq.  NOTE(review): the value left in @freq on failure is not visible
 * from this declaration. */
119 bool get_freq(/* in */ phrase_token_t token,
120 /* out */ guint32 & freq) const;
123 * SingleGram::set_freq:
124 * @token: the phrase token.
125 * @freq: the freq of the token.
126 * @returns: whether the set operation is successful.
128 * Set the freq of the token.
/* Both arguments are inputs.  NOTE(review): how this differs from
 * insert_freq() for a token that is not yet present is not visible here --
 * presumably set_freq() requires an existing token; confirm. */
131 bool set_freq(/* in */ phrase_token_t token,
132 /* in */ guint32 freq);
135 * SingleGram::get_total_freq:
136 * @total: the total freq of this single gram.
137 * @returns: whether the get operation is successful.
139 * Get the total freq of this single gram.
/* Const member: @total is an out-parameter passed by reference. */
142 bool get_total_freq(guint32 & total) const;
145 * SingleGram::set_total_freq:
146 * @total: the total freq of this single gram.
147 * @returns: whether the set operation is successful.
149 * Set the total freq of this single gram.
/* @total is an input passed by value.  NOTE(review): whether the total is
 * expected to stay consistent with the per-token freqs is not enforced by
 * anything visible here -- confirm who maintains that invariant. */
152 bool set_total_freq(guint32 total);
156 * @returns: whether the prune operation is successful.
158 * Obsoleted by Katz k mixture model pruning.
178 m_db->close(m_db, 0);
187 * The constructor of the Bigram.
197 * The destructor of the Bigram.
206 * @dbfile: the Berkeley DB file name.
207 * @returns: whether the load operation is successful.
209 * Load the Berkeley DB into memory.
/* @dbfile names the Berkeley DB file to read.  NOTE(review): what happens to
 * a database that is already open on this object is not visible from this
 * declaration -- confirm. */
212 bool load_db(const char * dbfile);
216 * @dbfile: the Berkeley DB file name.
217 * @returns: whether the save operation is successful.
219 * Save the in-memory Berkeley DB to disk.
/* @dbfile names the destination file.  NOTE(review): whether an existing
 * file at @dbfile is overwritten is not visible here -- confirm. */
222 bool save_db(const char * dbfile);
226 * @dbfile: the Berkeley DB file name.
227 * @flags: the flags of enum ATTACH_FLAG.
228 * @returns: whether the attach operation is successful.
230 * Attach this Bigram to the Berkeley DB.
/* @flags carries values of enum ATTACH_FLAG (declared outside this excerpt)
 * in a guint32, so it is presumably a bitmask of those flags -- TODO confirm. */
233 bool attach(const char * dbfile, guint32 flags);
237 * @index: the previous token in the bi-gram.
238 * @single_gram: the single gram of the previous token.
239 * @returns: whether the load operation is successful.
241 * Load the single gram of the previous token into the SingleGram class.
/* @single_gram is a reference to a pointer used as the output.
 * NOTE(review): ownership of the returned SingleGram is not visible here --
 * presumably the caller must delete it; confirm. */
244 bool load(/* in */ phrase_token_t index,
245 /* out */ SingleGram * & single_gram);
249 * @index: the previous token in the bi-gram.
250 * @single_gram: the single gram of the previous token.
251 * @returns: whether the store operation is successful.
253 * Store the single gram of the previous token from the SingleGram class.
/* Both arguments are inputs; @single_gram is only read from according to the
 * in-marker, although the pointer is not const-qualified. */
256 bool store(/* in */ phrase_token_t index,
257 /* in */ SingleGram * single_gram);
260 * Bigram::get_all_items:
261 * @items: the GArray to store all previous tokens.
262 * @returns: whether the get operation is successful.
264 * Get the array of all previous tokens for parameter estimation.
/* @items is a caller-supplied GArray that receives the tokens.
 * NOTE(review): the element type is presumably phrase_token_t, and whether
 * existing contents are cleared or appended to is not visible -- confirm. */
267 bool get_all_items(/* out */ GArray * items);
272 * @merged: the merged single gram of system and user single gram.
273 * @system: the system single gram to be merged.
274 * @user: the user single gram to be merged.
275 * @returns: whether the merge operation is successful.
277 * Merge the system and user single gram into one merged single gram.
279 * Note: Please keep the system and user single grams
280 * while the merged single gram is in use.
/* Free function: @system and @user are read-only inputs (pointers to const);
 * @merged is the output.  NOTE(review): whether either input may be NULL is
 * not visible from this declaration -- confirm. */
283 bool merge_single_gram(SingleGram * merged, const SingleGram * system,
284 const SingleGram * user);