3 * Library to deal with pinyin.
5 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #ifndef PINYIN_PARSER2_H
23 #define PINYIN_PARSER2_H
26 #include "novel_types.h"
27 #include "chewing_key.h"
28 #include "pinyin_custom2.h"
/* Item type of the content table: four string forms of one entry
 * (pinyin, shengmu, yunmu, chewing) stored next to its ChewingKey.
 * NOTE(review): the shengmu/yunmu strings are presumably the two parts
 * of m_pinyin_str; confirm against the table data. */
33 const char * m_pinyin_str;
34 const char * m_shengmu_str;
35 const char * m_yunmu_str;
36 const char * m_chewing_str;
37 ChewingKey m_chewing_key;
38 } content_table_item_t;
/* Index entry keyed by a pinyin input string. */
41 const char * m_pinyin_input;
/* NOTE(review): presumably an index into the content table; confirm. */
43 guint16 m_table_index;
44 } pinyin_index_item_t;
/* Index entry keyed by a chewing input string. */
47 const char * m_chewing_input;
/* NOTE(review): presumably an index into the content table; confirm. */
49 guint16 m_table_index;
50 } chewing_index_item_t;
/* Item type returned by FullPinyinParser2::retrieve_divided_item():
 * one original key string and a pair of new key strings.
 * NOTE(review): presumably the pair is the alternative split of the
 * original key; confirm against the table data. */
53 const char * m_orig_key;
55 const char * m_new_keys[2];
57 } divided_table_item_t;
/* Item type returned by the FullPinyinParser2::retrieve_resplit_item_by_*
 * lookups: a pair of original key strings and a pair of new key strings.
 * NOTE(review): presumably the two pairs spell the same input split at a
 * different position; confirm against the table data. */
60 const char * m_orig_keys[2];
62 const char * m_new_keys[2];
64 } resplit_table_item_t;
/* Item type of the table pointed to by DoublePinyinParser2::m_shengmu_table;
 * holds one shengmu string. */
67 const char * m_shengmu;
68 } double_pinyin_scheme_shengmu_item_t;
/* Item type of the table pointed to by DoublePinyinParser2::m_yunmu_table;
 * holds two yunmu string slots.
 * NOTE(review): presumably the second slot may be unused; confirm. */
71 const char * m_yunmus[2];
72 } double_pinyin_scheme_yunmu_item_t;
/* Item type of the table pointed to by ChewingParser2::m_symbol_table;
 * m_chewing is the chewing string of the entry. */
76 const char * m_chewing;
77 } chewing_symbol_item_t;
82 } chewing_tone_item_t;
/* Alias for a GLib GArray pointer; FullPinyinParser2::m_parse_steps has
 * this type. */
84 typedef GArray * ParseValueVector;
90 * Parse the ascii string into an array of the struct ChewingKeys.
97 * PhoneticParser2::~PhoneticParser2:
99 * The destructor of the PhoneticParser2.
/* Virtual so that a derived parser (e.g. FullPinyinParser2, whose
 * destructor frees a GArray) is destroyed correctly when deleted through
 * a PhoneticParser2 pointer. */
102 virtual ~PhoneticParser2() {}
106 * PhoneticParser2::parse_one_key:
107 * @options: the pinyin options from pinyin_custom2.h.
108 * @key: the parsed result of struct ChewingKey.
109 * @str: the input of the ascii string.
110 * @len: the length of the str.
111 * @returns: whether the entire string is parsed as one key.
113 * Parse only one struct ChewingKey from a string.
/* Pure virtual: FullPinyinParser2, DoublePinyinParser2 and ChewingParser2
 * each declare an override with this signature. */
116 virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0;
119 * PhoneticParser2::parse:
120 * @options: the pinyin options from pinyin_custom2.h.
121 * @keys: the parsed result of struct ChewingKeys.
122 * @str: the input of the ascii string.
123 * @len: the length of the str.
124 * @returns: the number of chars that were actually used.
126 * Parse the ascii string into an array of struct ChewingKeys.
/* Pure virtual: FullPinyinParser2, DoublePinyinParser2 and ChewingParser2
 * each declare an override with this signature.
 * NOTE(review): @key_rests is not described in the doc comment of this
 * method; presumably it is filled in parallel with @keys. Confirm
 * against the implementations. */
129 virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0;
137 * Parses the full pinyin string into an array of struct ChewingKeys.
140 class FullPinyinParser2 : public PhoneticParser2
142 /* Note: some internal pointers to full pinyin table. */
/* GArray handle that the destructor below frees.
 * NOTE(review): presumably the working storage for the dynamic
 * programming steps of parse(); confirm in the implementation. */
145 ParseValueVector m_parse_steps;
/* Helper declared here only; the body is not in this header. */
147 int final_step(size_t step_len, ChewingKeyVector & keys,
148 ChewingKeyRestVector & key_rests) const;
/* Helper declared here only; the body is not in this header. */
150 bool post_process2(pinyin_option_t options, ChewingKeyVector & keys,
151 ChewingKeyRestVector & key_rests,
152 const char * str, int len) const;
/* Looks up an item of the divided table for one key.
 * NOTE(review): presumably returns NULL when nothing matches; confirm. */
155 const divided_table_item_t * retrieve_divided_item
156 (pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest,
157 const char * str, int len) const;
/* The two lookups below take a pair of adjacent keys (cur/next) and
 * return an item of the resplit table.
 * NOTE(review): presumably they match on m_orig_keys and m_new_keys
 * respectively and return NULL when nothing matches; confirm. */
159 const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins
160 (pinyin_option_t options,
161 ChewingKey * cur_key, ChewingKeyRest * cur_rest,
162 ChewingKey * next_key, ChewingKeyRest * next_rest,
163 const char * str, int len) const;
164 const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins
165 (pinyin_option_t options,
166 ChewingKey * cur_key, ChewingKeyRest * cur_rest,
167 ChewingKey * next_key, ChewingKeyRest * next_rest,
168 const char * str, int len) const;
/* Frees m_parse_steps; passing TRUE to g_array_free() also releases the
 * element data, not just the GArray wrapper.
 * NOTE(review): no copy constructor or copy assignment is visible here,
 * so copying a FullPinyinParser2 would free the same GArray twice.
 * Confirm that instances are never copied. */
172 virtual ~FullPinyinParser2() {
173 g_array_free(m_parse_steps, TRUE);
/* Overrides PhoneticParser2::parse_one_key. */
176 virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
179 * the parse method will use dynamic programming to drive parse_one_key.
181 virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
186 * DoublePinyinParser2:
188 * Parse the double pinyin string into an array of struct ChewingKeys.
191 /* The valid input chars of ShuangPin are a-z and ';'
193 class DoublePinyinParser2 : public PhoneticParser2
195 /* Note: two internal pointers to double pinyin scheme table. */
197 const double_pinyin_scheme_shengmu_item_t * m_shengmu_table;
198 const double_pinyin_scheme_yunmu_item_t * m_yunmu_table;
/* Clears both table pointers, then calls set_scheme() with
 * DOUBLE_PINYIN_DEFAULT. The bool result of set_scheme() is ignored. */
201 DoublePinyinParser2() {
202 m_shengmu_table = NULL; m_yunmu_table = NULL;
203 set_scheme(DOUBLE_PINYIN_DEFAULT);
/* Empty: the two table pointers are not freed by this class. */
206 virtual ~DoublePinyinParser2() {}
/* Overrides PhoneticParser2::parse_one_key. */
208 virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
/* Overrides PhoneticParser2::parse. */
210 virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
/* Selects the double pinyin scheme.
 * NOTE(review): presumably repoints m_shengmu_table / m_yunmu_table and
 * returns false for an unsupported scheme; confirm. */
213 bool set_scheme(DoublePinyinScheme scheme);
220 * Parse the chewing string into an array of struct ChewingKeys.
222 * Several keyboard schemes are supported:
223 * * Chewing_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
224 * * Chewing_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
225 * * Chewing_GINYIEH Gin-Yieh ZhuYin keyboard.
226 * * Chewing_ETEN Eten (倚天) ZhuYin keyboard.
230 /* Note: maybe yunmus shuffle will be supported later.
231 * currently this feature is postponed.
233 class ChewingParser2 : public PhoneticParser2
235 /* Note: some internal pointers to chewing scheme table. */
237 const chewing_symbol_item_t * m_symbol_table;
238 const chewing_tone_item_t * m_tone_table;
/* Clears both table pointers, then calls set_scheme() with
 * CHEWING_DEFAULT. The bool result of set_scheme() is ignored. */
242 m_symbol_table = NULL; m_tone_table = NULL;
243 set_scheme(CHEWING_DEFAULT);
/* Empty: the two table pointers are not freed by this class. */
246 virtual ~ChewingParser2() {}
/* Overrides PhoneticParser2::parse_one_key. */
248 virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
/* Overrides PhoneticParser2::parse. */
250 virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
/* Selects the chewing keyboard scheme.
 * NOTE(review): presumably repoints m_symbol_table / m_tone_table and
 * returns false for an unsupported scheme; confirm. */
253 bool set_scheme(ChewingScheme scheme);
/* NOTE(review): presumably reports whether @key is a valid input char of
 * the current scheme and stores the matching chewing string through
 * @symbol; confirm in the implementation. */
254 bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const;
258 /* compare pinyins with chewing internal representations. */
/* Compares two chewing initials, taking the ambiguity flags in @options
 * into account. Each test below matches when its PINYIN_AMB_* flag is
 * set and lhs/rhs are the two members of that pair, in either order.
 * Pairs handled: C/CH, S/SH, Z/ZH, F/H, L/N, L/R, G/K. */
259 inline int pinyin_compare_initial2(pinyin_option_t options,
261 ChewingInitial rhs) {
265 if ((options & PINYIN_AMB_C_CH) &&
266 ((lhs == CHEWING_C && rhs == CHEWING_CH) ||
267 (lhs == CHEWING_CH && rhs == CHEWING_C)))
270 if ((options & PINYIN_AMB_S_SH) &&
271 ((lhs == CHEWING_S && rhs == CHEWING_SH) ||
272 (lhs == CHEWING_SH && rhs == CHEWING_S)))
275 if ((options & PINYIN_AMB_Z_ZH) &&
276 ((lhs == CHEWING_Z && rhs == CHEWING_ZH) ||
277 (lhs == CHEWING_ZH && rhs == CHEWING_Z)))
280 if ((options & PINYIN_AMB_F_H) &&
281 ((lhs == CHEWING_F && rhs == CHEWING_H) ||
282 (lhs == CHEWING_H && rhs == CHEWING_F)))
285 if ((options & PINYIN_AMB_L_N) &&
286 ((lhs == CHEWING_L && rhs == CHEWING_N) ||
287 (lhs == CHEWING_N && rhs == CHEWING_L)))
290 if ((options & PINYIN_AMB_L_R) &&
291 ((lhs == CHEWING_L && rhs == CHEWING_R) ||
292 (lhs == CHEWING_R && rhs == CHEWING_L)))
295 if ((options & PINYIN_AMB_G_K) &&
296 ((lhs == CHEWING_G && rhs == CHEWING_K) ||
297 (lhs == CHEWING_K && rhs == CHEWING_G)))
/* Compares the (middle, final) parts of two keys under @options.
 * Order of the checks: exact equality of both parts, the incomplete-key
 * case, the difference of the middles, the ambiguous final pairs, and
 * last the plain difference final_lhs - final_rhs. */
304 inline int pinyin_compare_middle_and_final2(pinyin_option_t options,
305 ChewingMiddle middle_lhs,
306 ChewingMiddle middle_rhs,
307 ChewingFinal final_lhs,
308 ChewingFinal final_rhs) {
309 if (middle_lhs == middle_rhs && final_lhs == final_rhs)
312 /* both pinyin and chewing incomplete options will enable this. */
313 if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) {
/* One side has neither a middle nor a final (both are the ZERO value). */
314 if (middle_lhs == CHEWING_ZERO_MIDDLE &&
315 final_lhs == CHEWING_ZERO_FINAL)
317 if (middle_rhs == CHEWING_ZERO_MIDDLE &&
318 final_rhs == CHEWING_ZERO_FINAL)
322 /* compare chewing middle first. */
323 int middle_diff = middle_lhs - middle_rhs;
/* Ambiguous final pairs, each matched in either order when its
 * PINYIN_AMB_* flag is set: AN/ANG, EN/ENG, IN/ING. */
327 if ((options & PINYIN_AMB_AN_ANG) &&
328 ((final_lhs == CHEWING_AN && final_rhs == CHEWING_ANG) ||
329 (final_lhs == CHEWING_ANG && final_rhs == CHEWING_AN)))
332 if ((options & PINYIN_AMB_EN_ENG) &&
333 ((final_lhs == CHEWING_EN && final_rhs == CHEWING_ENG) ||
334 (final_lhs == CHEWING_ENG && final_rhs == CHEWING_EN)))
/* NOTE(review): the IN/ING finals are spelled with a PINYIN_ prefix,
 * unlike the CHEWING_ finals above; presumably these are the actual
 * enumerator names in ChewingFinal. Confirm. */
337 if ((options & PINYIN_AMB_IN_ING) &&
338 ((final_lhs == PINYIN_IN && final_rhs == PINYIN_ING) ||
339 (final_lhs == PINYIN_ING && final_rhs == PINYIN_IN)))
342 return (final_lhs - final_rhs);
346 inline int pinyin_compare_tone2(pinyin_option_t options,
351 if (lhs == CHEWING_ZERO_TONE)
353 if (rhs == CHEWING_ZERO_TONE)