From: Peng Wu Date: Wed, 16 Nov 2011 09:43:32 +0000 (+0800) Subject: begin to write re-split post processing X-Git-Tag: 0.4.91~167 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7c2c087c4ab0cd31c12bf686fe754c0c1064847a;p=platform%2Fupstream%2Flibpinyin.git begin to write re-split post processing --- diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 9fd85cb..e1fb1b0 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -268,9 +268,63 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys, /* final step for back tracing. */ gint16 parsed_len = final_step(step_len, keys, key_rests); + assert(keys->len == key_rests->len); + gint16 num_keys = keys->len; /* post processing for re-split table. */ if (options & USE_RESPLIT_TABLE) { + + ChewingKey * cur_key = NULL, * next_key = NULL; + ChewingKeyRest * cur_rest = NULL, * next_rest = NULL; + guint16 cur_tone = CHEWING_ZERO_TONE, next_tone = CHEWING_ZERO_TONE; + + for (i = 0; i < num_keys - 1; ++i) { + cur_rest = &g_array_index(key_rests, ChewingKeyRest, i); + next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1); + + /* some "'" here */ + if (cur_rest->m_raw_end != next_rest->m_raw_begin) + continue; + + cur_key = &g_array_index(keys, ChewingKey, i); + next_key = &g_array_index(keys, ChewingKey, i + 1); + + if (options & USE_TONE) { + cur_tone = cur_key->m_tone; + next_tone = next_key->m_tone; + cur_key->m_tone = next_key->m_tone = CHEWING_ZERO_TONE; + } + + /* lookup re-split table */ + size_t k; + resplit_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + /* no ops */ + if (item->m_orig_freq >= item->m_new_freq) + continue; + /* TODO: refine code style here. */ + if (item->m_orig_first_key == *cur_key && + item->m_orig_second_key == *next_key) + break; + /* TODO: should use pinyin_exact_compare2 here. */ + assert(FALSE); + } + if (k < G_N_ELEMENTS(resplit_table)) { + /* do re-split */ + item = resplit_table + k; + *cur_key = item->m_new_first_key; + *next_key = item->m_new_second_key; + /* assumes only moved one char in gen_all_resplit script. */ + cur_rest->m_raw_end --; + next_rest->m_raw_begin --; + /* save back tones */ + if (options & USE_TONE) { + cur_key->m_tone = cur_tone; + next_key->m_tone = next_tone; + } + } + } } @@ -285,7 +339,7 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, parse_value_t * curstep = NULL; /* find longest match, which starts from the beginning of input. */ - for ( i = step_len - 1; i >= 0; --i) { + for (i = step_len - 1; i >= 0; --i) { curstep = &g_array_index(m_parse_steps, parse_value_t, i); if (i == curstep->m_parsed_len) break;