update pinyin parser
author Peng Wu <alexepico@gmail.com>
Thu, 12 Jan 2012 04:30:19 +0000 (12:30 +0800)
committer Peng Wu <alexepico@gmail.com>
Thu, 12 Jan 2012 04:30:19 +0000 (12:30 +0800)
scripts/specialtable.py
src/storage/pinyin_parser2.cpp

index 41f9a26..89fa097 100644 (file)
@@ -107,7 +107,11 @@ def gen_all_resplit():
 def filter_resplit():
     for (orig_first_key, orig_second_key, new_first_key, new_second_key) \
     in gen_all_resplit():
-        if not (new_first_key, new_second_key) in phrase_dict:
+        #do the reverse here, as libpinyin pinyin parser is different with
+        #ibus-pinyin's parser.
+        (orig_first_key, orig_second_key, new_first_key, new_second_key) = \
+            (new_first_key, new_second_key, orig_first_key, orig_second_key)
+        if (new_first_key, new_second_key) not in phrase_dict:
             continue
         orig_freq = 0
         new_freq = phrase_dict[(new_first_key, new_second_key)]
index ee43eaf..ceea641 100644 (file)
@@ -299,10 +299,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
             next_sep = k;
         }
 
-        pinyin_option_t heuristic_options = options & ~PINYIN_CORRECT_ALL;
-
+#if 0
         /* Heuristic Method:
-         *   do maximum forward match first, and without auto corrections. */
+         *   do maximum forward match first. */
         for (size_t pos = i; pos < next_sep; ++pos) {
             curstep = &g_array_index(m_parse_steps, parse_value_t, pos);
             size_t try_len = std_lite::min
@@ -317,7 +316,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
 
                 ChewingKey key; ChewingKeyRest rest;
                 bool parsed = parse_one_key
-                    (heuristic_options, key, onepinyin, onepinyinlen);
+                    (options, key, onepinyin, onepinyinlen);
                 rest.m_raw_begin = pos; rest.m_raw_end = n;
 
                 if (!parsed)
@@ -343,6 +342,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
                 break;
             }
         }
+#endif
 
         /* dynamic programming here. */
         for (size_t m = i; m < next_sep; ++m) {
@@ -379,6 +379,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
                 if (value.m_parsed_len == nextstep->m_parsed_len &&
                     value.m_num_keys < nextstep->m_num_keys)
                     *nextstep = value;
+                if (nextstep->m_key.m_initial == CHEWING_ZERO_INITIAL &&
+                    value.m_key.m_initial != CHEWING_ZERO_INITIAL)
+                    *nextstep = value;
             }
         }
     }
@@ -489,8 +492,8 @@ bool FullPinyinParser2::post_process(pinyin_option_t options,
             *cur_key = item->m_new_keys[0];
             *next_key = item->m_new_keys[1];
             /* assumes only moved one char in gen_all_resplit script. */
-            cur_rest->m_raw_end --;
-            next_rest->m_raw_begin --;
+            cur_rest->m_raw_end ++;
+            next_rest->m_raw_begin ++;
         }
 
         /* save back tones */