begin to write re-split post processing
author Peng Wu <alexepico@gmail.com>
Wed, 16 Nov 2011 09:43:32 +0000 (17:43 +0800)
committer Peng Wu <alexepico@gmail.com>
Wed, 16 Nov 2011 09:43:32 +0000 (17:43 +0800)
src/storage/pinyin_parser2.cpp

index 9fd85cb..e1fb1b0 100644 (file)
@@ -268,9 +268,63 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
 
     /* final step for back tracing. */
     gint16 parsed_len = final_step(step_len, keys, key_rests);
+    assert(keys->len == key_rests->len);
+    gint16 num_keys = keys->len;
 
     /* post processing for re-split table. */
     if (options & USE_RESPLIT_TABLE) {
+
+        ChewingKey * cur_key = NULL, * next_key = NULL;
+        ChewingKeyRest * cur_rest = NULL, * next_rest = NULL;
+        guint16 cur_tone = CHEWING_ZERO_TONE, next_tone = CHEWING_ZERO_TONE;
+
+        for (i = 0; i < num_keys - 1; ++i) {
+            cur_rest = &g_array_index(key_rests, ChewingKeyRest, i);
+            next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1);
+
+            /* some "'" here */
+            if (cur_rest->m_raw_end != next_rest->m_raw_begin)
+                continue;
+
+            cur_key = &g_array_index(keys, ChewingKey, i);
+            next_key = &g_array_index(keys, ChewingKey, i + 1);
+
+            if (options & USE_TONE) {
+                cur_tone = cur_key->m_tone;
+                next_tone = next_key->m_tone;
+                cur_key->m_tone = next_key->m_tone = CHEWING_ZERO_TONE;
+            }
+
+            /* lookup re-split table */
+            size_t k;
+            resplit_table_item_t * item = NULL;
+            for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+                item = resplit_table + k;
+                /* no ops */
+                if (item->m_orig_freq >= item->m_new_freq)
+                    continue;
+                /* TODO: refine code style here. */
+                if (item->m_orig_first_key == *cur_key &&
+                    item->m_orig_second_key == *next_key)
+                    break;
+                /* TODO: should use pinyin_exact_compare2 here. */
+                assert(FALSE);
+            }
+            if (k < G_N_ELEMENTS(resplit_table)) {
+                /* do re-split */
+                item = resplit_table + k;
+                *cur_key = item->m_new_first_key;
+                *next_key = item->m_new_second_key;
+                /* assumes only moved one char in gen_all_resplit script. */
+                cur_rest->m_raw_end --;
+                next_rest->m_raw_begin --;
+                /* save back tones */
+                if (options & USE_TONE) {
+                    cur_key->m_tone = cur_tone;
+                    next_key->m_tone = next_tone;
+                }
+            }
+        }
         
     }
 
@@ -285,7 +339,7 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
     parse_value_t * curstep = NULL;
 
     /* find longest match, which starts from the beginning of input. */
-    for ( i = step_len - 1; i >= 0; --i) {
+    for (i = step_len - 1; i >= 0; --i) {
         curstep = &g_array_index(m_parse_steps, parse_value_t, i);
         if (i == curstep->m_parsed_len)
             break;