From 8ac95f0eefd222a27d28e8d92edcfa4b39464018 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 9 Dec 2011 16:31:59 +0800 Subject: [PATCH] compatible with maximum forward parser --- src/storage/pinyin_parser2.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 727dc28..2e2b649 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -283,6 +283,49 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, next_sep = k; } + /* Heuristic Method: + * do maximum forward match first. */ + for (size_t pos = i; pos < next_sep; ++pos) { + curstep = &g_array_index(m_parse_steps, parse_value_t, pos); + size_t try_len = std_lite::min + (pos + max_full_pinyin_length, next_sep); + for (size_t n = try_len; n > pos; --n) { + nextstep = &g_array_index(m_parse_steps, parse_value_t, n); + + /* gen next step */ + const char * onepinyin = input + pos; + gint16 onepinyinlen = n - pos; + value = parse_value_t(); + + ChewingKey key; ChewingKeyRest rest; + bool parsed = parse_one_key + (options, key, rest, onepinyin, onepinyinlen); + rest.m_raw_begin = pos; rest.m_raw_end = n; + + if (!parsed) + continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + value.m_key = key; value.m_key_rest = rest; + value.m_num_keys = curstep->m_num_keys + 1; + value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; + value.m_last_step = pos; + + /* save next step */ + if (-1 == nextstep->m_last_step) + *nextstep = value; + if (value.m_parsed_len > nextstep->m_parsed_len) + *nextstep = value; + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys < nextstep->m_num_keys) + *nextstep = value; + + /* maximum forward, set pos to n in next iteration. */ + pos = n - 1; + break; + } + } + /* dynamic programming here. */ for (size_t m = i; m < next_sep; ++m) { curstep = &g_array_index(m_parse_steps, parse_value_t, m); @@ -302,6 +345,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, rest.m_raw_begin = m; rest.m_raw_end = n; if (!parsed) continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + value.m_key = key; value.m_key_rest = rest; value.m_num_keys = curstep->m_num_keys + 1; value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; -- 2.7.4