3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
5 # Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
7 # The contents of this file are subject to the terms of either the GNU Lesser
8 # General Public License Version 2.1 only ("LGPL") or the Common Development and
9 # Distribution License ("CDDL")(collectively, the "License"). You may not use this
10 # file except in compliance with the License. You can obtain a copy of the CDDL at
11 # http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
12 # http://www.opensource.org/licenses/lgpl-license.php. See the License for the
13 # specific language governing permissions and limitations under the License. When
14 # distributing the software, include this License Header Notice in each file and
15 # include the full text of the License in the License file as well as the
18 # NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
20 # For Covered Software in this distribution, this License shall be governed by the
21 # laws of the State of California (excluding conflict-of-law provisions).
22 # Any litigation relating to this License shall be subject to the jurisdiction of
23 # the Federal Courts of the Northern District of California and the state courts
24 # of the State of California, with venue lying in Santa Clara County, California.
28 # If you wish your version of this file to be governed by only the CDDL or only
29 # the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
30 # include this software in this distribution under the [CDDL or LGPL Version 2.1]
31 # license." If you don't indicate a single choice of license, a recipient has the
32 # option to distribute your version of this file under either the CDDL or the LGPL
33 # Version 2.1, or to extend the choice of license to its licensees as provided
34 # above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
35 # Version 2 license, then the option applies only if the new code is made subject
36 # to such option by the copyright holder.
# Pinyin initials. decode_syllable() indexes this list with the upper bits of
# a syllable code, so the order of the entries is significant; index 0 is
# the empty initial.
initials = [
    "",
    "b", "p", "m", "f",
    "d", "t", "n", "l",
    "g", "k", "h",
    "j", "q", "x",
    "zh", "ch", "sh", "r",
    "z", "c", "s",
    "y", "w",
]

# Pinyin finals. decode_syllable() indexes this list with bits 4-11 of a
# syllable code, so the order of the entries is significant; index 0 is the
# empty final.
finals = [
    "",
    "a", "o", "e",
    "ai", "ei", "ao", "ou",
    "an", "en", "ang", "eng", "er",
    "i", "ia", "ie", "iao", "iu", "ian", "in", "iang", "ing",
    "u", "ua", "uo", "uai", "ui", "uan", "un", "uang",
    "ong",
    "v", "ue",
    "iong",
]

# Finals that gen_inner_fuzzy_syllable_tables() pairs with the same final
# minus its first letter (e.g. 'ia' -> 'a').
inner_fuzzy_finals = [
    "ia", "iao", "ian", "iang", "ie",
    "ua", "uai", "uan", "uang", "ue",
]
61 auto_correction_pairs = {
def decode_syllable(s):
    """Split a packed syllable code into its (initial, final) strings.

    The index into ``initials`` is taken from bit 12 upward and the index
    into ``finals`` from bits 4-11; the low four bits are ignored.

    Returns a 2-tuple ``(initial, final)``.
    """
    initial_index = s >> 12
    final_index = (s & 0x00ff0) >> 4
    return initials[initial_index], finals[final_index]
def init_fuzzy_map(fuzzy_pairs):
    """Build a symmetric lookup table from pairs of interchangeable spellings.

    fuzzy_pairs -- iterable of 2-item pairs ``(a, b)``.

    Returns a dict in which ``a`` maps to a list containing ``b`` and ``b``
    maps to a list containing ``a``, accumulated over all pairs in input
    order.  An empty input yields an empty dict.
    """
    # Fill a fresh local dict and hand it back: the module-level
    # ``fuzzy_map`` is assigned from this function's return value, so it
    # cannot be relied on to exist while this body is running.
    mapping = {}
    for first, second in fuzzy_pairs:
        mapping.setdefault(first, []).append(second)
        mapping.setdefault(second, []).append(first)
    return mapping
# Module-level table consulted by get_fuzzy_syllables(); built once, when the
# module is loaded, from fuzzy_pairs.
fuzzy_map = init_fuzzy_map(fuzzy_pairs)
def get_fuzzy_syllables(syllable):
    """Return the codes of the valid syllables that are fuzzy variants of *syllable*.

    The code is decoded into its initial and final; each part is expanded to
    itself plus whatever ``fuzzy_map`` lists for it, and every
    initial + final combination that is a key of ``valid_syllables``
    contributes its code.

    syllable -- packed syllable code, as accepted by decode_syllable().

    Returns a list of syllable codes that never contains *syllable* itself.
    The list is empty when there are no variants.
    """
    initial, final = decode_syllable(syllable)

    # Read with .get() instead of .setdefault(): a lookup must not leave
    # empty entries behind in the shared module-level map.
    initial_choices = fuzzy_map.get(initial, []) + [initial]
    final_choices = fuzzy_map.get(final, []) + [final]

    variants = []
    for candidate_initial in initial_choices:
        for candidate_final in final_choices:
            spelling = candidate_initial + candidate_final
            if spelling not in valid_syllables:
                continue
            code = valid_syllables[spelling]
            # Skip the input itself here rather than calling list.remove()
            # afterwards, which raises ValueError when the input's own
            # spelling is not a valid syllable.
            if code != syllable:
                variants.append(code)
    return variants
def gen_suffix_trie(fname):
    """Build a trie of reversed syllable spellings and emit it as C arrays.

    Every key of ``valid_syllables`` is inserted reversed (so the trie is
    keyed by suffix) with its syllable code as the value.  The trie is then
    handed to ``DATrie.construct_from_trie`` and the result is emitted via
    ``output_static_c_arrays(fname)``.

    fname -- passed unchanged to ``output_static_c_arrays``; presumably the
             path of the header file to write.
    """
    # Function-scope import, so the trie module is only needed when the
    # trie is actually generated.
    from trie import Trie, DATrie

    # The two imported classes were never instantiated while ``trie`` and
    # ``pytrie`` were used unbound, which fails with NameError.
    # NOTE(review): assumes both classes take no constructor arguments --
    # confirm against the trie module.
    trie = Trie()
    pytrie = DATrie()

    for spelling in valid_syllables:
        trie.add(spelling[::-1], valid_syllables[spelling])

    pytrie.construct_from_trie(trie)
    pytrie.output_static_c_arrays(fname)
def gen_fuzzy_syllable_pairs_tables():
    """Print the ``fuzzy_pre_syllables`` and ``fuzzy_pro_syllables`` C tables.

    A "pro" syllable is a valid syllable whose first letter is an initial
    and whose remainder is itself a valid syllable; a "pre" syllable is a
    valid syllable whose last letter is an initial and whose leading part is
    itself a valid syllable.  Spellings that are themselves initials are
    excluded.  Only letters that occur both as the first letter of a "pro"
    syllable and as the last letter of a "pre" syllable are kept.

    Each row is printed to standard output as three comma-terminated fields
    followed by the spelling in a C comment.  Rows follow the iteration
    order of ``valid_syllables``.
    """
    pro_syllables = [s for s in valid_syllables
                     if s[1:] in valid_syllables and s[0] in initials and s not in initials]
    pre_syllables = [s for s in valid_syllables
                     if s[:-1] in valid_syllables and s[-1] in initials and s not in initials]

    # Letters usable on both sides of a syllable boundary.
    shared_letters = (set(s[0] for s in pro_syllables)
                      & set(s[-1] for s in pre_syllables))

    pro_syllables = [s for s in pro_syllables if s[0] in shared_letters]
    pre_syllables = [s for s in pre_syllables if s[-1] in shared_letters]

    row_format = " %-12s %-12s %-12s /* %s */"

    # Single-argument print(...) produces the same output under Python 2
    # and also parses under Python 3.
    print("static const unsigned fuzzy_pre_syllables [] = {")
    for s in pre_syllables:
        print(row_format % ("0x%05x," % valid_syllables[s[:-1]],
                            "'%s'," % s[-1],
                            "0x%05x," % valid_syllables[s],
                            s))
    # Close the initializer so the emitted C is well-formed.
    # NOTE(review): if the C consumer expects a zero terminator row, emit
    # it before this line -- confirm against the code that reads the table.
    print("};")

    print("static const unsigned fuzzy_pro_syllables [] = {")
    for s in pro_syllables:
        print(row_format % ("0x%05x," % valid_syllables[s],
                            "'%s'," % s[0],
                            "0x%05x," % valid_syllables[s[1:]],
                            s))
    # NOTE(review): same terminator question as for the table above.
    print("};")
def gen_inner_fuzzy_syllable_tables():
    """Print the ``fuzzy_finals_map`` C table to standard output.

    One row is printed per entry of ``inner_fuzzy_finals``: the entry's
    index in ``finals``, the ``valid_syllables`` code of the entry with its
    first letter dropped, and the length of that shortened spelling,
    followed by a C comment showing the mapping.
    """
    row_format = " %-12s %-12s %-12s /* %-4s -> %-4s len %d */"

    # Single-argument print(...) produces the same output under Python 2
    # and also parses under Python 3.
    print("static const unsigned fuzzy_finals_map[] = {")
    for s in inner_fuzzy_finals:
        shortened = s[1:]
        print(row_format % ("0x%02x," % finals.index(s),
                            "0x%02x," % valid_syllables[shortened],
                            "%d," % (len(s) - 1,),
                            s,
                            shortened,
                            len(s) - 1))
    # Close the initializer so the emitted C is well-formed.
    # NOTE(review): if the C consumer expects a zero terminator row, emit
    # it before this line -- confirm against the code that reads the table.
    print("};")
if __name__ == "__main__":
    # The trie generator receives the header path first; the two table
    # generators then print their output to standard output, in this order.
    trie_header = "../src/pinyin/quanpin_trie.h"
    gen_suffix_trie(trie_header)
    for emit_table in (gen_inner_fuzzy_syllable_tables,
                       gen_fuzzy_syllable_pairs_tables):
        emit_table()