# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Sorted copies of the tables exported by the `pinyin` module.  Sorting fixes
# the iteration order, so the generators that walk these lists produce their
# results in the same order on every run.
pinyin_list = sorted(pinyin.PINYIN_LIST)
shengmu_list = sorted(pinyin.SHENGMU_LIST)
yunmu_list = sorted(pinyin.YUNMU_LIST)
def load_phrase(filename):
    """Read a phrase table and record its entries in the module-level ``phrase_dict``.

    Each non-blank line is split at its first run of whitespace into a pinyin
    string and the rest of the line (the frequency field).  A pinyin string
    containing an apostrophe, e.g. ``xi'an``, is stored under the tuple of its
    two halves ``("xi", "an")``; any other string is stored under itself.  A
    later line with the same key overwrites the earlier value.

    Args:
        filename: path of the text file to read.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a non-blank line has no frequency field, or a pinyin
            string contains more than one apostrophe.
    """
    # The context manager closes the file even when a malformed line raises.
    with open(filename, "r") as phrasefile:
        for line in phrasefile:
            line = line.rstrip(os.linesep)
            if not line.strip():
                # A blank line (typically a trailing one) carries no entry and
                # would otherwise break the two-way unpacking below.
                continue
            (pinyin_str, freq) = line.split(None, 1)
            # NOTE(review): the frequency is stored exactly as read, i.e. as
            # text.  Confirm whether a numeric conversion or a filter on zero
            # frequencies is expected at this point.
            if "'" in pinyin_str:
                (first_key, second_key) = pinyin_str.split("'")
                phrase_dict[(first_key, second_key)] = freq
            else:
                phrase_dict[pinyin_str] = freq
def gen_all_divided():
    """Yield every pinyin that can be cut into two shorter pinyins.

    For each entry of ``pinyin_list`` every other entry is tried as a proper
    prefix; when the remaining suffix is itself in ``pinyin_list`` the split
    is reported.  Results follow the order of ``pinyin_list`` for the whole
    key first and for the prefix second.

    Yields:
        ``(pinyin_key, first_key, second_key)`` tuples where
        ``first_key + second_key == pinyin_key``.
    """
    # Built once so the suffix test is a hash lookup rather than a list scan.
    known_keys = set(pinyin_list)
    for pinyin_key in pinyin_list:
        for first_key in pinyin_list:
            # Only a strictly shorter prefix leaves a non-empty suffix.
            if len(pinyin_key) <= len(first_key):
                continue
            if not pinyin_key.startswith(first_key):
                continue
            second_key = pinyin_key[len(first_key):]
            if second_key in known_keys:
                yield pinyin_key, first_key, second_key
def filter_divided():
    """Yield frequency records for divided pinyins found in ``phrase_dict``.

    Walks the splits produced by ``gen_all_divided()`` and keeps those whose
    two-key form ``(first_key, second_key)`` has an entry in ``phrase_dict``.

    Yields:
        ``(pinyin_key, orig_freq, first_key, second_key, new_freq)`` where
        ``orig_freq`` is the value stored for the undivided ``pinyin_key``
        and ``new_freq`` the value stored for the two-key form.
    """
    for pinyin_key, first_key, second_key in gen_all_divided():
        divided_key = (first_key, second_key)
        if divided_key not in phrase_dict:
            continue
        # NOTE(review): 0 is assumed as the frequency of an undivided pinyin
        # that has no entry of its own -- confirm.
        orig_freq = phrase_dict.get(pinyin_key, 0)
        new_freq = phrase_dict[divided_key]
        yield pinyin_key, orig_freq, first_key, second_key, new_freq
def gen_all_resplit():
    """Yield pinyin pairs whose boundary can be moved one letter to the left.

    For a pinyin ending in ``n``, ``g`` or ``r`` that is followed by a yunmu
    which is itself a pinyin, the final letter may instead start the second
    syllable, provided both resulting keys are in ``pinyin_list``.  For a
    pinyin ending in ``e`` followed by ``r``, the ``e`` may move over to form
    ``er`` when the shortened first key is in ``pinyin_list``.

    Yields:
        ``(pinyin_key, second, shortened_key, new_second)`` tuples:
        the original pair followed by the re-split pair.
    """
    # Built once so each membership test is a hash lookup, not a list scan.
    known_keys = set(pinyin_list)
    # Only a yunmu that is a complete pinyin can follow as a second syllable.
    standalone_yunmu = [yun for yun in yunmu_list if yun in known_keys]

    for pinyin_key in pinyin_list:
        head, tail = pinyin_key[:-1], pinyin_key[-1:]
        if tail in ("n", "g", "r"):
            # check first new pinyin key (the same for every yunmu, so hoisted)
            if head not in known_keys:
                continue
            for yun in standalone_yunmu:
                # check second new pinyin key
                new_pinyin_key = tail + yun
                if new_pinyin_key in known_keys:
                    yield pinyin_key, yun, head, new_pinyin_key
        # NOTE(review): confirm this branch is meant to be active; lines
        # around it were lost, so it may have been deliberately disabled.
        elif tail == "e":
            # check first new pinyin key
            if head in known_keys:
                yield pinyin_key, "r", head, "er"
def filter_resplit():
    """Yield frequency records for re-split pinyin pairs found in ``phrase_dict``.

    Each result of ``gen_all_resplit()`` is read in reverse: the pair the
    generator reports first is treated as the *new* pair and the pair it
    reports second as the *original* one.  A record is produced only when the
    new pair has an entry in ``phrase_dict``.

    Yields:
        ``(orig_first_key, orig_second_key, orig_freq,
        new_first_key, new_second_key, new_freq)``.
    """
    # do the reverse here, as libpinyin pinyin parser is different with
    # ibus-pinyin's parser: the generator's first pair is unpacked as "new"
    # and its second pair as "orig".
    for (new_first_key, new_second_key, orig_first_key, orig_second_key) \
            in gen_all_resplit():
        new_key = (new_first_key, new_second_key)
        if new_key not in phrase_dict:
            continue
        new_freq = phrase_dict[new_key]
        # NOTE(review): 0 is assumed as the frequency of an original pair
        # that has no entry of its own -- confirm.
        orig_freq = phrase_dict.get((orig_first_key, orig_second_key), 0)
        yield orig_first_key, orig_second_key, orig_freq, \
            new_first_key, new_second_key, new_freq
# Fill phrase_dict when the module is loaded (these calls sit outside the
# __main__ guard).  Both paths are relative, so the files are looked up in the
# current working directory; an entry in specials.txt replaces an entry with
# the same key from pinyins.txt.
load_phrase("pinyins.txt")
load_phrase("specials.txt")
119 if __name__ == "__main__":
120 for p in filter_divided():
122 for p in filter_resplit():