add const modifiers for chewing table
[platform/upstream/libpinyin.git] / scripts / pinyintable.py
1 # -*- coding: utf-8 -*-
2 # vim:set et sts=4 sw=4:
3 #
4 # libpinyin - Library to deal with pinyin.
5 #
6 # Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
21
22 import pinyin
23 import bopomofo
24 import chewing
25 import itertools
26 from correct import *
27
28
# Every key of the pinyin -> bopomofo map, in sorted order.
pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP)
# Sorted copy of the shengmu entries declared by the pinyin module.
shengmu_list = sorted(pinyin.SHENGMU_LIST)
31
32
def check_pinyin_chewing_map():
    """Report every key of ``pinyin.PINYIN_DICT`` that has no chewing mapping.

    A key counts as mapped when it is present in the module-level
    ``pinyin_list``.  One line is printed to standard output for each
    unmapped key.  Nothing is returned, and a missing mapping does not
    raise.
    """
    for pinyin_key in pinyin.PINYIN_DICT:
        if pinyin_key not in pinyin_list:
            # Interpolate the key into the message.  Passing it as a second
            # print() argument would leave the literal "%s" in the output.
            print("pinyin %s has no chewing mapping" % (pinyin_key,))
39
40
def get_chewing(pinyin_key):
    """Return the ``(initial, middle, final)`` constant names for a pinyin.

    The bopomofo string registered for *pinyin_key* in
    ``bopomofo.PINYIN_BOPOMOFO_MAP`` is translated character by character
    through the ``chewing.CHEWING_ASCII_*_MAP`` tables.  Components that no
    character supplies keep their ``CHEWING_ZERO_*`` default.

    pinyin_key -- a key of ``bopomofo.PINYIN_BOPOMOFO_MAP``.

    Returns a 3-tuple of strings naming the initial, middle and final.

    Raises AssertionError when *pinyin_key* is None or is not a key of
    ``bopomofo.PINYIN_BOPOMOFO_MAP`` (only while assertions are enabled).
    """
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'

    # Keys starting with 'w' or 'y' get a PINYIN_* pseudo initial.  It is
    # overwritten below if the bopomofo string supplies a real initial.
    if pinyin_key[0] == 'w':
        initial = 'PINYIN_W'
    if pinyin_key[0] == 'y':
        initial = 'PINYIN_Y'

    # get chewing string
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    # Keys in bopomofo.SPECIAL_INITIAL_SET start out with middle CHEWING_I.
    if pinyin_key in bopomofo.SPECIAL_INITIAL_SET:
        middle = "CHEWING_I"

    # Normal process: every character may set any of the three components.
    # The tests are deliberately independent (not elif), and a later
    # character overrides an earlier one for the same component.
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ":  # merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    # (middle, final) combinations that are rewritten to a pinyin final.
    post_process_rules = {
        # handle "ueng"/"ong"
        ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
        # handle "veng"/"iong"
        ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
        # handle "ien"/"in"
        ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
        # handle "ieng"/"ing"
        ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
        }

    if (middle, final) in post_process_rules:
        (middle, final) = post_process_rules[(middle, final)]

    return initial, middle, final
85
86
def gen_pinyin_list():
    """Yield every table entry: pinyins, shengmu, corrections, then u-to-v.

    The four generators are drained one after another, in that order, and
    their items are passed through unchanged.
    """
    producers = (gen_pinyins, gen_shengmu, gen_corrects, gen_u_to_v)
    for produce in producers:
        for entry in produce():
            yield entry
94
95
def gen_pinyins():
    """Yield one table entry for each key of ``pinyin_list``.

    Each entry is the tuple
    ``(pinyin_key, pinyin_key, bopomofo_string, flags, chewing)``, where
    ``flags`` is a freshly built list of flag names and ``chewing`` is the
    triple returned by ``get_chewing``.
    """
    for key in pinyin_list:
        symbols = bopomofo.PINYIN_BOPOMOFO_MAP[key]
        # True when the whole bopomofo string is itself a key of the initial
        # map and the pinyin is not listed in SPECIAL_INITIAL_SET.
        initial_only = (symbols in chewing.CHEWING_ASCII_INITIAL_MAP
                        and key not in bopomofo.SPECIAL_INITIAL_SET)
        # (flag name, applies?) pairs, in the order the flags are emitted.
        checks = (
            ("IS_CHEWING", key in bopomofo.PINYIN_BOPOMOFO_MAP),
            ("IS_PINYIN",
             key in pinyin.PINYIN_LIST or key in pinyin.SHENGMU_LIST),
            ("PINYIN_INCOMPLETE", key in shengmu_list),
            ("CHEWING_INCOMPLETE", initial_only),
        )
        flags = [name for name, applies in checks if applies]
        yield key, key, symbols, flags, get_chewing(key)
113
114
def get_shengmu_chewing(shengmu):
    """Return the chewing triple for a lone shengmu.

    The initial is ``CHEWING_<SHENGMU>`` when that name is a key of
    ``chewing.ASCII_CHEWING_INITIAL_MAP``, otherwise ``PINYIN_<SHENGMU>``.
    The middle and final are always the ``CHEWING_ZERO_*`` names.

    Raises AssertionError when *shengmu* is not in ``shengmu_list``.
    """
    assert shengmu in shengmu_list, "Expected shengmu here."
    suffix = shengmu.upper()
    name = 'CHEWING_{0}'.format(suffix)
    if name not in chewing.ASCII_CHEWING_INITIAL_MAP:
        # No chewing initial of that name: fall back to the pinyin-only one.
        name = 'PINYIN_{0}'.format(suffix)
    return name, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"
123
def gen_shengmu():
    """Yield a table entry for each shengmu that is not already a pinyin key.

    Each entry is ``(shengmu, shengmu, initial_symbol, flags, chewing)``.
    ``initial_symbol`` is the value stored in
    ``chewing.ASCII_CHEWING_INITIAL_MAP`` for the initial's name, or the name
    itself when the map has no such key.
    """
    table = chewing.ASCII_CHEWING_INITIAL_MAP
    for sm in shengmu_list:
        if sm not in pinyin_list:
            triple = get_shengmu_chewing(sm)
            first = triple[0]
            symbol = table[first] if first in table else first
            yield sm, sm, symbol, ["IS_PINYIN", "PINYIN_INCOMPLETE"], triple
136
137
def gen_corrects():
    """Yield table entries for misspelled variants of the known pinyins.

    For every ``(correct, wrong)`` pair in ``auto_correct`` and every key of
    ``pinyin_list`` that ends with ``correct`` without being equal to it, one
    entry is produced:
    ``(pinyin_key, misspelled_key, bopomofo_string, flags, chewing)``.
    ``misspelled_key`` is the key with its trailing ``correct`` replaced by
    ``wrong``.  ``flags`` is a fresh list for every entry.
    """
    for correct, wrong in auto_correct:
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                              correct.upper())]
        for pinyin_key in pinyin_list:
            # fixes partial pinyin instead of the whole pinyin
            if pinyin_key == correct or not pinyin_key.endswith(correct):
                continue
            # Swap only the matched suffix.  str.replace() would also rewrite
            # any earlier occurrence of `correct` inside the key.
            stem = pinyin_key[:len(pinyin_key) - len(correct)]
            new_pinyin_key = stem + wrong
            chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
            # Copy the flags so that entries never share one list object.
            yield pinyin_key, new_pinyin_key, chewing_key, \
                list(flags), get_chewing(pinyin_key)
150
151
def gen_u_to_v():
    """Yield the U-to-V entries listed in ``auto_correct_ext``.

    Each entry is ``(correct, wrong, bopomofo_string, flags, chewing)``.  The
    flags stored in ``auto_correct_ext`` are ignored; every entry gets
    ``['IS_PINYIN', 'PINYIN_CORRECT_V_U']`` instead.
    """
    for right, typo, _ignored_flags in auto_correct_ext:
        symbols = bopomofo.PINYIN_BOPOMOFO_MAP[right]
        yield (right, typo, symbols,
               ['IS_PINYIN', 'PINYIN_CORRECT_V_U'], get_chewing(right))
160
161 ### main function ###
if __name__ == "__main__":
    # Pre-check: report pinyins that have no chewing mapping.
    check_pinyin_chewing_map()

    # Dump every generated table entry, one per line.
    for entry in gen_pinyin_list():
        print(entry)