4 import sqlite3 as sqlite
5 from pinyin_data import valid_syllables, decode_syllable, initials, finals
def get_userdict_path():
    """Return the path of SunPinyin's user dictionary for the current user.

    On macOS the path under ``~/Library/Application Support`` is returned
    unconditionally.  On other platforms the known SunPinyin data
    directories are probed in order and the ``userdict`` file inside the
    first one that exists is returned.

    Raises:
        RuntimeError: if none of the known data directories exists.
    """
    # HOME can be unset (e.g. in a stripped-down environment); fall back to
    # the account's home directory rather than failing on ``None + str``.
    homedir = os.environ.get("HOME") or os.path.expanduser("~")

    if sys.platform == "darwin":
        return homedir + "/Library/Application Support/SunPinyin/userdict"

    # FIXME: not sure how to get the ibus version or wrapper type (xim or ibus)
    # Probe order matters: the first existing directory wins.
    candidate_dirs = (
        "/.cache/ibus/sunpinyin",
        "/.ibus/sunpinyin",
        "/.sunpinyin",
    )
    for datadir in candidate_dirs:
        if os.path.exists(homedir + datadir):
            return homedir + datadir + "/userdict"

    # String exceptions are not valid; raise a real exception object so the
    # message actually reaches the caller.
    raise RuntimeError("Can not detect sunpinyin's userdict!")
# Return the location of SunPinyin's system dictionary file (pydict_sc.bin).
# NOTE(review): the embedded line numbers jump from 30 to 34 below, so part of
# this function may be missing from this view -- confirm against the full file
# before changing its control flow.
25 def get_sysdict_path ():
# On macOS, look for the dictionary shipped inside a SunPinyin.app that is
# installed under the user's home directory.
26 if sys.platform == "darwin":
27 homedir = os.environ.get("HOME")
28 sysdict_path = "/Library/Input Methods/SunPinyin.app/Contents/Resources/pydict_sc.bin"
29 if os.path.exists (homedir + sysdict_path):
30 return homedir + sysdict_path
# Fixed system-wide location of the dictionary.
34 return "/usr/lib/sunpinyin/data/pydict_sc.bin"
# Read words out of the binary system dictionary found by get_sysdict_path().
# NOTE(review): several lines of this function (seeks, the read into `str`,
# and the loop body / return) are not visible in this view; the comments below
# describe only the visible statements.
36 def load_system_dict ():
37 sysdict_path = get_sysdict_path ()
# The dictionary is a binary file, so it is opened in "rb" mode.
38 f = open (sysdict_path, "rb")
# Interpret the next 4 bytes as an unsigned int ('I', native byte order).
# presumably the file offset at which the word table starts -- TODO confirm.
41 word_offset = struct.unpack ('I', f.read(4))[0]
# `str` is presumably a local holding raw bytes read from the file (its
# assignment is not visible here; it shadows the builtin). The data is decoded
# as UTF-32 and split on NUL characters, giving one candidate word per piece.
47 for w in str.decode('UTF-32').split('\0'):
# Insert (pinyin, word) records into SunPinyin's sqlite user dictionary.
#   records       -- iterable of (pystr, utf8str) pairs; pystr holds the
#                    syllables of the word separated by "'".
#   userdict_path -- path of the sqlite database; when empty, the path is
#                    auto-detected with get_userdict_path().
# NOTE(review): many lines of this function (string delimiters, try/except,
# record construction, commit logic) are not visible in this view; comments
# describe only the visible statements.
54 def import_to_sunpinyin_user_dict (records, userdict_path=''):
55 userdict_path = userdict_path if userdict_path else get_userdict_path()
56 db = sqlite.connect (userdict_path)
# Load the system dictionary's words; used below for the `utf8str in sysdict`
# membership check. The SQL that follows creates the `dict` table if it does
# not exist yet: one row per word, with up to six (i, f) column pairs and a
# UNIQUE constraint on utf8str.
58 sysdict = load_system_dict()
61 CREATE TABLE IF NOT EXISTS dict(
62 id INTEGER PRIMARY KEY, len INTEGER,
63 i0 INTEGER, i1 INTEGER, i2 INTEGER, i3 INTEGER, i4 INTEGER, i5 INTEGER,
64 f0 INTEGER, f1 INTEGER, f2 INTEGER, f3 INTEGER, f4 INTEGER, f5 INTEGER,
65 utf8str TEXT, UNIQUE (utf8str));
67 db.executescript (sqlstring)
# Process each record in turn.
71 for (pystr, utf8str) in records:
# Map every "'"-separated syllable through valid_syllables; the message below
# is printed for records containing a syllable that is not recognized
# (presumably from an exception handler that is not visible here).
73 syllables = [valid_syllables[s] for s in pystr.split("'")]
75 print "[%s] has un-recognized syllables, ignoring this record!" % pystr
# Only words of 2 to 6 syllables are accepted, matching the six i/f column
# pairs of the table.
78 if len (syllables) < 2 or len (syllables) > 6:
79 print "[%s] is too long or too short for sunpinyin userdict" % utf8str
# Words already present in the system dictionary are presumably skipped
# (the body of this `if` is not visible) -- TODO confirm.
82 if utf8str in sysdict:
83 #print "[%s] is already in sunpinyin's sysdict" % utf8str
# First field of the row is the syllable count (the `len` column).
87 record[0] = len (syllables)
# Split the encoded syllable value: bits 12 and up go to `i`, bits 4-11 go
# to `f` (presumably the initial and final ids stored in the iN/fN columns).
92 i, f = s>>12, (s&0x00ff0)>>4
100 INSERT INTO dict (len, i0, f0, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, utf8str)
101 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
104 db.execute (sqlstring, record)
105 #print "[%s] is imported into sunpinyin's userdict" % utf8str
# presumably the pending inserts are committed every 100 records -- the
# statements guarded by this check are not visible; TODO confirm.
108 if batch_count == 100:
113 #print "[%s] is already in sunpinyin's userdict" % utf8str
# Print every entry of SunPinyin's sqlite user dictionary.
#   userdict_path -- path of the sqlite database; when empty, the path is
#                    auto-detected with get_userdict_path().
# NOTE(review): the lines that unpack each row into `id`, `l`, `i`, `f` and
# `str` (embedded lines 127-131) are not visible in this view.
119 def export_sunpinyin_user_dict (userdict_path=''):
120 userdict_path = userdict_path if userdict_path else get_userdict_path()
121 db = sqlite.connect (userdict_path)
# Fetch all rows of the `dict` table at once.
123 sqlstring = "SELECT * FROM dict"
124 result = list (db.execute (sqlstring).fetchall ())
126 for record in result:
# Rebuild each syllable by concatenating the strings looked up in `initials`
# and `finals` for the first `l` positions of the row.
132 syls = [initials[i[x]] + finals[f[x]] for x in range(l)]
# Output one line per word: the UTF-8 encoded word, its id, and the
# syllables joined with "'".
133 print str.encode ('UTF-8'), id, "'".join(syls)
if __name__ == "__main__":
    # Script entry point: export the user dictionary, letting the function
    # auto-detect the dictionary path (no explicit path is passed).
    export_sunpinyin_user_dict()