From 9f34b7fd069299791a3dff1a205014a4b42759e8 Mon Sep 17 00:00:00 2001
From: Peng Huang
Date: Wed, 28 Apr 2010 18:48:11 +0800
Subject: [PATCH] Refine python script

---
 src/update-simptrad-table.py | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/update-simptrad-table.py b/src/update-simptrad-table.py
index 92721c1..a403ec8 100755
--- a/src/update-simptrad-table.py
+++ b/src/update-simptrad-table.py
@@ -23,33 +23,33 @@ def filter_more(records, n):
     hanm = filter(lambda (k, v): convert(k, hand, n) != v, records)
     return hanm + han
 
-def eq_filter(k, v):
-    for i in range(0, len(k)):
-        if k[i] == v[i]:
-            continue
-        if k[i] not in S_2_T:
-            return False
-        if v[i] not in S_2_T[k[i]]:
-            return False
+def filter_func(args):
+    k, v = args
+    # length is not equal or length > 6
+    if len(k) != len(v) or len(k) > 6:
+        return False
+    # k includes invalid hanzi
+    if not all(c in valid_hanzi for c in k):
+        return False
+    # v includes invalid hanzi
+    if not all(c in valid_hanzi for c in v):
+        return False
+
+    # # check chars in k and v
+    # for c1, c2 in zip(k, v):
+    #     if c1 == c2:
+    #         continue
+    #     if c2 not in S_2_T.get(c1, []):
+    #         return False
     return True
 
 def get_records():
     records = zh2Hant.items()
 
-    # remove invalid hanzi
-    records = filter(lambda (k, v): all([c in valid_hanzi for c in k]) and all([c in valid_hanzi for c in v]), records)
-
-    # remove if length is not equal
-    records = filter(lambda (k, v): len(k) == len(v), records)
-
-    # remove if length > 4
-    records = filter(lambda (k, v): len(k) <= 6, records)
-
-    # remove
-    # records = filter(lambda (k, v):eq_filter(k, v), records)
+    records = filter(filter_func, records)
 
     maxlen = max(map(lambda (k,v): len(k), records))
-    for i in range(1, maxlen - 1, 1):
+    for i in range(1, maxlen - 1):
         records = filter_more(records, i)
 
     records.sort()
-- 
2.7.4