* https://unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip
"""
-import os.path, sys, re
+import sys, re
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
logging.info('Preparing data tables...')
+
+# This is how the data is encoded:
+#
+# General_Category (gc), Canonical_Combining_Class (ccc),
+# and Script (sc) are encoded as integers.
+#
+# Mirroring character (bmg) is encoded as the signed difference from
+# the original code point, or 0 when there is no mirroring character.
+#
+# Composition & Decomposition (dm) are encoded elaborately,
+# as discussed below.
+
gc = [u['gc'] for u in ucd]
ccc = [int(u['ccc']) for u in ucd]
bmg = [int(v, 16) - int(u) if v else 0 for u,v in enumerate(u['bmg'] for u in ucd)]
-#gc_ccc_non0 = set((cat,klass) for cat,klass in zip(gc,ccc) if klass)
-#gc_bmg_non0 = set((cat,mirr) for cat,mirr in zip(gc, bmg) if mirr)
-
sc = [u['sc'] for u in ucd]
+
+# Prepare Compose / Decompose data
+#
+# This code is very dense. See hb_ucd_compose() / hb_ucd_decompose() for the logic.
+
dm = {i:tuple(int(v, 16) for v in u['dm'].split()) for i,u in enumerate(ucd)
if u['dm'] != '#' and u['dt'] == 'can' and not (0xAC00 <= i < 0xAC00+11172)}
ce = {i for i,u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}
dm_order.update(dm1_order)
dm_order.update(dm2_order)
+
+# Prepare General_Category / Script mapping arrays
+
gc_order = dict()
for i,v in enumerate(('Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf',
sc_order[i] = tag
sc_array.append(name)
-DEFAULT = 1
-COMPACT = 3
-SLOPPY = 5
+# Compression steps used when writing out main data, and the level for each
+
+DEFAULT = 'DEFAULT'
+COMPACT = 'COMPACT'
+SLOPPY = 'SLOPPY'
+
+compression_level = {
+ DEFAULT: 5,
+ COMPACT: 9,
+ SLOPPY: 9,
+}
logging.info('Generating output...')
print("/* == Start of generated table == */")
print('#include "hb.hh"')
print()
+
+# Write mapping data
+
code = packTab.Code('_hb_ucd')
sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
('dm', dm, None, dm_order),
]
-for compression in (DEFAULT, COMPACT, SLOPPY):
+
+# Write main data
+
+for step in (DEFAULT, COMPACT, SLOPPY):
+ compression = compression_level[step]
logging.info(' Compression=%d:' % compression)
print()
- if compression == DEFAULT:
+ if step == DEFAULT:
print('#ifndef HB_OPTIMIZE_SIZE')
- elif compression == COMPACT:
+ elif step == COMPACT:
print('#elif !defined(HB_NO_UCD_UNASSIGNED)')
- else:
+ elif step == SLOPPY:
print('#else')
+ else:
+ assert False
print()
- if compression == SLOPPY:
+ if step == SLOPPY:
for i in range(len(gc)):
if (i % 128) and gc[i] == 'Cn':
gc[i] = gc[i - 1]
print()
+
print('#endif')
print()