Imported Upstream version 8.2.2

[platform/upstream/harfbuzz.git] / src / gen-ucd-table.py
diff --git a/src/gen-ucd-table.py b/src/gen-ucd-table.py

index 402de33..d85ae4f 100755 (executable)
--- a/src/gen-ucd-table.py
+++ b/src/gen-ucd-table.py
@@ -6,7 +6,7 @@ Input file:
  * https://unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip
  """
  
-import os.path, sys, re
+import sys, re
  import logging
  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
  
@@ -25,14 +25,28 @@ hb_common_h = 'hb-common.h' if len (sys.argv) < 3 else sys.argv[2]
  
  logging.info('Preparing data tables...')
  
+
+# This is how the data is encoded:
+#
+# General_Category (gc), Canonical_Combining_Class (ccc),
+# and Script (sc) are encoded as integers.
+#
+# Mirroring character (bmg) is encoded as the difference
+# from the original character.
+#
+# Composition & Decomposition (dm) are encoded elaborately,
+# as discussed below.
+
  gc = [u['gc'] for u in ucd]
  ccc = [int(u['ccc']) for u in ucd]
  bmg = [int(v, 16) - int(u) if v else 0 for u,v in enumerate(u['bmg'] for u in ucd)]
-#gc_ccc_non0 = set((cat,klass) for cat,klass in zip(gc,ccc) if klass)
-#gc_bmg_non0 = set((cat,mirr) for cat,mirr in zip(gc, bmg) if mirr)
-
  sc = [u['sc'] for u in ucd]
  
+
+# Prepare Compose / Decompose data
+#
+# This code is very dense.  See hb_ucd_compose() / hb_ucd_decompose() for the logic.
+
  dm = {i:tuple(int(v, 16) for v in u['dm'].split()) for i,u in enumerate(ucd)
        if u['dm'] != '#' and u['dt'] == 'can' and not (0xAC00 <= i < 0xAC00+11172)}
  ce = {i for i,u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}
@@ -63,6 +77,9 @@ dm_order = {None: 0}
  dm_order.update(dm1_order)
  dm_order.update(dm2_order)
  
+
+# Prepare General_Category / Script mapping arrays
+
  gc_order = dict()
  for i,v in enumerate(('Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
                        'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf',
@@ -83,10 +100,18 @@ for line in open(hb_common_h):
      sc_order[i] = tag
      sc_array.append(name)
  
-DEFAULT = 1
-COMPACT = 3
-SLOPPY  = 5
  
+# Define compression steps and write out the file preamble
+
+DEFAULT = 'DEFAULT'
+COMPACT = 'COMPACT'
+SLOPPY  = 'SLOPPY'
+
+compression_level = {
+    DEFAULT: 5,
+    COMPACT: 9,
+    SLOPPY:  9,
+}
  
  logging.info('Generating output...')
  print("/* == Start of generated table == */")
@@ -104,6 +129,9 @@ print()
  print('#include "hb.hh"')
  print()
  
+
+# Write mapping data
+
  code = packTab.Code('_hb_ucd')
  sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
  dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
@@ -120,18 +148,24 @@ datasets = [
      ('dm', dm, None, dm_order),
  ]
  
-for compression in (DEFAULT, COMPACT, SLOPPY):
+
+# Write main data
+
+for step in (DEFAULT, COMPACT, SLOPPY):
+    compression = compression_level[step]
      logging.info('  Compression=%d:' % compression)
      print()
-    if compression == DEFAULT:
+    if step == DEFAULT:
          print('#ifndef HB_OPTIMIZE_SIZE')
-    elif compression == COMPACT:
+    elif step == COMPACT:
          print('#elif !defined(HB_NO_UCD_UNASSIGNED)')
-    else:
+    elif step == SLOPPY:
          print('#else')
+    else:
+        assert False
      print()
  
-    if compression == SLOPPY:
+    if step == SLOPPY:
          for i in range(len(gc)):
              if (i % 128) and gc[i] == 'Cn':
                  gc[i] = gc[i - 1]
@@ -157,6 +191,7 @@ for compression in (DEFAULT, COMPACT, SLOPPY):
  
      print()
  
+
  print('#endif')
  print()