Imported Upstream version 2.3.1

[platform/upstream/harfbuzz.git] / src / gen-use-table.py
diff --git a/src/gen-use-table.py b/src/gen-use-table.py

index 6aa5f88..be204b6 100755 (executable)
--- a/src/gen-use-table.py
+++ b/src/gen-use-table.py
@@ -1,14 +1,16 @@
  #!/usr/bin/env python
+# flake8: noqa
  
  from __future__ import print_function, division, absolute_import
  
-import io, sys
+import io
+import sys
  
  if len (sys.argv) != 5:
         print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
         sys.exit (1)
  
-BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
+BLACKLISTED_BLOCKS = ["Thai", "Lao"]
  
  files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
  
@@ -47,6 +49,11 @@ defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')
  data[0][0x034F] = defaults[0]
  data[0][0x2060] = defaults[0]
  data[0][0x20F0] = defaults[0]
+# TODO https://github.com/roozbehp/unicode-data/issues/9
+data[0][0x11C44] = 'Consonant_Placeholder'
+data[0][0x11C45] = 'Consonant_Placeholder'
+# TODO https://github.com/harfbuzz/harfbuzz/pull/1399
+data[0][0x111C8] = 'Consonant_Placeholder'
  for u in range (0xFE00, 0xFE0F + 1):
         data[0][u] = defaults[0]
  
@@ -165,7 +172,7 @@ def is_BASE(U, UISC, UGC):
  def is_BASE_IND(U, UISC, UGC):
         #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
         return (UISC in [Consonant_Dead, Modifying_Letter] or
-               (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x11A3F, 0x11A45]) or
+               (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x111C8, 0x11A3F, 0x11A45, 0x11C44, 0x11C45]) or
                 False # SPEC-DRAFT-OUTDATED! U == 0x002D
                 )
  def is_BASE_NUM(U, UISC, UGC):
@@ -194,7 +201,11 @@ def is_CONS_SUB(U, UISC, UGC):
  def is_CONS_WITH_STACKER(U, UISC, UGC):
         return UISC == Consonant_With_Stacker
  def is_HALANT(U, UISC, UGC):
-       return UISC in [Virama, Invisible_Stacker]
+       return UISC in [Virama, Invisible_Stacker] and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC)
+def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC):
+       # https://github.com/harfbuzz/harfbuzz/issues/1102
+       # https://github.com/harfbuzz/harfbuzz/issues/1379
+       return U in [0x11046, 0x1134D]
  def is_HALANT_NUM(U, UISC, UGC):
         return UISC == Number_Joiner
  def is_ZWNJ(U, UISC, UGC):
@@ -245,6 +256,7 @@ use_mapping = {
         'SUB':  is_CONS_SUB,
         'CS':   is_CONS_WITH_STACKER,
         'H':    is_HALANT,
+       'HVM':  is_HALANT_OR_VOWEL_MODIFIER,
         'HN':   is_HALANT_NUM,
         'ZWNJ': is_ZWNJ,
         'ZWJ':  is_ZWJ,
@@ -278,8 +290,8 @@ use_positions = {
         'V': {
                 'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
                 'Blw': [Bottom, Overstruck, Bottom_And_Right],
-               'Pst': [Right],
-               'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+               'Pst': [Right, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+               'Pre': [Left],
         },
         'VM': {
                 'Abv': [Top],
@@ -292,6 +304,7 @@ use_positions = {
                 'Blw': [Bottom],
         },
         'H': None,
+       'HVM': None,
         'B': None,
         'FM': None,
         'SUB': None,
@@ -304,11 +317,28 @@ def map_to_use(data):
  
                 # Resolve Indic_Syllabic_Category
  
-               # TODO: These don't have UISC assigned in Unicode 8.0, but
-               # have UIPC
+               # TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
                 if U == 0x17DD: UISC = Vowel_Dependent
                 if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
  
+               # Tibetan:
+               # TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
+               if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
+               if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
+               # Overrides so that text in NFC order matches the syllable pattern
+               # https://github.com/harfbuzz/harfbuzz/issues/1012
+               if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC):
+                       if UIPC == Top:
+                               UIPC = Bottom
+
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/982
+               # also  https://github.com/harfbuzz/harfbuzz/issues/1012
+               if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
+                       if UIPC == Top:
+                               UIPC = Bottom
+                       elif UIPC == Bottom:
+                               UIPC = Top
+
                 # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
                 if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
  
@@ -325,6 +355,12 @@ def map_to_use(data):
                 # TODO: https://github.com/harfbuzz/harfbuzz/pull/626
                 if U == 0xA8B4: UISC = Consonant_Medial
  
+               # TODO: https://github.com/harfbuzz/harfbuzz/issues/1105
+               if U == 0x11134: UISC = Gemination_Mark
+
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/1399
+               if U == 0x111C9: UISC = Consonant_Final
+
                 values = [k for k,v in items if v(U,UISC,UGC)]
                 assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
                 USE = values[0]
@@ -344,6 +380,9 @@ def map_to_use(data):
                 if 0xA926 <= U <= 0xA92A: UIPC = Top
                 if U == 0x111CA: UIPC = Bottom
                 if U == 0x11300: UIPC = Top
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
+               if U == 0x11302: UIPC = Top
+               if U == 0x1133C: UIPC = Bottom
                 if U == 0x1171E: UIPC = Left # Correct?!
                 if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
                 if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
@@ -378,7 +417,7 @@ for h in headers:
                 print (" * %s" % (l.strip()))
  print (" */")
  print ()
-print ('#include "hb-ot-shape-complex-use-private.hh"')
+print ('#include "hb-ot-shape-complex-use.hh"')
  print ()
  
  total = 0
@@ -416,6 +455,8 @@ num = 0
  offset = 0
  starts = []
  ends = []
+print ('#pragma GCC diagnostic push')
+print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
  for k,v in sorted(use_mapping.items()):
         if k in use_positions and use_positions[k]: continue
         print ("#define %s      USE_%s  /* %s */" % (k, k, v.__name__[3:]))
@@ -424,6 +465,7 @@ for k,v in sorted(use_positions.items()):
         for suf in v.keys():
                 tag = k + suf
                 print ("#define %s      USE_%s" % (tag, tag))
+print ('#pragma GCC diagnostic pop')
  print ("")
  print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
  for u in uu: