Imported Upstream version 2.3.1
[platform/upstream/harfbuzz.git] / src / gen-use-table.py
index 6aa5f88..be204b6 100755 (executable)
@@ -1,14 +1,16 @@
 #!/usr/bin/env python
+# flake8: noqa
 
 from __future__ import print_function, division, absolute_import
 
-import io, sys
+import io
+import sys
 
 if len (sys.argv) != 5:
        print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
        sys.exit (1)
 
-BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
+BLACKLISTED_BLOCKS = ["Thai", "Lao"]
 
 files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
 
@@ -47,6 +49,11 @@ defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')
 data[0][0x034F] = defaults[0]
 data[0][0x2060] = defaults[0]
 data[0][0x20F0] = defaults[0]
+# TODO https://github.com/roozbehp/unicode-data/issues/9
+data[0][0x11C44] = 'Consonant_Placeholder'
+data[0][0x11C45] = 'Consonant_Placeholder'
+# TODO https://github.com/harfbuzz/harfbuzz/pull/1399
+data[0][0x111C8] = 'Consonant_Placeholder'
 for u in range (0xFE00, 0xFE0F + 1):
        data[0][u] = defaults[0]
 
@@ -165,7 +172,7 @@ def is_BASE(U, UISC, UGC):
 def is_BASE_IND(U, UISC, UGC):
        #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
        return (UISC in [Consonant_Dead, Modifying_Letter] or
-               (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x11A3F, 0x11A45]) or
+               (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x111C8, 0x11A3F, 0x11A45, 0x11C44, 0x11C45]) or
                False # SPEC-DRAFT-OUTDATED! U == 0x002D
                )
 def is_BASE_NUM(U, UISC, UGC):
@@ -194,7 +201,11 @@ def is_CONS_SUB(U, UISC, UGC):
 def is_CONS_WITH_STACKER(U, UISC, UGC):
        return UISC == Consonant_With_Stacker
 def is_HALANT(U, UISC, UGC):
-       return UISC in [Virama, Invisible_Stacker]
+       return UISC in [Virama, Invisible_Stacker] and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC)
+def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC):
+       # https://github.com/harfbuzz/harfbuzz/issues/1102
+       # https://github.com/harfbuzz/harfbuzz/issues/1379
+       return U in [0x11046, 0x1134D]
 def is_HALANT_NUM(U, UISC, UGC):
        return UISC == Number_Joiner
 def is_ZWNJ(U, UISC, UGC):
@@ -245,6 +256,7 @@ use_mapping = {
        'SUB':  is_CONS_SUB,
        'CS':   is_CONS_WITH_STACKER,
        'H':    is_HALANT,
+       'HVM':  is_HALANT_OR_VOWEL_MODIFIER,
        'HN':   is_HALANT_NUM,
        'ZWNJ': is_ZWNJ,
        'ZWJ':  is_ZWJ,
@@ -278,8 +290,8 @@ use_positions = {
        'V': {
                'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
                'Blw': [Bottom, Overstruck, Bottom_And_Right],
-               'Pst': [Right],
-               'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+               'Pst': [Right, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+               'Pre': [Left],
        },
        'VM': {
                'Abv': [Top],
@@ -292,6 +304,7 @@ use_positions = {
                'Blw': [Bottom],
        },
        'H': None,
+       'HVM': None,
        'B': None,
        'FM': None,
        'SUB': None,
@@ -304,11 +317,28 @@ def map_to_use(data):
 
                # Resolve Indic_Syllabic_Category
 
-               # TODO: These don't have UISC assigned in Unicode 8.0, but
-               # have UIPC
+               # TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
                if U == 0x17DD: UISC = Vowel_Dependent
                if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
 
+               # Tibetan:
+               # TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
+               if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
+               if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
+               # Overrides to allow NFC order matching syllable
+               # https://github.com/harfbuzz/harfbuzz/issues/1012
+               if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC):
+                       if UIPC == Top:
+                               UIPC = Bottom
+
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/982
+               # also  https://github.com/harfbuzz/harfbuzz/issues/1012
+               if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
+                       if UIPC == Top:
+                               UIPC = Bottom
+                       elif UIPC == Bottom:
+                               UIPC = Top
+
                # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
                if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
 
@@ -325,6 +355,12 @@ def map_to_use(data):
                # TODO: https://github.com/harfbuzz/harfbuzz/pull/626
                if U == 0xA8B4: UISC = Consonant_Medial
 
+               # TODO: https://github.com/harfbuzz/harfbuzz/issues/1105
+               if U == 0x11134: UISC = Gemination_Mark
+
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/1399
+               if U == 0x111C9: UISC = Consonant_Final
+
                values = [k for k,v in items if v(U,UISC,UGC)]
                assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
                USE = values[0]
@@ -344,6 +380,9 @@ def map_to_use(data):
                if 0xA926 <= U <= 0xA92A: UIPC = Top
                if U == 0x111CA: UIPC = Bottom
                if U == 0x11300: UIPC = Top
+               # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
+               if U == 0x11302: UIPC = Top
+               if U == 0x1133C: UIPC = Bottom
                if U == 0x1171E: UIPC = Left # Correct?!
                if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
                if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
@@ -378,7 +417,7 @@ for h in headers:
                print (" * %s" % (l.strip()))
 print (" */")
 print ()
-print ('#include "hb-ot-shape-complex-use-private.hh"')
+print ('#include "hb-ot-shape-complex-use.hh"')
 print ()
 
 total = 0
@@ -416,6 +455,8 @@ num = 0
 offset = 0
 starts = []
 ends = []
+print ('#pragma GCC diagnostic push')
+print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
 for k,v in sorted(use_mapping.items()):
        if k in use_positions and use_positions[k]: continue
        print ("#define %s      USE_%s  /* %s */" % (k, k, v.__name__[3:]))
@@ -424,6 +465,7 @@ for k,v in sorted(use_positions.items()):
        for suf in v.keys():
                tag = k + suf
                print ("#define %s      USE_%s" % (tag, tag))
+print ('#pragma GCC diagnostic pop')
 print ("")
 print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
 for u in uu: