#!/usr/bin/env python
+# flake8: noqa
from __future__ import print_function, division, absolute_import
-import io, sys
+import io
+import sys
# Exactly four input paths are required (sys.argv also counts the script
# name, hence 5); otherwise print the usage line to stderr and exit with 1.
if len (sys.argv) != 5:
print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
sys.exit (1)
# Patch note: the "+" side drops "Tibetan" from this list, and the same patch
# adds Tibetan-specific overrides further down. How the list is consumed is
# outside this excerpt.
-BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
+BLACKLISTED_BLOCKS = ["Thai", "Lao"]
# Open every input file named on the command line as UTF-8 text, in
# argument order.
files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
# Hard-coded per-code-point overrides of data[0].
# NOTE(review): data[0] presumably holds the Indic_Syllabic_Category values
# (first input file on the command line) -- confirm against the loader.
data[0][0x034F] = defaults[0]
data[0][0x2060] = defaults[0]
data[0][0x20F0] = defaults[0]
+# TODO https://github.com/roozbehp/unicode-data/issues/9
+data[0][0x11C44] = 'Consonant_Placeholder'
+data[0][0x11C45] = 'Consonant_Placeholder'
+# TODO https://github.com/harfbuzz/harfbuzz/pull/1399
+data[0][0x111C8] = 'Consonant_Placeholder'
# Reset the whole inclusive range U+FE00..U+FE0F to the default value too.
for u in range (0xFE00, 0xFE0F + 1):
data[0][u] = defaults[0]
def is_BASE_IND(U, UISC, UGC):
#SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
return (UISC in [Consonant_Dead, Modifying_Letter] or
- (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x11A3F, 0x11A45]) or
+ (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x111C8, 0x11A3F, 0x11A45, 0x11C44, 0x11C45]) or
False # SPEC-DRAFT-OUTDATED! U == 0x002D
)
def is_BASE_NUM(U, UISC, UGC):
def is_CONS_WITH_STACKER(U, UISC, UGC):
# True when the syllabic category is Consonant_With_Stacker.
# U and UGC are accepted for a uniform predicate signature but not used.
return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UGC):
# Matches Virama / Invisible_Stacker characters. On the "+" side it also
# rejects anything claimed by is_HALANT_OR_VOWEL_MODIFIER, so the two
# predicates can never both match the same code point.
- return UISC in [Virama, Invisible_Stacker]
+ return UISC in [Virama, Invisible_Stacker] and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC)
# Predicate added by this patch: matches exactly U+11046 and U+1134D.
# The decision depends on the code point only; UISC and UGC are ignored.
# is_HALANT explicitly excludes whatever this predicate matches.
+def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC):
+ # https://github.com/harfbuzz/harfbuzz/issues/1102
+ # https://github.com/harfbuzz/harfbuzz/issues/1379
+ return U in [0x11046, 0x1134D]
def is_HALANT_NUM(U, UISC, UGC):
# True when the syllabic category is Number_Joiner.
# U and UGC are accepted for a uniform predicate signature but not used.
return UISC == Number_Joiner
def is_ZWNJ(U, UISC, UGC):
'SUB': is_CONS_SUB,
'CS': is_CONS_WITH_STACKER,
'H': is_HALANT,
+ 'HVM': is_HALANT_OR_VOWEL_MODIFIER,
'HN': is_HALANT_NUM,
'ZWNJ': is_ZWNJ,
'ZWJ': is_ZWJ,
'V': {
'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
'Blw': [Bottom, Overstruck, Bottom_And_Right],
- 'Pst': [Right],
- 'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+ 'Pst': [Right, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+ 'Pre': [Left],
},
'VM': {
'Abv': [Top],
'Blw': [Bottom],
},
'H': None,
+ 'HVM': None,
'B': None,
'FM': None,
'SUB': None,
# Resolve Indic_Syllabic_Category
- # TODO: These don't have UISC assigned in Unicode 8.0, but
- # have UIPC
+ # TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
if U == 0x17DD: UISC = Vowel_Dependent
if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
+ # Tibetan:
+ # TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
+ if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
+ if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
+ # Overrides to allow NFC order matching syllable
+ # https://github.com/harfbuzz/harfbuzz/issues/1012
+ if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC):
+ if UIPC == Top:
+ UIPC = Bottom
+
+ # TODO: https://github.com/harfbuzz/harfbuzz/pull/982
+ # also https://github.com/harfbuzz/harfbuzz/issues/1012
+ if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
+ if UIPC == Top:
+ UIPC = Bottom
+ elif UIPC == Bottom:
+ UIPC = Top
+
# TODO: https://github.com/harfbuzz/harfbuzz/pull/627
if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
# TODO: https://github.com/harfbuzz/harfbuzz/pull/626
if U == 0xA8B4: UISC = Consonant_Medial
+ # TODO: https://github.com/harfbuzz/harfbuzz/issues/1105
+ if U == 0x11134: UISC = Gemination_Mark
+
+ # TODO: https://github.com/harfbuzz/harfbuzz/pull/1399
+ if U == 0x111C9: UISC = Consonant_Final
+
values = [k for k,v in items if v(U,UISC,UGC)]
assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
USE = values[0]
if 0xA926 <= U <= 0xA92A: UIPC = Top
if U == 0x111CA: UIPC = Bottom
if U == 0x11300: UIPC = Top
+ # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
+ if U == 0x11302: UIPC = Top
+ if U == 0x1133C: UIPC = Bottom
if U == 0x1171E: UIPC = Left # Correct?!
if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
print (" * %s" % (l.strip()))
print (" */")
print ()
-print ('#include "hb-ot-shape-complex-use-private.hh"')
+print ('#include "hb-ot-shape-complex-use.hh"')
print ()
total = 0
offset = 0
starts = []
ends = []
# The "+" side brackets the generated "#define" lines with a GCC diagnostic
# push / ignored "-Wunused-macros" before them and a matching pop after them.
# NOTE(review): parts of the loops between the pragmas appear to be elided in
# this excerpt -- check the full file before relying on the loop structure.
+print ('#pragma GCC diagnostic push')
+print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
for k,v in sorted(use_mapping.items()):
if k in use_positions and use_positions[k]: continue
print ("#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]))
for suf in v.keys():
tag = k + suf
print ("#define %s USE_%s" % (tag, tag))
+print ('#pragma GCC diagnostic pop')
print ("")
print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
for u in uu: