import sys
# Require exactly three input paths: sys.argv holds the script name plus the
# three data files. Otherwise print the usage line to stderr and exit with
# status 1.
if len (sys.argv) != 4:
- print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
+ print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt"
sys.exit (1)
-BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
# Individual code points that are kept no matter which block they fall in;
# they are later pulled out of the main data and handled as "singles".
+ALLOWED_SINGLES = [0x00A0, 0x25CC]
# Block names whose code points are retained when the combined data is
# filtered; every entry whose block name is not listed here is dropped
# (unless the code point is one of ALLOWED_SINGLES).
+ALLOWED_BLOCKS = [
+ 'Basic Latin',
+ 'Latin-1 Supplement',
+ 'Devanagari',
+ 'Bengali',
+ 'Gurmukhi',
+ 'Gujarati',
+ 'Oriya',
+ 'Tamil',
+ 'Telugu',
+ 'Kannada',
+ 'Malayalam',
+ 'Sinhala',
+ 'Myanmar',
+ 'Khmer',
+ 'Vedic Extensions',
+ 'General Punctuation',
+ 'Superscripts and Subscripts',
+ 'Devanagari Extended',
+ 'Myanmar Extended-B',
+ 'Myanmar Extended-A',
+]
# Open every input file named on the command line (Python 2 `file` builtin).
files = [file (x) for x in sys.argv[1:]]
# First time code point `u` is seen, start its entry from a fresh copy of
# `defaults`; then store value `v` in slot `i`.
# NOTE(review): `u`, `i`, `v` and `defaults` come from enclosing code that is
# not visible here -- presumably `i` is the index of the input file; confirm.
if not u in combined:
combined[u] = list (defaults)
combined[u][i] = v
# Keep an entry only if its code point is an allowed single or its third
# field (index 2, compared against block names) is an allowed block.
-combined = {k:v for k,v in combined.items() if v[2] not in BLACKLISTED_BLOCKS}
+combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
# From here on the filtered mapping is known as `data`; the old name is
# removed so it cannot be used by accident.
data = combined
del combined
# Number of entries that survived filtering.
num = len (data)
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
# of `data` and into `singles`, keyed by code point. `data[u]` raises
# KeyError if one of these code points is absent from the filtered data.
singles = {}
-for u in [0x00A0, 0x25CC]:
+for u in ALLOWED_SINGLES:
singles[u] = data[u]
del data[u]
# Emit the opening of a C block comment in the generated output. It records
# the command line that produces the table and introduces the list of
# input-file headers that follows.
print "/*"
print " * The following table is generated by running:"
print " *"
-print " * ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
+print " * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt"
print " *"
print " * on files with these headers:"
print " *"
# Every page (code point shifted right by `page_bits`) touched by a range
# start, a range end, or a single code point. The list concatenation relies
# on Python 2's dict.keys() returning a list.
pages = set([u>>page_bits for u in starts+ends+singles.keys()])
# Emit one `case` per page, in ascending page order.
for p in sorted(pages):
print " case 0x%0Xu:" % p
- for (start,end) in zip (starts, ends):
- if p not in [start>>page_bits, end>>page_bits]: continue
- offset = "indic_offset_0x%04xu" % start
- print " if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
# Singles on this page are emitted first, each as an exact-match test that
# returns its two category values via the `short` name tables.
for u,d in singles.items ():
if p != u>>page_bits: continue
print " if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
# Then every range whose start or end lies on this page: emit a bounds check
# and an index into indic_table using the range's named offset.
# NOTE(review): the emitted upper bound is `end-1`, but page membership is
# tested with `end`; confirm this is intended when a range ends exactly on a
# page boundary.
+ for (start,end) in zip (starts, ends):
+ if p not in [start>>page_bits, end>>page_bits]: continue
+ offset = "indic_offset_0x%04xu" % start
+ print " if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
print " break;"
print ""
print " default:"