-#!/usr/bin/env python
+#!/usr/bin/env python3
-from __future__ import print_function, division, absolute_import
+"""usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
-import io, os.path, sys
+Input files:
+* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
+* https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
+"""
+
+import os.path, sys
if len (sys.argv) != 4:
- print ("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
- sys.exit (1)
+ sys.exit (__doc__)
-files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
+files = [open (x, encoding='utf-8') for x in sys.argv[1:]]
headers = [[files[0].readline (), files[0].readline ()], [files[2].readline (), files[2].readline ()]]
headers.append (["UnicodeData.txt does not have a header."])
values[u] = value
short_value = {}
- for value in set([v for v in values.values()] + ['JOINING_TYPE_X']):
+ for value in sorted (set ([v for v in values.values ()] + ['JOINING_TYPE_X'])):
short = ''.join(x[0] for x in value.split('_')[2:])
assert short not in short_value.values()
short_value[value] = short
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
- page_bits = 12;
+ page_bits = 12
print ()
print ("static unsigned int")
print ("joining_type (hb_codepoint_t u)")