import sys
-header = sys.stdin.readline(), sys.stdin.readline()
-dic = dict()
-for line in sys.stdin:
- if line[:1] != '0':
- continue
-
- fields = [x.strip() for x in line.split(';')]
- u = int(fields[0], 16)
+# Remember the first two lines of the input.
+header = sys.stdin.readline (), sys.stdin.readline ()
+# Discard the rest of the preamble, up to and including the first line that
+# contains a long run of '#'.  readline() returns '' at end of input, so stop
+# with an error there instead of looping forever on a truncated file.
+while True:
+ preamble_line = sys.stdin.readline ()
+ if not preamble_line:
+  raise Exception ("Unexpected end of input while skipping the preamble")
+ if preamble_line.find ('##################') >= 0:
+  break
- if u == 0x200C or u == 0x200D:
- continue
- if u < 0x0600:
- raise Exception ("Ooops, unexpected unicode character: ", fields)
- dic[u] = fields
-
-v = dic.keys()
-v.sort()
-min_u, max_u = v[0], v[-1]
-occupancy = len(v) * 100 / (max_u - min_u + 1)
-
-# Maintain at least 40% occupancy in the table */
-if occupancy < 40:
- raise Exception ("Table too sparse, please investigate: ", occupancy)
print "/* == Start of generated table == */"
print "/*"
print " * %s" % (line.strip())
print " */"
-print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
-print "#define JOINING_TABLE_LAST 0x%04x" % max_u
-print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] ="
+print "static const uint8_t joining_table[] ="
print "{"
-for i in range(min_u, max_u + 1):
- if i not in dic:
- print " JOINING_TYPE_X, /* %04X */" % i
+
+# Running state for the single pass over the data rows.
+min_u = 0x110000 # smallest code point emitted so far
+max_u = 0 # largest code point emitted so far
+num = 0 # number of real (non-filler) entries emitted
+last = -1 # code point of the previous entry; -1 until the first one
+block = '' # pending section heading, printed before the next entry
+for line in sys.stdin:
+
+ if line[0] == '#':
+  # Comment line: remember it as a section heading only when it names a
+  # group of characters.  str.find() returns -1 (which is true) when the
+  # text is absent, so its result cannot be used as the condition directly.
+  # The match ignores case because the headings are spelled both
+  # "... characters" and "... Characters".
+  if " characters" in line.lower ():
+   block = line[2:].strip ()
+  continue
+
+ fields = [x.strip () for x in line.split (';')]
+ if len (fields) == 1:
+  # No ';' on the line, so it is not a data row.
+  continue
+
+ u = int (fields[0], 16)
+ # U+200C and U+200D are left out of the table.
+ if u == 0x200C or u == 0x200D:
+  continue
+ # Entries must be strictly increasing: the gap filling below emits one
+ # slot per code point, so a repeated code point would shift every entry
+ # that follows it.
+ if u <= last:
+  raise Exception ("Input data character not sorted", u)
+ min_u = min (min_u, u)
+ max_u = max (max_u, u)
+ num += 1
+
+ if block:
+  print "\n /* %s */\n" % block
+  block = ''
+
+ if last != -1:
+  # Pad the hole between the previous entry and this one so that every
+  # code point between the first and last entry has a slot.
+  last += 1
+  while last < u:
+   print " JOINING_TYPE_X, /* %04X */" % last
+   last += 1
+ else:
+  last = u
+
+ # ALAPH and DALATH RISH are emitted as JOINING_GROUP_<name> (spaces turned into '_'); every other entry is emitted as JOINING_TYPE_<fields[2]>.
+ if fields[3] in ["ALAPH", "DALATH RISH"]:
+ value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
else:
- entry = dic[i]
- if entry[3] in ["ALAPH", "DALATH RISH"]:
- value = "JOINING_GROUP_" + entry[3].replace(' ', '_')
- else:
- value = "JOINING_TYPE_" + entry[2]
- print " %s, /* %s */" % (value, '; '.join(entry))
+ value = "JOINING_TYPE_" + fields[2]
+ print " %s, /* %s */" % (value, '; '.join(fields))
+
+print
print " JOINING_TYPE_X /* dummy */"
print "};"
+print
+
+print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
+print "#define JOINING_TABLE_LAST 0x%04x" % max_u
+print
+
print "/* == End of generated table == */"
+
+# Share of the code points in [min_u, max_u] that received a real entry
+# rather than a JOINING_TYPE_X filler, as a percentage.
+# NOTE(review): this runs after the table has already been printed, so a
+# failure here still leaves the complete output on stdout.
+occupancy = num * 100 / (max_u - min_u + 1)
+# Maintain at least 40% occupancy in the table
+if occupancy < 40:
+ raise Exception ("Table too sparse, please investigate: ", occupancy)
* # ArabicShaping-6.1.0.txt
* # Date: 2010-11-09, 12:10:00 PST [KW]
*/
-#define JOINING_TABLE_FIRST 0x0600
-#define JOINING_TABLE_LAST 0x0858
-static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
+static const uint8_t joining_table[] =
{
+
+ /* Arabic characters */
+
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
JOINING_TYPE_X, /* 06FD */
JOINING_TYPE_X, /* 06FE */
JOINING_TYPE_D, /* 06FF; HEH WITH INVERTED V; D; KNOTTED HEH */
+
+ /* Syriac characters */
+
JOINING_TYPE_X, /* 0700 */
JOINING_TYPE_X, /* 0701 */
JOINING_TYPE_X, /* 0702 */
JOINING_TYPE_R, /* 074D; SOGDIAN ZHAIN; R; ZHAIN */
JOINING_TYPE_D, /* 074E; SOGDIAN KHAPH; D; KHAPH */
JOINING_TYPE_D, /* 074F; SOGDIAN FE; D; FE */
+
+ /* Arabic supplement characters */
+
JOINING_TYPE_D, /* 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH */
JOINING_TYPE_D, /* 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH */
JOINING_TYPE_D, /* 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH */
JOINING_TYPE_D, /* 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN */
JOINING_TYPE_D, /* 077E; SEEN WITH INVERTED V; D; SEEN */
JOINING_TYPE_D, /* 077F; KAF WITH 2 DOTS ABOVE; D; KAF */
+
+ /* N'Ko Characters */
+
JOINING_TYPE_X, /* 0780 */
JOINING_TYPE_X, /* 0781 */
JOINING_TYPE_X, /* 0782 */
JOINING_TYPE_X, /* 07F8 */
JOINING_TYPE_X, /* 07F9 */
JOINING_TYPE_C, /* 07FA; NKO LAJANYALAN; C; No_Joining_Group */
+
+ /* Mandaic Characters */
+
JOINING_TYPE_X, /* 07FB */
JOINING_TYPE_X, /* 07FC */
JOINING_TYPE_X, /* 07FD */
JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
+
JOINING_TYPE_X /* dummy */
};
+
+#define JOINING_TABLE_FIRST 0x0600
+#define JOINING_TABLE_LAST 0x0858
+
/* == End of generated table == */
HB_END_DECLS