fields = [x.strip() for x in line.split(';')]
u = int(fields[0], 16)
- if u < 0x0600 or (u > 0x07FF and u != 0x200C and u != 0x200D):
+ if u == 0x200C or u == 0x200D:
+ continue
+ if u < 0x0600:
raise Exception ("Ooops, unexpected unicode character: ", fields)
dic[u] = fields
-print " /*"
-print " * The following table is generated by running:"
-print " *"
-print " * ./gen-arabic-joining-table.py < ArabicShaping.txt"
-print " *"
-print " * on the ArabicShaping.txt file with the header:"
-print " *"
+v = dic.keys()
+v.sort()
+min_u, max_u = v[0], v[-1]
+occupancy = len(v) * 100 / (max_u - min_u + 1)
+
+# Maintain at least 40% occupancy in the table
+if occupancy < 40:
+ raise Exception ("Table too sparse, please investigate: ", occupancy)
+
+print "/* == Start of generated table == */"
+print "/*"
+print " * The following table is generated by running:"
+print " *"
+print " * ./gen-arabic-joining-table.py < ArabicShaping.txt"
+print " *"
+print " * on the ArabicShaping.txt file with the header:"
+print " *"
for line in header:
- print " * %s" % (line.strip())
-print " */"
-print " /* == Start of generated table == */"
-for i in range(0x0600, 0x0800):
+ print " * %s" % (line.strip())
+print " */"
+
+print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
+print "#define JOINING_TABLE_LAST 0x%04x" % max_u
+print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] ="
+print "{"
+
+for i in range(min_u, max_u + 1):
if i not in dic:
print " JOINING_TYPE_X, /* %04X */" % i
else:
else:
value = "JOINING_TYPE_" + entry[2]
print " %s, /* %s */" % (value, '; '.join(entry))
-print " /* == End of generated table == */"
+print " JOINING_TYPE_X /* dummy */"
+print "};"
+print "/* == End of generated table == */"
*/
+/* == Start of generated table == */
/*
- * Main joining-type table, covering U+0600..U+07FF.
- * Includes Arabic, Syriac, and N'ko.
+ * The following table is generated by running:
+ *
+ * ./gen-arabic-joining-table.py < ArabicShaping.txt
+ *
+ * on the ArabicShaping.txt file with the header:
+ *
+ * # ArabicShaping-6.1.0.txt
+ * # Date: 2010-11-09, 12:10:00 PST [KW]
*/
-static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] =
+#define JOINING_TABLE_FIRST 0x0600
+#define JOINING_TABLE_LAST 0x0858
+static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
{
- /*
- * The following table is generated by running:
- *
- * ./gen-arabic-joining-table.py < ArabicShaping.txt
- *
- * on the ArabicShaping.txt file with the header:
- *
- * # ArabicShaping-6.0.0.txt
- * # Date: 2010-04-30, 13:47:00 PDT [KW]
- */
- /* == Start of generated table == */
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
JOINING_TYPE_X, /* 07FD */
JOINING_TYPE_X, /* 07FE */
JOINING_TYPE_X, /* 07FF */
- /* == End of generated table == */
- JOINING_TYPE_X
+ JOINING_TYPE_X, /* 0800 */
+ JOINING_TYPE_X, /* 0801 */
+ JOINING_TYPE_X, /* 0802 */
+ JOINING_TYPE_X, /* 0803 */
+ JOINING_TYPE_X, /* 0804 */
+ JOINING_TYPE_X, /* 0805 */
+ JOINING_TYPE_X, /* 0806 */
+ JOINING_TYPE_X, /* 0807 */
+ JOINING_TYPE_X, /* 0808 */
+ JOINING_TYPE_X, /* 0809 */
+ JOINING_TYPE_X, /* 080A */
+ JOINING_TYPE_X, /* 080B */
+ JOINING_TYPE_X, /* 080C */
+ JOINING_TYPE_X, /* 080D */
+ JOINING_TYPE_X, /* 080E */
+ JOINING_TYPE_X, /* 080F */
+ JOINING_TYPE_X, /* 0810 */
+ JOINING_TYPE_X, /* 0811 */
+ JOINING_TYPE_X, /* 0812 */
+ JOINING_TYPE_X, /* 0813 */
+ JOINING_TYPE_X, /* 0814 */
+ JOINING_TYPE_X, /* 0815 */
+ JOINING_TYPE_X, /* 0816 */
+ JOINING_TYPE_X, /* 0817 */
+ JOINING_TYPE_X, /* 0818 */
+ JOINING_TYPE_X, /* 0819 */
+ JOINING_TYPE_X, /* 081A */
+ JOINING_TYPE_X, /* 081B */
+ JOINING_TYPE_X, /* 081C */
+ JOINING_TYPE_X, /* 081D */
+ JOINING_TYPE_X, /* 081E */
+ JOINING_TYPE_X, /* 081F */
+ JOINING_TYPE_X, /* 0820 */
+ JOINING_TYPE_X, /* 0821 */
+ JOINING_TYPE_X, /* 0822 */
+ JOINING_TYPE_X, /* 0823 */
+ JOINING_TYPE_X, /* 0824 */
+ JOINING_TYPE_X, /* 0825 */
+ JOINING_TYPE_X, /* 0826 */
+ JOINING_TYPE_X, /* 0827 */
+ JOINING_TYPE_X, /* 0828 */
+ JOINING_TYPE_X, /* 0829 */
+ JOINING_TYPE_X, /* 082A */
+ JOINING_TYPE_X, /* 082B */
+ JOINING_TYPE_X, /* 082C */
+ JOINING_TYPE_X, /* 082D */
+ JOINING_TYPE_X, /* 082E */
+ JOINING_TYPE_X, /* 082F */
+ JOINING_TYPE_X, /* 0830 */
+ JOINING_TYPE_X, /* 0831 */
+ JOINING_TYPE_X, /* 0832 */
+ JOINING_TYPE_X, /* 0833 */
+ JOINING_TYPE_X, /* 0834 */
+ JOINING_TYPE_X, /* 0835 */
+ JOINING_TYPE_X, /* 0836 */
+ JOINING_TYPE_X, /* 0837 */
+ JOINING_TYPE_X, /* 0838 */
+ JOINING_TYPE_X, /* 0839 */
+ JOINING_TYPE_X, /* 083A */
+ JOINING_TYPE_X, /* 083B */
+ JOINING_TYPE_X, /* 083C */
+ JOINING_TYPE_X, /* 083D */
+ JOINING_TYPE_X, /* 083E */
+ JOINING_TYPE_X, /* 083F */
+ JOINING_TYPE_R, /* 0840; MANDAIC HALQA; R; No_Joining_Group */
+ JOINING_TYPE_D, /* 0841; MANDAIC AB; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0842; MANDAIC AG; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0843; MANDAIC AD; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0844; MANDAIC AH; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0845; MANDAIC USHENNA; D; No_Joining_Group */
+ JOINING_TYPE_R, /* 0846; MANDAIC AZ; R; No_Joining_Group */
+ JOINING_TYPE_D, /* 0847; MANDAIC IT; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0848; MANDAIC ATT; D; No_Joining_Group */
+ JOINING_TYPE_R, /* 0849; MANDAIC AKSA; R; No_Joining_Group */
+ JOINING_TYPE_D, /* 084A; MANDAIC AK; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 084B; MANDAIC AL; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 084C; MANDAIC AM; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 084D; MANDAIC AN; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 084E; MANDAIC AS; D; No_Joining_Group */
+ JOINING_TYPE_R, /* 084F; MANDAIC IN; R; No_Joining_Group */
+ JOINING_TYPE_D, /* 0850; MANDAIC AP; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0851; MANDAIC ASZ; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0852; MANDAIC AQ; D; No_Joining_Group */
+ JOINING_TYPE_D, /* 0853; MANDAIC AR; D; No_Joining_Group */
+ JOINING_TYPE_R, /* 0854; MANDAIC ASH; R; No_Joining_Group */
+ JOINING_TYPE_D, /* 0855; MANDAIC AT; D; No_Joining_Group */
+ JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
+ JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
+ JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
+ JOINING_TYPE_X /* dummy */
};
+/* == End of generated table == */
static unsigned int get_joining_type (hb_codepoint_t u, hb_category_t gen_cat)
{
/* TODO Macroize the magic bit operations */
- if (likely ((u & ~(0x0600^0x07FF)) == 0x0600)) {
- unsigned int j_type = arabic_syriac_nko_joining_types[u - 0x0600];
+ if (likely (JOINING_TABLE_FIRST <= u && u <= JOINING_TABLE_LAST)) {
+ unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
if (likely (j_type != JOINING_TYPE_X))
return j_type;
}