[use] Fix shaping of U+AA29 CHAM VOWEL SIGN AA
author Behdad Esfahbod <behdad@behdad.org>
Fri, 14 Jul 2017 15:38:51 +0000 (16:38 +0100)
committer Behdad Esfahbod <behdad@behdad.org>
Fri, 14 Jul 2017 15:38:51 +0000 (16:38 +0100)
Part of https://github.com/behdad/harfbuzz/issues/376
Also see https://github.com/roozbehp/unicode-data/issues/6

Test added, using NotoSansCham built from Noto Phase III sources.

src/gen-arabic-table.py
src/gen-indic-table.py
src/gen-use-table.py
src/hb-ot-shape-complex-use-table.cc
test/shaping/Makefile.am
test/shaping/fonts/sha1sum/96490dd2ff81233b335a650e7eb660e0e7b2eeea.ttf [new file with mode: 0644]
test/shaping/tests/use-syllable.tests [new file with mode: 0644]

index 308435f..59bd760 100755 (executable)
@@ -134,7 +134,7 @@ def print_joining_table(f):
                for (start,end) in ranges:
                        if p not in [start>>page_bits, end>>page_bits]: continue
                        offset = "joining_offset_0x%04xu" % start
-                       print "      if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset)
+                       print "      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset)
                print "      break;"
                print ""
        print "    default:"
index 3016cd0..a849db1 100755 (executable)
@@ -232,7 +232,7 @@ for p in sorted(pages):
        for (start,end) in zip (starts, ends):
                if p not in [start>>page_bits, end>>page_bits]: continue
                offset = "indic_offset_0x%04xu" % start
-               print "      if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
+               print "      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
        for u,d in singles.items ():
                if p != u>>page_bits: continue
                print "      if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
index a922c92..fcb66a5 100755 (executable)
@@ -210,11 +210,13 @@ def is_SYM_MOD(U, UISC, UGC):
 def is_VARIATION_SELECTOR(U, UISC, UGC):
        return 0xFE00 <= U <= 0xFE0F
 def is_VOWEL(U, UISC, UGC):
+       # https://github.com/roozbehp/unicode-data/issues/6
        return (UISC == Pure_Killer or
-               (UGC != Lo and UISC in [Vowel, Vowel_Dependent]))
+               (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29]))
 def is_VOWEL_MOD(U, UISC, UGC):
+       # https://github.com/roozbehp/unicode-data/issues/6
        return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
-               (UGC != Lo and UISC == Bindu))
+               (UGC != Lo and (UISC == Bindu or U in [0xAA29])))
 
 use_mapping = {
        'B':    is_BASE,
@@ -449,7 +451,7 @@ for p in sorted(pages):
        for (start,end) in zip (starts, ends):
                if p not in [start>>page_bits, end>>page_bits]: continue
                offset = "use_offset_0x%04xu" % start
-               print "      if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
+               print "      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
        for u,d in singles.items ():
                if p != u>>page_bits: continue
                print "      if (unlikely (u == 0x%04Xu)) return %s;" % (u, d[0])
index 416d748..941a003 100644 (file)
@@ -9,7 +9,7 @@
  * # IndicSyllabicCategory-9.0.0.txt
  * # Date: 2016-05-21, 02:46:00 GMT [RP]
  * # IndicPositionalCategory-9.0.0.txt
- * # Date: 2016-02-25, 00:48:00 GMT [RP]
+ * # Date: 2016-06-09, 19:33:00 GMT [RP]
  * # Blocks-9.0.0.txt
  * # Date: 2016-02-05, 23:48:00 GMT [KW]
  * UnicodeData.txt does not have a header.
@@ -410,7 +410,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
 
   /* AA00 */     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,
   /* AA10 */     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,
-  /* AA20 */     B,     B,     B,     B,     B,     B,     B,     B,     B,  VAbv,  VAbv,  VAbv,  VAbv,  VBlw,  VAbv,  VPre,
+  /* AA20 */     B,     B,     B,     B,     B,     B,     B,     B,     B, VMAbv,  VAbv,  VAbv,  VAbv,  VBlw,  VAbv,  VPre,
   /* AA30 */  VPre,  VAbv,  VBlw,  MPst,  MPre,  MBlw,  MBlw,     O,     O,     O,     O,     O,     O,     O,     O,     O,
   /* AA40 */     B,     B,     B,  FAbv,     B,     B,     B,     B,     B,     B,     B,     B,  FAbv,  FPst,     O,     O,
   /* AA50 */     B,     B,     B,     B,     B,     B,     B,     B,     B,     B,     O,     O,     O,     O,     O,     O,
index 48569f0..e444101 100644 (file)
@@ -71,6 +71,7 @@ TESTS = \
        tests/tibetan-vowels.tests \
        tests/use.tests \
        tests/use-marchen.tests \
+       tests/use-syllable.tests \
        tests/vertical.tests \
        tests/zero-width-marks.tests \
        $(NULL)
diff --git a/test/shaping/fonts/sha1sum/96490dd2ff81233b335a650e7eb660e0e7b2eeea.ttf b/test/shaping/fonts/sha1sum/96490dd2ff81233b335a650e7eb660e0e7b2eeea.ttf
new file mode 100644 (file)
index 0000000..78518c0
Binary files /dev/null and b/test/shaping/fonts/sha1sum/96490dd2ff81233b335a650e7eb660e0e7b2eeea.ttf differ
diff --git a/test/shaping/tests/use-syllable.tests b/test/shaping/tests/use-syllable.tests
new file mode 100644 (file)
index 0000000..b864c3e
--- /dev/null
@@ -0,0 +1 @@
+fonts/sha1sum/96490dd2ff81233b335a650e7eb660e0e7b2eeea.ttf::U+AA00,U+AA2D,U+AA29:[a_cham=0+1121|uSign_cham=0@14,0+0|.notdef=0+600]