[Indic] Better categorize Register Shifters and Khmer Various signs
author Behdad Esfahbod <behdad@behdad.org>
Tue, 17 Jul 2012 21:53:03 +0000 (17:53 -0400)
committer Behdad Esfahbod <behdad@behdad.org>
Tue, 17 Jul 2012 21:53:03 +0000 (17:53 -0400)
Down another 500 or so Khmer failures!

src/hb-ot-shape-complex-indic-machine.rl
src/hb-ot-shape-complex-indic-private.hh
src/hb-ot-shape-complex-indic.cc
test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST
test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt [new file with mode: 0644]
test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt [new file with mode: 0644]

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 3c7193d..b87d2df 100644 (file)
@@ -55,16 +55,16 @@ DOTTEDCIRCLE = 13;
 RS   = 14;
 Coeng = 15;
 
-c = C | Ra;
-n = (N.N? | ZWNJ?.RS);
-z = ZWJ|ZWNJ;
-h = H | Coeng;
-matra_group = (M | RS) N? H?;
-syllable_tail = SM? (VD VD?)?;
+c = C | Ra;                    # is_consonant
+n = (N.N? | ZWNJ?.RS);         # is_consonant_modifier
+z = ZWJ|ZWNJ;                  # is_joiner
+h = H | Coeng;                 # is_halant_or_coeng
+matra_group = M.N?.H?;
+syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =   (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail;
+consonant_syllable =   (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
 vowel_syllable =       (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
 standalone_cluster =   (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =                        any;
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 0fe350f..0541738 100644 (file)
@@ -59,7 +59,7 @@ enum indic_category_t {
   OT_A,
   OT_NBSP,
   OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
-  OT_RS, /* Register Shifter (and other marks), used in Khmer OT spec */
+  OT_RS, /* Register Shifter, used in Khmer OT spec */
   OT_Coeng
 };
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 4482dd3..3c83ce6 100644 (file)
@@ -275,8 +275,8 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
       info.indic_category() = OT_VD;
 
     if (info.indic_category() == OT_X &&
-       unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D0)))
-      info.indic_category() = OT_RS;
+       unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */
+      info.indic_category() = OT_N;
 
     /* Khmer Virama is different since it can be used to form a final consonant. */
     if (unlikely (info.codepoint == 0x17D2))
@@ -488,7 +488,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   {
     /* Please update the Uniscribe branch when touching this! */
     for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H))))
+      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
        info[i].indic_position() = info[i - 1].indic_position();
   } else {
     /*
@@ -497,7 +497,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
      */
     /* Please update the non-Uniscribe branch when touching this! */
     for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) {
+      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) {
        info[i].indic_position() = info[i - 1].indic_position();
        if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)
          for (unsigned int j = i; j > start; j--)
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt
new file mode 100644 (file)
index 0000000..213cfc2
--- /dev/null
@@ -0,0 +1,4 @@
+ព់្ឈា
+ព្ឈា៉
+ព្ឈា៌
+ព្ឈ៌ា
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt
new file mode 100644 (file)
index 0000000..0ad62e7
--- /dev/null
@@ -0,0 +1,6 @@
+ព្ឈា
+ព្ឈា់
+ព្ឈ់ា
+ព្ឈ៉ា
+ព៉្ឈា
+ព៌្ឈា