[Indic] Find syllables before any features are applied
author: Behdad Esfahbod <behdad@behdad.org>
Fri, 7 Sep 2012 18:55:07 +0000 (14:55 -0400)
committer: Behdad Esfahbod <behdad@behdad.org>
Fri, 7 Sep 2012 18:56:01 +0000 (14:56 -0400)
With FreeSerif, it seems that the 'ccmp' feature does ligature
substitutions.  That was then causing syllable match failures.  We now
find syllables before any features have been applied.

Test sequence: U+0D9A,U+0DCA,U+200D,U+0DBB,U+0DCF

src/hb-ot-shape-complex-indic-machine.rl
src/hb-ot-shape-complex-indic.cc
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt

index 03e3910..b745466 100644 (file)
@@ -82,7 +82,7 @@ main := |*
        consonant_syllable      => { found_syllable (consonant_syllable); };
        vowel_syllable          => { found_syllable (vowel_syllable); };
        standalone_cluster      => { found_syllable (standalone_cluster); };
-       broken_cluster          => { found_syllable (broken_cluster); *had_broken_cluster = true; };
+       broken_cluster          => { found_syllable (broken_cluster); };
        other                   => { found_syllable (non_indic_cluster); };
 *|;
 
@@ -100,7 +100,7 @@ main := |*
   } HB_STMT_END
 
 static void
-find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, bool *had_broken_cluster)
+find_syllables (hb_buffer_t *buffer)
 {
   unsigned int p, pe, eof, ts, te, act;
   int cs;
index 7f6b79a..6a87c1e 100644 (file)
@@ -203,6 +203,10 @@ enum {
 };
 
 static void
+setup_syllables (const hb_ot_shape_plan_t *plan,
+                hb_font_t *font,
+                hb_buffer_t *buffer);
+static void
 initial_reordering (const hb_ot_shape_plan_t *plan,
                    hb_font_t *font,
                    hb_buffer_t *buffer);
@@ -216,6 +220,9 @@ collect_features_indic (hb_ot_shape_planner_t *plan)
 {
   hb_ot_map_builder_t *map = &plan->map;
 
+  /* Do this before any lookups have been applied. */
+  map->add_gsub_pause (setup_syllables);
+
   map->add_bool_feature (HB_TAG('l','o','c','l'));
   /* The Indic specs do not require ccmp, but we apply it here since if
    * there is a use of it, it's typically at the beginning. */
@@ -349,6 +356,17 @@ consonant_position_from_face (const indic_shape_plan_t *indic_plan,
 }
 
 
+enum syllable_type_t {
+  consonant_syllable,
+  vowel_syllable,
+  standalone_cluster,
+  broken_cluster,
+  non_indic_cluster,
+};
+
+#include "hb-ot-shape-complex-indic-machine.hh"
+
+
 static void
 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
                   hb_buffer_t              *buffer,
@@ -365,6 +383,14 @@ setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
     set_indic_properties (buffer->info[i]);
 }
 
+static void
+setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                hb_font_t *font HB_UNUSED,
+                hb_buffer_t *buffer)
+{
+  find_syllables (buffer);
+}
+
 static int
 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 {
@@ -788,16 +814,6 @@ initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
 }
 
 
-enum syllable_type_t {
-  consonant_syllable,
-  vowel_syllable,
-  standalone_cluster,
-  broken_cluster,
-  non_indic_cluster,
-};
-
-#include "hb-ot-shape-complex-indic-machine.hh"
-
 static void
 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
                             hb_buffer_t *buffer,
@@ -813,11 +829,23 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
   }
 }
 
-static void
+static inline void
 insert_dotted_circles (const hb_ot_shape_plan_t *plan,
                       hb_font_t *font,
                       hb_buffer_t *buffer)
 {
+  /* Note: This loop is extra overhead, but should not be measurable. */
+  bool has_broken_syllables = false;
+  unsigned int count = buffer->len;
+  for (unsigned int i = 0; i < count; i++)
+    if ((buffer->info[i].syllable() & 0x0F) == broken_cluster) {
+      has_broken_syllables = true;
+      break;
+    }
+  if (likely (!has_broken_syllables))
+    return;
+
+
   hb_codepoint_t dottedcircle_glyph;
   if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph))
     return;
@@ -856,11 +884,7 @@ initial_reordering (const hb_ot_shape_plan_t *plan,
                    hb_buffer_t *buffer)
 {
   update_consonant_positions (plan, font, buffer);
-
-  bool had_broken_clusters = false;
-  find_syllables (plan, buffer, &had_broken_clusters);
-  if (unlikely (had_broken_clusters))
-    insert_dotted_circles (plan, font, buffer);
+  insert_dotted_circles (plan, font, buffer);
 
   hb_glyph_info_t *info = buffer->info;
   unsigned int count = buffer->len;