Imported Upstream version 8.2.2

[platform/upstream/harfbuzz.git] / src / hb-ot-shaper-indic.cc
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shaper-indic.cc

similarity index 81%

rename from src/hb-ot-shape-complex-indic.cc

rename to src/hb-ot-shaper-indic.cc

index 4a8781c..f8c970f 100644 (file)
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shaper-indic.cc
@@ -28,9 +28,9 @@
  
  #ifndef HB_NO_OT_SHAPE
  
-#include "hb-ot-shape-complex-indic.hh"
-#include "hb-ot-shape-complex-indic-machine.hh"
-#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-shaper-indic.hh"
+#include "hb-ot-shaper-indic-machine.hh"
+#include "hb-ot-shaper-vowel-constraints.hh"
  #include "hb-ot-layout.hh"
  
  
@@ -39,6 +39,79 @@
   */
  
  
+static inline void
+set_indic_properties (hb_glyph_info_t &info)
+{
+  /* The lookup packs both properties: category in the low byte, position above it. */
+  const unsigned int packed = hb_indic_get_categories (info.codepoint);
+
+  info.indic_category() = (indic_category_t) (packed & 0xFFu);
+  info.indic_position() = (indic_position_t) (packed >> 8);
+}
+
+
+static inline bool
+is_one_of (const hb_glyph_info_t &info, unsigned int flags)
+{
+  /* Once a glyph has ligated all bets are off, so it never matches any flag. */
+  const bool ligated = _hb_glyph_info_ligated (&info);
+  return !ligated && (FLAG_UNSAFE (info.indic_category()) & flags) != 0;
+}
+
+/* Note:
+ *
+ * We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
+ * cannot happen in a consonant syllable.  The plus side however is, we can call the
+ * consonant syllable logic from the vowel syllable function and get it all right!
+ *
+ * Keep in sync with consonant_categories in the generator. */
+#define CONSONANT_FLAGS_INDIC (FLAG (I_Cat(C)) | FLAG (I_Cat(CS)) | FLAG (I_Cat(Ra)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(V)) | FLAG (I_Cat(PLACEHOLDER)) | FLAG (I_Cat(DOTTEDCIRCLE)))
+
+/* Whether info passes is_one_of() for CONSONANT_FLAGS_INDIC, i.e. it is not
+ * ligated and its category is one of those listed in that macro. */
+static inline bool
+is_consonant (const hb_glyph_info_t &info)
+{ return is_one_of (info, CONSONANT_FLAGS_INDIC); }
+
+#define JOINER_FLAGS (FLAG (I_Cat(ZWJ)) | FLAG (I_Cat(ZWNJ)))
+
+/* Whether info passes is_one_of() for JOINER_FLAGS, i.e. it is not ligated
+ * and its category is ZWJ or ZWNJ. */
+static inline bool
+is_joiner (const hb_glyph_info_t &info)
+{ return is_one_of (info, JOINER_FLAGS); }
+
+/* Whether info passes is_one_of() for the H category alone, i.e. it is not
+ * ligated and its category is H. */
+static inline bool
+is_halant (const hb_glyph_info_t &info)
+{ return is_one_of (info, FLAG (I_Cat(H))); }
+
+struct hb_indic_would_substitute_feature_t
+{
+  /* Remember the lookups of the GSUB stage that map reports for feature_tag. */
+  void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+  {
+    lookups = map->get_stage_lookups (0/*GSUB*/,
+                                     map->get_feature_stage (0/*GSUB*/, feature_tag));
+    zero_context = zero_context_;
+  }
+
+  /* Whether any remembered lookup would substitute the given glyph sequence. */
+  bool would_substitute (const hb_codepoint_t *glyphs, unsigned int glyphs_count, hb_face_t *face) const
+  {
+    for (const auto &entry : lookups)
+      if (hb_ot_layout_lookup_would_substitute (face, entry.index, glyphs, glyphs_count, zero_context))
+       return true;
+    return false;
+  }
+
+  private:
+  hb_array_t<const hb_ot_map_t::lookup_map_t> lookups;
+  bool zero_context; /* Forwarded to hb_ot_layout_lookup_would_substitute(). */
+};
+
+
  /*
   * Indic configurations.  Note that we do not want to keep every single script-specific
   * behavior in these tables necessarily.  This should mainly be used for per-script
@@ -47,10 +120,6 @@
   * instead of adding a new flag in these structs.
   */
  
-enum base_position_t {
-  BASE_POS_LAST_SINHALA,
-  BASE_POS_LAST
-};
  enum reph_position_t {
    REPH_POS_AFTER_MAIN  = POS_AFTER_MAIN,
    REPH_POS_BEFORE_SUB  = POS_BEFORE_SUB,
@@ -72,7 +141,6 @@ struct indic_config_t
    hb_script_t     script;
    bool            has_old_spec;
    hb_codepoint_t  virama;
-  base_position_t base_pos;
    reph_position_t reph_pos;
    reph_mode_t     reph_mode;
    blwf_mode_t     blwf_mode;
@@ -81,26 +149,19 @@ struct indic_config_t
  static const indic_config_t indic_configs[] =
  {
    /* Default.  Should be first. */
-  {HB_SCRIPT_INVALID,  false,      0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_BENGALI,  true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB,  REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_ORIYA,    true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_TAMIL,    true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_TELUGU,   true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
-  {HB_SCRIPT_KANNADA,  true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
-  {HB_SCRIPT_MALAYALAM,        true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
-  {HB_SCRIPT_SINHALA,  false,0x0DCAu,BASE_POS_LAST_SINHALA,
-                                                    REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_INVALID,  false,      0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_BENGALI,  true, 0x09CDu,REPH_POS_AFTER_SUB,  REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_GUJARATI, true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_ORIYA,    true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_TAMIL,    true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+  {HB_SCRIPT_TELUGU,   true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
+  {HB_SCRIPT_KANNADA,  true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
+  {HB_SCRIPT_MALAYALAM,        true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
  };
  
  
-
-/*
- * Indic shaper.
- */
-
  static const hb_ot_map_feature_t
  indic_features[] =
  {
@@ -109,17 +170,17 @@ indic_features[] =
     * These features are applied in order, one at a time, after initial_reordering,
     * constrained to the syllable.
     */
-  {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('r','p','h','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('p','r','e','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('b','l','w','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('a','b','v','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('h','a','l','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('p','s','t','f'),        F_MANUAL_JOINERS},
-  {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('r','p','h','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('p','r','e','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('b','l','w','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('a','b','v','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('h','a','l','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('p','s','t','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    /*
     * Other features.
     * These features are applied all at once, after final_reordering, constrained
@@ -127,12 +188,12 @@ indic_features[] =
     * Default Bengali font in Windows for example has intermixed
     * lookups for init,pres,abvs,blws features.
     */
-  {HB_TAG('i','n','i','t'),        F_MANUAL_JOINERS},
-  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
-  {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('i','n','i','t'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+  {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
  };
  
  /*
@@ -162,15 +223,15 @@ enum {
    INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! */
  };
  
-static void
+static bool
  setup_syllables_indic (const hb_ot_shape_plan_t *plan,
                        hb_font_t *font,
                        hb_buffer_t *buffer);
-static void
+static bool
  initial_reordering_indic (const hb_ot_shape_plan_t *plan,
                           hb_font_t *font,
                           hb_buffer_t *buffer);
-static void
+static bool
  final_reordering_indic (const hb_ot_shape_plan_t *plan,
                         hb_font_t *font,
                         hb_buffer_t *buffer);
@@ -183,10 +244,10 @@ collect_features_indic (hb_ot_shape_planner_t *plan)
    /* Do this before any lookups have been applied. */
    map->add_gsub_pause (setup_syllables_indic);
  
-  map->enable_feature (HB_TAG('l','o','c','l'));
+  map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE);
    /* The Indic specs do not require ccmp, but we apply it here since if
     * there is a use of it, it's typically at the beginning. */
-  map->enable_feature (HB_TAG('c','c','m','p'));
+  map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE);
  
  
    unsigned int i = 0;
@@ -201,14 +262,13 @@ collect_features_indic (hb_ot_shape_planner_t *plan)
  
    for (; i < INDIC_NUM_FEATURES; i++)
      map->add_feature (indic_features[i]);
-
-  map->add_gsub_pause (_hb_clear_syllables);
  }
  
  static void
  override_features_indic (hb_ot_shape_planner_t *plan)
  {
    plan->map.disable_feature (HB_TAG('l','i','g','a'));
+  plan->map.add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var
  }
  
  
@@ -216,7 +276,7 @@ struct indic_shape_plan_t
  {
    bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
    {
-    hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+    hb_codepoint_t glyph = virama_glyph;
      if (unlikely (glyph == (hb_codepoint_t) -1))
      {
        if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
@@ -226,7 +286,7 @@ struct indic_shape_plan_t
  
        /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
         * during shape planning...  Instead, overwrite it here. */
-      virama_glyph.set_relaxed ((int) glyph);
+      virama_glyph = (int) glyph;
      }
  
      *pglyph = glyph;
@@ -270,7 +330,7 @@ data_create_indic (const hb_ot_shape_plan_t *plan)
  #ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE
    indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
  #endif
-  indic_plan->virama_glyph.set_relaxed (-1);
+  indic_plan->virama_glyph = -1;
  
    /* Use zero-context would_substitute() matching for new-spec of the main
     * Indic scripts, and scripts with one spec only, but not for old-specs.
@@ -353,14 +413,16 @@ setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
      set_indic_properties (info[i]);
  }
  
-static void
+static bool
  setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
                        hb_font_t *font HB_UNUSED,
                        hb_buffer_t *buffer)
  {
+  HB_BUFFER_ALLOCATE_VAR (buffer, syllable);
    find_syllables_indic (buffer);
    foreach_syllable (buffer, start, end)
      buffer->unsafe_to_break (start, end);
+  return false;
  }
  
  static int
@@ -369,7 +431,7 @@ compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
    int a = pa->indic_position();
    int b = pb->indic_position();
  
-  return a < b ? -1 : a == b ? 0 : +1;
+  return (int) a - (int) b;
  }
  
  
@@ -381,9 +443,6 @@ update_consonant_positions_indic (const hb_ot_shape_plan_t *plan,
  {
    const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
  
-  if (indic_plan->config->base_pos != BASE_POS_LAST)
-    return;
-
    hb_codepoint_t virama;
    if (indic_plan->load_virama_glyph (font, &virama))
    {
@@ -418,14 +477,12 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
     */
    if (buffer->props.script == HB_SCRIPT_KANNADA &&
        start + 3 <= end &&
-      is_one_of (info[start  ], FLAG (OT_Ra)) &&
-      is_one_of (info[start+1], FLAG (OT_H)) &&
-      is_one_of (info[start+2], FLAG (OT_ZWJ)))
+      is_one_of (info[start  ], FLAG (I_Cat(Ra))) &&
+      is_one_of (info[start+1], FLAG (I_Cat(H))) &&
+      is_one_of (info[start+2], FLAG (I_Cat(ZWJ))))
    {
      buffer->merge_clusters (start+1, start+3);
-    hb_glyph_info_t tmp = info[start+1];
-    info[start+1] = info[start+2];
-    info[start+2] = tmp;
+    hb_swap (info[start+1], info[start+2]);
    }
  
    /* 1. Find base consonant:
@@ -454,7 +511,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
         start + 3 <= end &&
         (
          (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
-        (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ)
+        (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == I_Cat(ZWJ))
         ))
      {
        /* See if it matches the 'rphf' feature. */
@@ -472,7 +529,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
         base = start;
         has_reph = true;
        }
-    } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha)
+    } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == I_Cat(Repha))
      {
         limit += 1;
         while (limit < end && is_joiner (info[limit]))
@@ -481,84 +538,51 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
         has_reph = true;
      }
  
-    switch (indic_plan->config->base_pos)
      {
-      case BASE_POS_LAST:
-      {
-       /* -> starting from the end of the syllable, move backwards */
-       unsigned int i = end;
-       bool seen_below = false;
-       do {
-         i--;
-         /* -> until a consonant is found */
-         if (is_consonant (info[i]))
+      /* -> starting from the end of the syllable, move backwards */
+      unsigned int i = end;
+      bool seen_below = false;
+      do {
+       i--;
+       /* -> until a consonant is found */
+       if (is_consonant (info[i]))
+       {
+         /* -> that does not have a below-base or post-base form
+          * (post-base forms have to follow below-base forms), */
+         if (info[i].indic_position() != POS_BELOW_C &&
+             (info[i].indic_position() != POS_POST_C || seen_below))
           {
-           /* -> that does not have a below-base or post-base form
-            * (post-base forms have to follow below-base forms), */
-           if (info[i].indic_position() != POS_BELOW_C &&
-               (info[i].indic_position() != POS_POST_C || seen_below))
-           {
-             base = i;
-             break;
-           }
-           if (info[i].indic_position() == POS_BELOW_C)
-             seen_below = true;
-
-           /* -> or that is not a pre-base-reordering Ra,
-            *
-            * IMPLEMENTATION NOTES:
-            *
-            * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
-            * by the logic above already.
-            */
-
-           /* -> or arrive at the first consonant. The consonant stopped at will
-            * be the base. */
             base = i;
+           break;
           }
-         else
-         {
-           /* A ZWJ after a Halant stops the base search, and requests an explicit
-            * half form.
-            * A ZWJ before a Halant, requests a subjoined form instead, and hence
-            * search continues.  This is particularly important for Bengali
-            * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
-           if (start < i &&
-               info[i].indic_category() == OT_ZWJ &&
-               info[i - 1].indic_category() == OT_H)
-             break;
-         }
-       } while (i > limit);
-      }
-      break;
+         if (info[i].indic_position() == POS_BELOW_C)
+           seen_below = true;
  
-      case BASE_POS_LAST_SINHALA:
-      {
-       /* Sinhala base positioning is slightly different from main Indic, in that:
-        * 1. Its ZWJ behavior is different,
-        * 2. We don't need to look into the font for consonant positions.
-        */
-
-       if (!has_reph)
-         base = limit;
-
-       /* Find the last base consonant that is not blocked by ZWJ.  If there is
-        * a ZWJ right before a base consonant, that would request a subjoined form. */
-       for (unsigned int i = limit; i < end; i++)
-         if (is_consonant (info[i]))
-         {
-           if (limit < i && info[i - 1].indic_category() == OT_ZWJ)
-             break;
-           else
-             base = i;
-         }
+         /* -> or that is not a pre-base-reordering Ra,
+          *
+          * IMPLEMENTATION NOTES:
+          *
+          * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
+          * by the logic above already.
+          */
  
-       /* Mark all subsequent consonants as below. */
-       for (unsigned int i = base + 1; i < end; i++)
-         if (is_consonant (info[i]))
-           info[i].indic_position() = POS_BELOW_C;
-      }
-      break;
+         /* -> or arrive at the first consonant. The consonant stopped at will
+          * be the base. */
+         base = i;
+       }
+       else
+       {
+         /* A ZWJ after a Halant stops the base search, and requests an explicit
+          * half form.
+          * A ZWJ before a Halant, requests a subjoined form instead, and hence
+          * search continues.  This is particularly important for Bengali
+          * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
+         if (start < i &&
+             info[i].indic_category() == I_Cat(ZWJ) &&
+             info[i - 1].indic_category() == I_Cat(H))
+           break;
+       }
+      } while (i > limit);
      }
  
      /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
@@ -613,18 +637,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    if (base < end)
      info[base].indic_position() = POS_BASE_C;
  
-  /* Mark final consonants.  A final consonant is one appearing after a matra.
-   * Happens in Sinhala. */
-  for (unsigned int i = base + 1; i < end; i++)
-    if (info[i].indic_category() == OT_M) {
-      for (unsigned int j = i + 1; j < end; j++)
-       if (is_consonant (info[j])) {
-        info[j].indic_position() = POS_FINAL_C;
-        break;
-       }
-      break;
-    }
-
    /* Handle beginning Ra */
    if (has_reph)
      info[start].indic_position() = POS_RA_TO_BECOME_REPH;
@@ -661,14 +673,14 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    {
      bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
      for (unsigned int i = base + 1; i < end; i++)
-      if (info[i].indic_category() == OT_H)
+      if (info[i].indic_category() == I_Cat(H))
        {
         unsigned int j;
         for (j = end - 1; j > i; j--)
           if (is_consonant (info[j]) ||
-             (disallow_double_halants && info[j].indic_category() == OT_H))
+             (disallow_double_halants && info[j].indic_category() == I_Cat(H)))
             break;
-       if (info[j].indic_category() != OT_H && j > i) {
+       if (info[j].indic_category() != I_Cat(H) && j > i) {
           /* Move Halant to after last consonant. */
           hb_glyph_info_t t = info[i];
           memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
@@ -683,20 +695,16 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
      indic_position_t last_pos = POS_START;
      for (unsigned int i = start; i < end; i++)
      {
-      if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+      if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (I_Cat(N)) | FLAG (I_Cat(RS)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(H)))))
        {
         info[i].indic_position() = last_pos;
-       if (unlikely (info[i].indic_category() == OT_H &&
+       if (unlikely (info[i].indic_category() == I_Cat(H) &&
                       info[i].indic_position() == POS_PRE_M))
         {
           /*
            * Uniscribe doesn't move the Halant with Left Matra.
-          * TEST: U+092B,U+093F,U+094DE
-          * We follow.  This is important for the Sinhala
-          * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
-          * where U+0DD9 is a left matra and U+0DCA is the virama.
-          * We don't want to move the virama with the left matra.
-          * TEST: U+0D9A,U+0DDA
+          * TEST: U+092B,U+093F,U+094D
+          * We follow.
            */
           for (unsigned int j = i; j > start; j--)
             if (info[j - 1].indic_position() != POS_PRE_M) {
@@ -705,6 +713,9 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
             }
         }
        } else if (info[i].indic_position() != POS_SMVD) {
+       if (info[i].indic_category() == I_Cat(MPst) &&
+           i > start && info[i - 1].indic_category() == I_Cat(SM))
+         info[i - 1].indic_position() = info[i].indic_position();
         last_pos = (indic_position_t) info[i].indic_position();
        }
      }
@@ -720,7 +731,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
           if (info[j].indic_position() < POS_SMVD)
             info[j].indic_position() = info[i].indic_position();
         last = i;
-      } else if (info[i].indic_category() == OT_M)
+      } else if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
         last = i;
    }
  
@@ -733,14 +744,40 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
  
      /* Sit tight, rock 'n roll! */
      hb_stable_sort (info + start, end - start, compare_indic_order);
-    /* Find base again */
+
+    /* Find base again; also flip left-matra sequence. */
+    unsigned first_left_matra = end;
+    unsigned last_left_matra = end;
      base = end;
      for (unsigned int i = start; i < end; i++)
+    {
        if (info[i].indic_position() == POS_BASE_C)
        {
         base = i;
         break;
        }
+      else if (info[i].indic_position() == POS_PRE_M)
+      {
+        if (first_left_matra == end)
+         first_left_matra = i;
+       last_left_matra = i;
+      }
+    }
+    /* https://github.com/harfbuzz/harfbuzz/issues/3863 */
+    if (first_left_matra < last_left_matra)
+    {
+      /* No need to merge clusters, handled later. */
+      buffer->reverse_range (first_left_matra, last_left_matra + 1);
+      /* Reverse back nuktas, etc. */
+      unsigned i = first_left_matra;
+      for (unsigned j = i; j <= last_left_matra; j++)
+       if (FLAG_UNSAFE (info[j].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
+       {
+         buffer->reverse_range (i, j + 1);
+         i = j + 1;
+       }
+    }
+
      /* Things are out-of-control for post base positions, they may shuffle
       * around like crazy.  In old-spec mode, we move halants around, so in
       * that case merge all clusters after base.  Otherwise, check the sort
@@ -851,10 +888,10 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
       * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
       */
      for (unsigned int i = start; i + 1 < base; i++)
-      if (info[i  ].indic_category() == OT_Ra &&
-         info[i+1].indic_category() == OT_H  &&
+      if (info[i  ].indic_category() == I_Cat(Ra) &&
+         info[i+1].indic_category() == I_Cat(H)  &&
           (i + 2 == base ||
-          info[i+2].indic_category() != OT_ZWJ))
+          info[i+2].indic_category() != I_Cat(ZWJ)))
        {
         info[i  ].mask |= indic_plan->mask_array[INDIC_BLWF];
         info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF];
@@ -881,7 +918,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    /* Apply ZWJ/ZWNJ effects */
    for (unsigned int i = start + 1; i < end; i++)
      if (is_joiner (info[i])) {
-      bool non_joiner = info[i].indic_category() == OT_ZWNJ;
+      bool non_joiner = info[i].indic_category() == I_Cat(ZWNJ);
        unsigned int j = i;
  
        do {
@@ -914,7 +951,7 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
      /* For dotted-circle, this is what Uniscribe does:
       * If dotted-circle is the last glyph, it just does nothing.
       * Ie. It doesn't form Reph. */
-    if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
+    if (buffer->info[end - 1].indic_category() == I_Cat(DOTTEDCIRCLE))
        return;
    }
  
@@ -946,25 +983,29 @@ initial_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
    }
  }
  
-static void
+static bool
  initial_reordering_indic (const hb_ot_shape_plan_t *plan,
                           hb_font_t *font,
                           hb_buffer_t *buffer)
  {
+  bool ret = false;
    if (!buffer->message (font, "start reordering indic initial"))
-    return;
+    return ret;
  
    update_consonant_positions_indic (plan, font, buffer);
-  hb_syllabic_insert_dotted_circles (font, buffer,
-                                    indic_broken_cluster,
-                                    OT_DOTTEDCIRCLE,
-                                    OT_Repha,
-                                    POS_END);
+  if (hb_syllabic_insert_dotted_circles (font, buffer,
+                                        indic_broken_cluster,
+                                        I_Cat(DOTTEDCIRCLE),
+                                        I_Cat(Repha),
+                                        POS_END))
+    ret = true;
  
    foreach_syllable (buffer, start, end)
      initial_reordering_syllable_indic (plan, font->face, buffer, start, end);
  
    (void) buffer->message (font, "end reordering indic initial");
+
+  return ret;
  }
  
  static void
@@ -980,10 +1021,10 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
     * and possibly multiple substitutions happened prior to this
     * phase, and that might have messed up our properties.  Recover
     * from a particular case of that where we're fairly sure that a
-   * class of OT_H is desired but has been lost. */
+   * class of I_Cat(H) is desired but has been lost. */
    /* We don't call load_virama_glyph(), since we know it's already
     * loaded. */
-  hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+  hb_codepoint_t virama_glyph = indic_plan->virama_glyph;
    if (virama_glyph)
    {
      for (unsigned int i = start; i < end; i++)
@@ -992,7 +1033,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
           _hb_glyph_info_multiplied (&info[i]))
        {
         /* This will make sure that this glyph passes is_halant() test. */
-       info[i].indic_category() = OT_H;
+       info[i].indic_category() = I_Cat(H);
         _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
        }
    }
@@ -1026,12 +1067,15 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
               base = i;
               while (base < end && is_halant (info[base]))
                 base++;
-             info[base].indic_position() = POS_BASE_C;
+             if (base < end)
+               info[base].indic_position() = POS_BASE_C;
  
               try_pref = false;
             }
             break;
           }
+       if (base == end)
+         break;
        }
        /* For Malayalam, skip over unformed below- (but NOT post-) forms. */
        if (buffer->props.script == HB_SCRIPT_MALAYALAM)
@@ -1058,11 +1102,11 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
        break;
      }
    if (base == end && start < base &&
-      is_one_of (info[base - 1], FLAG (OT_ZWJ)))
+      is_one_of (info[base - 1], FLAG (I_Cat(ZWJ))))
      base--;
    if (base < end)
      while (start < base &&
-          is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_H))))
+          is_one_of (info[base], (FLAG (I_Cat(N)) | FLAG (I_Cat(H)))))
        base--;
  
  
@@ -1107,7 +1151,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
      {
      search:
        while (new_pos > start &&
-            !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
+            !(is_one_of (info[new_pos], (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H))))))
         new_pos--;
  
        /* If we found no Halant we are done.
@@ -1124,7 +1168,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
         if (new_pos + 1 < end)
         {
           /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
-         if (info[new_pos + 1].indic_category() == OT_ZWJ)
+         if (info[new_pos + 1].indic_category() == I_Cat(ZWJ))
           {
             /* Keep searching. */
             if (new_pos > start)
@@ -1197,7 +1241,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
     */
    if (start + 1 < end &&
        info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
-      ((info[start].indic_category() == OT_Repha) ^
+      ((info[start].indic_category() == I_Cat(Repha)) ^
         _hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
    {
      unsigned int new_reph_pos;
@@ -1307,7 +1351,8 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
           unlikely (is_halant (info[new_reph_pos])))
        {
         for (unsigned int i = base + 1; i < new_reph_pos; i++)
-         if (info[i].indic_category() == OT_M) {
+         if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
+         {
             /* Ok, got it. */
             new_reph_pos--;
           }
@@ -1367,7 +1412,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
           if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
           {
             while (new_pos > start &&
-                  !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_H))))
+                  !(is_one_of (info[new_pos - 1], FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H)))))
               new_pos--;
           }
  
@@ -1416,11 +1461,10 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
      switch ((hb_tag_t) plan->props.script)
      {
        case HB_SCRIPT_TAMIL:
-      case HB_SCRIPT_SINHALA:
         break;
  
        default:
-       /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
+       /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil.
          * This means, half forms are submerged into the main consonant's cluster.
          * This is unnecessary, and makes cursor positioning harder, but that's what
          * Uniscribe does. */
@@ -1431,13 +1475,13 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
  }
  
  
-static void
+static bool
  final_reordering_indic (const hb_ot_shape_plan_t *plan,
                         hb_font_t *font HB_UNUSED,
                         hb_buffer_t *buffer)
  {
    unsigned int count = buffer->len;
-  if (unlikely (!count)) return;
+  if (unlikely (!count)) return false;
  
    if (buffer->message (font, "start reordering indic final")) {
      foreach_syllable (buffer, start, end)
@@ -1447,6 +1491,8 @@ final_reordering_indic (const hb_ot_shape_plan_t *plan,
  
    HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
    HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
+
+  return false;
  }
  
  
@@ -1455,7 +1501,9 @@ preprocess_text_indic (const hb_ot_shape_plan_t *plan,
                        hb_buffer_t              *buffer,
                        hb_font_t                *font)
  {
-  _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+  if (!indic_plan->uniscribe_bug_compatible)
+    _hb_preprocess_text_vowel_constraints (plan, buffer, font);
  }
  
  static bool
@@ -1488,48 +1536,6 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c,
  #endif
    }
  
-  if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu)))
-  {
-    /*
-     * Sinhala split matras...  Let the fun begin.
-     *
-     * These four characters have Unicode decompositions.  However, Uniscribe
-     * decomposes them "Khmer-style", that is, it uses the character itself to
-     * get the second half.  The first half of all four decompositions is always
-     * U+0DD9.
-     *
-     * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
-     * broken with Uniscribe.  But we need to support them.  As such, we only
-     * do the Uniscribe-style decomposition if the character is transformed into
-     * its "sec.half" form by the 'pstf' feature.  Otherwise, we fall back to
-     * Unicode decomposition.
-     *
-     * Note that we can't unconditionally use Unicode decomposition.  That would
-     * break some other fonts, that are designed to work with Uniscribe, and
-     * don't have positioning features for the Unicode-style decomposition.
-     *
-     * Argh...
-     *
-     * The Uniscribe behavior is now documented in the newly published Sinhala
-     * spec in 2012:
-     *
-     *   https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
-     */
-
-
-    const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
-    hb_codepoint_t glyph;
-    if (indic_plan->uniscribe_bug_compatible ||
-       (c->font->get_nominal_glyph (ab, &glyph) &&
-        indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))
-    {
-      /* Ok, safe to use Uniscribe-style decomposition. */
-      *a = 0x0DD9u;
-      *b = ab;
-      return true;
-    }
-  }
-
    return (bool) c->unicode->decompose (ab, a, b);
  }
  
@@ -1550,7 +1556,7 @@ compose_indic (const hb_ot_shape_normalize_context_t *c,
  }
  
  
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
+const hb_ot_shaper_t _hb_ot_shaper_indic =
  {
    collect_features_indic,
    override_features_indic,
@@ -1558,12 +1564,12 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
    data_destroy_indic,
    preprocess_text_indic,
    nullptr, /* postprocess_glyphs */
-  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
    decompose_indic,
    compose_indic,
    setup_masks_indic,
-  HB_TAG_NONE, /* gpos_tag */
    nullptr, /* reorder_marks */
+  HB_TAG_NONE, /* gpos_tag */
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
    HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
    false, /* fallback_position */
  };