src/hb-ot-shape-complex-indic.cc

   1 /*
   2  * Copyright © 2011,2012  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26
  27 #include "hb-ot-shape-complex-indic-private.hh"
  28 #include "hb-ot-layout-private.hh"
  29
  30
  31 /*
  32  * Global Indic shaper options.
  33  */
  34
  /*
   * Process-wide Indic shaper switches, packed into single-bit flags so the
   * whole struct can be overlaid on one int (see indic_options_union_t).
   *
   * The flags are declared unsigned: a signed one-bit field can only hold 0
   * and -1, so storing 1 into it is implementation-defined and the field
   * compares unequal to 1 afterwards.
   */
  struct indic_options_t
  {
    unsigned int initialized : 1;              /* Set by indic_options_init(). */
    unsigned int uniscribe_bug_compatible : 1; /* HB_OT_INDIC_OPTIONS contains "uniscribe-bug-compatible". */
  };
  40
  /* Overlays the option flags on a plain int so that the whole set can be
   * tested as a single value: indic_options() treats i == 0 as "not
   * initialized yet". */
  41 union indic_options_union_t {
  42   int i;
  43   indic_options_t opts;
  44 };
  /* The overlay only makes sense if the flags occupy exactly one int. */
  45 ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t));
  46
  47 static indic_options_union_t
  48 indic_options_init (void)
  49 {
  50   indic_options_union_t u;
  51   u.i = 0;
  52   u.opts.initialized = 1;
  53
  54   char *c = getenv ("HB_OT_INDIC_OPTIONS");
  55   u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
  56
  57   return u;
  58 }
  59
  60 static inline indic_options_t
  61 indic_options (void)
  62 {
  63   static indic_options_union_t options;
  64
  65   if (unlikely (!options.i)) {
  66     /* This is idempotent and threadsafe. */
  67     options = indic_options_init ();
  68   }
  69
  70   return options.opts;
  71 }
  72
  73
  74 /*
  75  * Indic configurations.  Note that we do not want to keep every single script-specific
  76  * behavior in these tables necessarily.  This should mainly be used for per-script
  77  * properties that are cheaper keeping here, than in the code.  Ie. if, say, one and
  78  * only one script has an exception, that one script can be if'ed directly in the code,
  79  * instead of adding a new flag in these structs.
  80  */
  81
  /* Per-script strategy for locating the base consonant of a syllable; see
   * the switch on config->base_pos in initial_reordering_consonant_syllable().
   *   BASE_POS_FIRST: scan forward from the start of the syllable.
   *   BASE_POS_LAST:  scan backwards from the end of the syllable. */
  82 enum base_position_t {
  83   BASE_POS_FIRST,
  84   BASE_POS_LAST
  85 };
  /* Per-script reph position.  Every enumerator is an alias of one of the
   * POS_* position constants (declared outside this part of the file), and
   * REPH_POS_DEFAULT has the same value as REPH_POS_BEFORE_POST. */
  86 enum reph_position_t {
  87   REPH_POS_DEFAULT     = POS_BEFORE_POST,
  88
  89   REPH_POS_AFTER_MAIN  = POS_AFTER_MAIN,
  90   REPH_POS_BEFORE_SUB  = POS_BEFORE_SUB,
  91   REPH_POS_AFTER_SUB   = POS_AFTER_SUB,
  92   REPH_POS_BEFORE_POST = POS_BEFORE_POST,
  93   REPH_POS_AFTER_POST  = POS_AFTER_POST
  94 };
  /* Per-script rule for how a reph is requested in the input text.
   * initial_reordering_consonant_syllable() checks the IMPLICIT, EXPLICIT and
   * LOG_REPHA modes when deciding whether the syllable starts with a reph. */
  95 enum reph_mode_t {
  96   REPH_MODE_IMPLICIT,  /* Reph formed out of initial Ra,H sequence. */
  97   REPH_MODE_EXPLICIT,  /* Reph formed out of initial Ra,H,ZWJ sequence. */
  98   REPH_MODE_VIS_REPHA, /* Encoded Repha character, no reordering needed. */
  99   REPH_MODE_LOG_REPHA  /* Encoded Repha character, needs reordering. */
 100 };
 /* One row of per-script shaping properties; see the indic_configs table. */
 101 struct indic_config_t
 102 {
 103   hb_script_t     script;       /* Script this row applies to. */
 104   bool            has_old_spec; /* Combined with the chosen script tag to decide is_old_spec. */
 105   hb_codepoint_t  virama;       /* Virama code point; 0 means "none" to get_virama_glyph(). */
 106   base_position_t base_pos;     /* How the base consonant is searched for. */
 107   reph_position_t reph_pos;     /* Reph position, as a POS_* value. */
 108   reph_mode_t     reph_mode;    /* How a reph is requested in the text. */
 109 };
 110
 /* Per-script configuration table.  Columns follow indic_config_t:
  * script, has_old_spec, virama, base_pos, reph_pos, reph_mode.
  * data_create_indic() starts from entry 0 and switches to the first later
  * entry whose script matches the plan's script. */
 111 static const indic_config_t indic_configs[] =
 112 {
 113   /* Default.  Should be first. */
 114   {HB_SCRIPT_INVALID,   false,     0,BASE_POS_LAST, REPH_POS_DEFAULT,    REPH_MODE_IMPLICIT},
 115   {HB_SCRIPT_DEVANAGARI,true, 0x094D,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT},
 116   {HB_SCRIPT_BENGALI,   true, 0x09CD,BASE_POS_LAST, REPH_POS_AFTER_SUB,  REPH_MODE_IMPLICIT},
 117   {HB_SCRIPT_GURMUKHI,  true, 0x0A4D,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT},
 118   {HB_SCRIPT_GUJARATI,  true, 0x0ACD,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT},
 119   {HB_SCRIPT_ORIYA,     true, 0x0B4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT},
 120   {HB_SCRIPT_TAMIL,     true, 0x0BCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT},
 121   {HB_SCRIPT_TELUGU,    true, 0x0C4D,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT},
 122   {HB_SCRIPT_KANNADA,   true, 0x0CCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT},
 123   {HB_SCRIPT_MALAYALAM, true, 0x0D4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA},
 124   {HB_SCRIPT_SINHALA,   false,0x0DCA,BASE_POS_FIRST,REPH_POS_AFTER_MAIN, REPH_MODE_EXPLICIT},
 125   {HB_SCRIPT_KHMER,     false,0x17D2,BASE_POS_FIRST,REPH_POS_DEFAULT,    REPH_MODE_VIS_REPHA},
 126   /* Myanmar does not have the "old_indic" behavior, even though it has a "new" tag. */
 127   {HB_SCRIPT_MYANMAR,   false,0x1039,BASE_POS_LAST, REPH_POS_DEFAULT,    REPH_MODE_EXPLICIT},
 128 };
 129
 130
 131
 132 /*
 133  * Indic shaper.
 134  */
 135
 /* One OpenType feature used by the Indic shaper. */
 136 struct feature_list_t {
 137   hb_tag_t tag;        /* Four-letter feature tag. */
 138   hb_bool_t is_global; /* Passed on to add_bool_feature(); global features get a zero entry in mask_array. */
 139 };
 140
 /* Features registered by collect_features_indic(), in application order.
  * The entries before index INDIC_BASIC_FEATURES are each followed by a GSUB
  * pause; the remaining entries are registered together after the
  * final_reordering pause.  The order must match the index enum that follows
  * this table. */
 141 static const feature_list_t
 142 indic_features[] =
 143 {
 144   /*
 145    * Basic features.
 146    * These features are applied in order, one at a time, after initial_reordering.
 147    */
 148   {HB_TAG('n','u','k','t'), true},
 149   {HB_TAG('a','k','h','n'), true},
 150   {HB_TAG('r','p','h','f'), false},
 151   {HB_TAG('r','k','r','f'), true},
 152   {HB_TAG('p','r','e','f'), false},
 153   {HB_TAG('h','a','l','f'), false},
 154   {HB_TAG('b','l','w','f'), false},
 155   {HB_TAG('a','b','v','f'), false},
 156   {HB_TAG('p','s','t','f'), false},
 157   {HB_TAG('c','f','a','r'), false},
 158   {HB_TAG('c','j','c','t'), true},
 159   {HB_TAG('v','a','t','u'), true},
 160   /*
 161    * Other features.
 162    * These features are applied all at once, after final_reordering.
 163    */
 164   {HB_TAG('i','n','i','t'), false},
 165   {HB_TAG('p','r','e','s'), true},
 166   {HB_TAG('a','b','v','s'), true},
 167   {HB_TAG('b','l','w','s'), true},
 168   {HB_TAG('p','s','t','s'), true},
 169   {HB_TAG('h','a','l','n'), true},
 170   /* Positioning features, though we don't care about the types. */
 171   {HB_TAG('d','i','s','t'), true},
 172   {HB_TAG('a','b','v','m'), true},
 173   {HB_TAG('b','l','w','m'), true},
 174 };
 175
 176 /*
 177  * Must be in the same order as the indic_features array.
 178  */
 /* Indices into indic_features[] and indic_shape_plan_t::mask_array[].
  * In the code visible in this part of the file only the names without a
  * leading underscore are used by name.
  * NOTE(review): the underscore-prefixed names look like placeholders that
  * merely keep the numbering aligned -- confirm against the rest of the file. */
 179 enum {
 180   _NUKT,
 181   _AKHN,
 182   RPHF,
 183   _RKRF,
 184   PREF,
 185   HALF,
 186   BLWF,
 187   ABVF,
 188   PSTF,
 189   CFAR,
 190   _CJCT,
 191   _VATU,
 192
 193   INIT,
 194   _PRES,
 195   _ABVS,
 196   _BLWS,
 197   _PSTS,
 198   _HALN,
 199   _DIST,
 200   _ABVM,
 201   _BLWM,
 202
 /* Total number of features, and the number of leading "basic" features
  * that collect_features_indic() registers with a pause after each one. */
 203   INDIC_NUM_FEATURES,
 204   INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
 205 };
 206
 /* Forward declarations of the GSUB pause callbacks, so that
  * collect_features_indic() can register them ahead of their definitions. */
 207 static void
 208 setup_syllables (const hb_ot_shape_plan_t *plan,
 209                  hb_font_t *font,
 210                  hb_buffer_t *buffer);
 211 static void
 212 initial_reordering (const hb_ot_shape_plan_t *plan,
 213                     hb_font_t *font,
 214                     hb_buffer_t *buffer);
 215 static void
 216 final_reordering (const hb_ot_shape_plan_t *plan,
 217                   hb_font_t *font,
 218                   hb_buffer_t *buffer);
 219
 220 static void
 221 collect_features_indic (hb_ot_shape_planner_t *plan)
 222 {
 223   hb_ot_map_builder_t *map = &plan->map;
 224
 225   /* Do this before any lookups have been applied. */
 226   map->add_gsub_pause (setup_syllables);
 227
 228   map->add_bool_feature (HB_TAG('l','o','c','l'));
 229   /* The Indic specs do not require ccmp, but we apply it here since if
 230    * there is a use of it, it's typically at the beginning. */
 231   map->add_bool_feature (HB_TAG('c','c','m','p'));
 232
 233
 234   unsigned int i = 0;
 235   map->add_gsub_pause (initial_reordering);
 236   for (; i < INDIC_BASIC_FEATURES; i++) {
 237     map->add_bool_feature (indic_features[i].tag, indic_features[i].is_global);
 238     map->add_gsub_pause (NULL);
 239   }
 240   map->add_gsub_pause (final_reordering);
 241   for (; i < INDIC_NUM_FEATURES; i++) {
 242     map->add_bool_feature (indic_features[i].tag, indic_features[i].is_global);
 243   }
 244 }
 245
 246 static void
 247 override_features_indic (hb_ot_shape_planner_t *plan)
 248 {
 249   /* Uniscribe does not apply 'kern'. */
 250   if (indic_options ().uniscribe_bug_compatible)
 251     plan->map.add_feature (HB_TAG('k','e','r','n'), 0, true);
 252
 253   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, true);
 254 }
 255
 256
 257 struct would_substitute_feature_t
 258 {
 259   inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag)
 260   {
 261     map->get_stage_lookups (0/*GSUB*/,
 262                             map->get_feature_stage (0/*GSUB*/, feature_tag),
 263                             &lookups, &count);
 264   }
 265
 266   inline bool would_substitute (hb_codepoint_t    *glyphs,
 267                                 unsigned int       glyphs_count,
 268                                 bool               zero_context,
 269                                 hb_face_t         *face) const
 270   {
 271     for (unsigned int i = 0; i < count; i++)
 272       if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
 273         return true;
 274     return false;
 275   }
 276
 277   private:
 278   const hb_ot_map_t::lookup_map_t *lookups;
 279   unsigned int count;
 280 };
 281
 282 struct indic_shape_plan_t
 283 {
 284   ASSERT_POD ();
 285
 286   inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
 287   {
 288     hb_codepoint_t glyph = virama_glyph;
 289     if (unlikely (virama_glyph == (hb_codepoint_t) -1))
 290     {
 291       if (!config->virama || !font->get_glyph (config->virama, 0, &glyph))
 292         glyph = 0;
 293       /* Technically speaking, the spec says we should apply 'locl' to virama too.
 294        * Maybe one day... */
 295
 296       /* Our get_glyph() function needs a font, so we can't get the virama glyph
 297        * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
 298       (const_cast<indic_shape_plan_t *> (this))->virama_glyph = glyph;
 299     }
 300
 301     *pglyph = glyph;
 302     return glyph != 0;
 303   }
 304
 305   const indic_config_t *config;
 306
 307   bool is_old_spec;
 308   hb_codepoint_t virama_glyph;
 309
 310   would_substitute_feature_t rphf;
 311   would_substitute_feature_t pref;
 312   would_substitute_feature_t blwf;
 313   would_substitute_feature_t pstf;
 314
 315   hb_mask_t mask_array[INDIC_NUM_FEATURES];
 316 };
 317
 318 static void *
 319 data_create_indic (const hb_ot_shape_plan_t *plan)
 320 {
 321   indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (indic_shape_plan_t));
 322   if (unlikely (!indic_plan))
 323     return NULL;
 324
 325   indic_plan->config = &indic_configs[0];
 326   for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)
 327     if (plan->props.script == indic_configs[i].script) {
 328       indic_plan->config = &indic_configs[i];
 329       break;
 330     }
 331
 332   indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FF) != '2');
 333   indic_plan->virama_glyph = (hb_codepoint_t) -1;
 334
 335   indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'));
 336   indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'));
 337   indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'));
 338   indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'));
 339
 340   for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)
 341     indic_plan->mask_array[i] = indic_features[i].is_global ? 0 : plan->map.get_1_mask (indic_features[i].tag);
 342
 343   return indic_plan;
 344 }
 345
 /* Releases per-plan data; data_create_indic() allocates it with calloc(). */
 346 static void
 347 data_destroy_indic (void *data)
 348 {
 349   free (data);
 350 }
 351
 352 static indic_position_t
 353 consonant_position_from_face (const indic_shape_plan_t *indic_plan,
 354                               hb_codepoint_t *glyphs, unsigned int glyphs_len,
 355                               hb_face_t      *face)
 356 {
 357   bool zero_context = indic_plan->is_old_spec ? false : true;
 358   if (indic_plan->pref.would_substitute (glyphs, glyphs_len, zero_context, face)) return POS_BELOW_C;
 359   if (indic_plan->blwf.would_substitute (glyphs, glyphs_len, zero_context, face)) return POS_BELOW_C;
 360   if (indic_plan->pstf.would_substitute (glyphs, glyphs_len, zero_context, face)) return POS_POST_C;
 361   return POS_BASE_C;
 362 }
 363
 364
 /* Kinds of syllable the reordering code dispatches on.  The kind is read
  * back from the low four bits of a glyph's syllable() value. */
 365 enum syllable_type_t {
 366   consonant_syllable,
 367   vowel_syllable,
 368   standalone_cluster,
 369   broken_cluster,
 370   non_indic_cluster,
 371 };
 372
 373 #include "hb-ot-shape-complex-indic-machine.hh"
 374
 375
 376 static void
 377 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
 378                    hb_buffer_t              *buffer,
 379                    hb_font_t                *font HB_UNUSED)
 380 {
 381   HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);
 382   HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);
 383
 384   /* We cannot setup masks here.  We save information about characters
 385    * and setup masks later on in a pause-callback. */
 386
 387   unsigned int count = buffer->len;
 388   for (unsigned int i = 0; i < count; i++)
 389     set_indic_properties (buffer->info[i]);
 390 }
 391
 /* GSUB pause callback; collect_features_indic() registers it ahead of every
  * feature.  It only runs find_syllables() on the buffer.
  * NOTE(review): find_syllables() presumably comes from the included
  * hb-ot-shape-complex-indic-machine.hh -- confirm. */
 392 static void
 393 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 394                  hb_font_t *font HB_UNUSED,
 395                  hb_buffer_t *buffer)
 396 {
 397   find_syllables (buffer);
 398 }
 399
 400 static int
 401 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 402 {
 403   int a = pa->indic_position();
 404   int b = pb->indic_position();
 405
 406   return a < b ? -1 : a == b ? 0 : +1;
 407 }
 408
 409
 410
 411 static void
 412 update_consonant_positions (const hb_ot_shape_plan_t *plan,
 413                             hb_font_t         *font,
 414                             hb_buffer_t       *buffer)
 415 {
 416   const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
 417
 418   unsigned int consonant_pos = indic_plan->is_old_spec ? 0 : 1;
 419   hb_codepoint_t glyphs[2];
 420   if (indic_plan->get_virama_glyph (font, &glyphs[1 - consonant_pos]))
 421   {
 422     hb_face_t *face = font->face;
 423     unsigned int count = buffer->len;
 424     for (unsigned int i = 0; i < count; i++)
 425       if (buffer->info[i].indic_position() == POS_BASE_C) {
 426         glyphs[consonant_pos] = buffer->info[i].codepoint;
 427         buffer->info[i].indic_position() = consonant_position_from_face (indic_plan, glyphs, 2, face);
 428       }
 429   }
 430 }
 431
 432
 433 /* Rules from:
 434  * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
 435
 436 static void
 437 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
 438                                        hb_face_t *face,
 439                                        hb_buffer_t *buffer,
 440                                        unsigned int start, unsigned int end)
 441 {
 442   const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
 443   hb_glyph_info_t *info = buffer->info;
 444
 445
 446   /* 1. Find base consonant:
 447    *
 448    * The shaping engine finds the base consonant of the syllable, using the
 449    * following algorithm: starting from the end of the syllable, move backwards
 450    * until a consonant is found that does not have a below-base or post-base
 451    * form (post-base forms have to follow below-base forms), or that is not a
 452    * pre-base reordering Ra, or arrive at the first consonant. The consonant
 453    * stopped at will be the base.
 454    *
 455    *   o If the syllable starts with Ra + Halant (in a script that has Reph)
 456    *     and has more than one consonant, Ra is excluded from candidates for
 457    *     base consonants.
 458    */
 459
 460   unsigned int base = end;
 461   bool has_reph = false;
 462
 463   {
 464     /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 465      *    and has more than one consonant, Ra is excluded from candidates for
 466      *    base consonants. */
 467     unsigned int limit = start;
 468     if (indic_plan->mask_array[RPHF] &&
 469         start + 3 <= end &&
 470         (
 471          (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
 472          (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ)
 473         ))
 474     {
 475       /* See if it matches the 'rphf' feature. */
 476       hb_codepoint_t glyphs[2] = {info[start].codepoint, info[start + 1].codepoint};
 477       if (indic_plan->rphf.would_substitute (glyphs, ARRAY_LENGTH (glyphs), true, face))
 478       {
 479         limit += 2;
 480         while (limit < end && is_joiner (info[limit]))
 481           limit++;
 482         base = start;
 483         has_reph = true;
 484       }
 485     } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha)
 486     {
 487         limit += 1;
 488         while (limit < end && is_joiner (info[limit]))
 489           limit++;
 490         base = start;
 491         has_reph = true;
 492     }
 493
 494     switch (indic_plan->config->base_pos)
 495     {
 496       default:
 497         assert (false);
 498         /* fallthrough */
 499
 500       case BASE_POS_LAST:
 501       {
 502         /* -> starting from the end of the syllable, move backwards */
 503         unsigned int i = end;
 504         bool seen_below = false;
 505         do {
 506           i--;
 507           /* -> until a consonant is found */
 508           if (is_consonant (info[i]))
 509           {
 510             /* -> that does not have a below-base or post-base form
 511              * (post-base forms have to follow below-base forms), */
 512             if (info[i].indic_position() != POS_BELOW_C &&
 513                 (info[i].indic_position() != POS_POST_C || seen_below))
 514             {
 515               base = i;
 516               break;
 517             }
 518             if (info[i].indic_position() == POS_BELOW_C)
 519               seen_below = true;
 520
 521             /* -> or that is not a pre-base reordering Ra,
 522              *
 523              * IMPLEMENTATION NOTES:
 524              *
 525              * Our pre-base reordering Ra's are marked POS_BELOW, so will be skipped
 526              * by the logic above already.
 527              */
 528
 529             /* -> or arrive at the first consonant. The consonant stopped at will
 530              * be the base. */
 531             base = i;
 532           }
 533           else
 534           {
 535             /* A ZWJ after a Halant stops the base search, and requests an explicit
 536              * half form.
 537              * A ZWJ before a Halant, requests a subjoined form instead, and hence
 538              * search continues.  This is particularly important for Bengali
 539              * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
 540             if (start < i &&
 541                 info[i].indic_category() == OT_ZWJ &&
 542                 info[i - 1].indic_category() == OT_H)
 543               break;
 544           }
 545         } while (i > limit);
 546       }
 547       break;
 548
 549       case BASE_POS_FIRST:
 550       {
 551         /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */
 552
 553         if (!has_reph)
 554           base = limit;
 555
 556         /* Find the last base consonant that is not blocked by ZWJ.  If there is
 557          * a ZWJ right before a base consonant, that would request a subjoined form. */
 558         for (unsigned int i = limit; i < end; i++)
 559           if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
 560           {
 561             if (limit < i && info[i - 1].indic_category() == OT_ZWJ)
 562               break;
 563             else
 564               base = i;
 565           }
 566
 567         /* Mark all subsequent consonants as below. */
 568         for (unsigned int i = base + 1; i < end; i++)
 569           if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
 570             info[i].indic_position() = POS_BELOW_C;
 571       }
 572       break;
 573     }
 574
 575     /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 576      *    and has more than one consonant, Ra is excluded from candidates for
 577      *    base consonants.
 578      *
 579      *  Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
 580     if (has_reph && base == start && start - limit <= 2) {
 581       /* Have no other consonant, so Reph is not formed and Ra becomes base. */
 582       has_reph = false;
 583     }
 584   }
 585
 586   if (base < end)
 587     info[base].indic_position() = POS_BASE_C;
 588
 589
 590   /* 2. Decompose and reorder Matras:
 591    *
 592    * Each matra and any syllable modifier sign in the cluster are moved to the
 593    * appropriate position relative to the consonant(s) in the cluster. The
 594    * shaping engine decomposes two- or three-part matras into their constituent
 595    * parts before any repositioning. Matra characters are classified by which
 596    * consonant in a conjunct they have affinity for and are reordered to the
 597    * following positions:
 598    *
 599    *   o Before first half form in the syllable
 600    *   o After subjoined consonants
 601    *   o After post-form consonant
 602    *   o After main consonant (for above marks)
 603    *
 604    * IMPLEMENTATION NOTES:
 605    *
 606    * The normalize() routine has already decomposed matras for us, so we don't
 607    * need to worry about that.
 608    */
 609
 610
 611   /* 3.  Reorder marks to canonical order:
 612    *
 613    * Adjacent nukta and halant or nukta and vedic sign are always repositioned
 614    * if necessary, so that the nukta is first.
 615    *
 616    * IMPLEMENTATION NOTES:
 617    *
 618    * We don't need to do this: the normalize() routine already did this for us.
 619    */
 620
 621
 622   /* Reorder characters */
 623
 624   for (unsigned int i = start; i < base; i++)
 625     info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_position());
 626
 627   if (base < end)
 628     info[base].indic_position() = POS_BASE_C;
 629
 630   /* Mark final consonants.  A final consonant is one appearing after a matra,
 631    * like in Khmer. */
 632   for (unsigned int i = base + 1; i < end; i++)
 633     if (info[i].indic_category() == OT_M) {
 634       for (unsigned int j = i + 1; j < end; j++)
 635         if (is_consonant (info[j])) {
 636           info[j].indic_position() = POS_FINAL_C;
 637           break;
 638         }
 639       break;
 640     }
 641
 642   /* Handle beginning Ra */
 643   if (has_reph)
 644     info[start].indic_position() = POS_RA_TO_BECOME_REPH;
 645
 646   /* For old-style Indic script tags, move the first post-base Halant after
 647    * last consonant.  Only do this if there is *not* a Halant after last
 648    * consonant.  Otherwise it becomes messy. */
 649   if (indic_plan->is_old_spec) {
 650     for (unsigned int i = base + 1; i < end; i++)
 651       if (info[i].indic_category() == OT_H) {
 652         unsigned int j;
 653         for (j = end - 1; j > i; j--)
 654           if (is_consonant (info[j]) || info[j].indic_category() == OT_H)
 655             break;
 656         if (info[j].indic_category() != OT_H && j > i) {
 657           /* Move Halant to after last consonant. */
 658           hb_glyph_info_t t = info[i];
 659           memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
 660           info[j] = t;
 661         }
 662         break;
 663       }
 664   }
 665
 666   /* Attach misc marks to previous char to move with them. */
 667   {
 668     indic_position_t last_pos = POS_START;
 669     for (unsigned int i = start; i < end; i++)
 670     {
 671       if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | HALANT_OR_COENG_FLAGS)))
 672       {
 673         info[i].indic_position() = last_pos;
 674         if (unlikely (info[i].indic_category() == OT_H &&
 675                       info[i].indic_position() == POS_PRE_M))
 676         {
 677           /*
 678            * Uniscribe doesn't move the Halant with Left Matra.
 679            * TEST: U+092B,U+093F,U+094DE
 680            * We follow.  This is important for the Sinhala
 681            * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
 682            * where U+0DD9 is a left matra and U+0DCA is the virama.
 683            * We don't want to move the virama with the left matra.
 684            * TEST: U+0D9A,U+0DDA
 685            */
 686           for (unsigned int j = i; j > start; j--)
 687             if (info[j - 1].indic_position() != POS_PRE_M) {
 688               info[i].indic_position() = info[j - 1].indic_position();
 689               break;
 690             }
 691         }
 692       } else if (info[i].indic_position() != POS_SMVD) {
 693         last_pos = (indic_position_t) info[i].indic_position();
 694       }
 695     }
 696   }
 697   /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
 698   {
 699     unsigned int last_halant = end;
 700     for (unsigned int i = base + 1; i < end; i++)
 701       if (is_halant_or_coeng (info[i]))
 702         last_halant = i;
 703       else if (is_consonant (info[i])) {
 704         for (unsigned int j = last_halant; j < i; j++)
 705           if (info[j].indic_position() != POS_SMVD)
 706             info[j].indic_position() = info[i].indic_position();
 707       }
 708   }
 709
 710   {
 711     /* Things are out-of-control for post base positions, they may shuffle
 712      * around like crazy, so merge clusters.  For pre-base stuff, we handle
 713      * cluster issues in final reordering. */
 714     buffer->merge_clusters (base, end);
 715     /* Sit tight, rock 'n roll! */
 716     hb_bubble_sort (info + start, end - start, compare_indic_order);
 717     /* Find base again */
 718     base = end;
 719     for (unsigned int i = start; i < end; i++)
 720       if (info[i].indic_position() == POS_BASE_C) {
 721         base = i;
 722         break;
 723       }
 724   }
 725
 726   /* Setup masks now */
 727
 728   {
 729     hb_mask_t mask;
 730
 731     /* Reph */
 732     for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)
 733       info[i].mask |= indic_plan->mask_array[RPHF];
 734
 735     /* Pre-base */
 736     mask = indic_plan->mask_array[HALF];
 737     for (unsigned int i = start; i < base; i++)
 738       info[i].mask  |= mask;
 739     /* Base */
 740     mask = 0;
 741     if (base < end)
 742       info[base].mask |= mask;
 743     /* Post-base */
 744     mask = indic_plan->mask_array[BLWF] | indic_plan->mask_array[ABVF] | indic_plan->mask_array[PSTF];
 745     for (unsigned int i = base + 1; i < end; i++)
 746       info[i].mask  |= mask;
 747   }
 748
 749   if (indic_plan->mask_array[PREF] && base + 2 < end)
 750   {
 751     /* Find a Halant,Ra sequence and mark it for pre-base reordering processing. */
 752     for (unsigned int i = base + 1; i + 1 < end; i++) {
 753       hb_codepoint_t glyphs[2] = {info[i].codepoint, info[i + 1].codepoint};
 754       if (indic_plan->pref.would_substitute (glyphs, ARRAY_LENGTH (glyphs), true, face))
 755       {
 756         info[i++].mask |= indic_plan->mask_array[PREF];
 757         info[i++].mask |= indic_plan->mask_array[PREF];
 758
 759         /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
 760          * Read the feature spec.
 761          * This allows distinguishing the following cases with MS Khmer fonts:
 762          * U+1784,U+17D2,U+179A,U+17D2,U+1782
 763          * U+1784,U+17D2,U+1782,U+17D2,U+179A
 764          */
 765         for (; i < end; i++)
 766           info[i].mask |= indic_plan->mask_array[CFAR];
 767
 768         break;
 769       }
 770     }
 771   }
 772
 773   /* Apply ZWJ/ZWNJ effects */
 774   for (unsigned int i = start + 1; i < end; i++)
 775     if (is_joiner (info[i])) {
 776       bool non_joiner = info[i].indic_category() == OT_ZWNJ;
 777       unsigned int j = i;
 778
 779       do {
 780         j--;
 781
 782         /* A ZWJ disables CJCT, however, it's mere presence is enough
 783          * to disable ligation.  No explicit action needed. */
 784
 785         /* A ZWNJ disables HALF. */
 786         if (non_joiner)
 787           info[j].mask &= ~indic_plan->mask_array[HALF];
 788
 789       } while (j > start && !is_consonant (info[j]));
 790     }
 791 }
 792
 793
 /* Initial reordering of a vowel syllable at [start, end): forwards all of
  * its arguments unchanged to the consonant-syllable routine. */
 794 static void
 795 initial_reordering_vowel_syllable (const hb_ot_shape_plan_t *plan,
 796                                    hb_face_t *face,
 797                                    hb_buffer_t *buffer,
 798                                    unsigned int start, unsigned int end)
 799 {
 800   /* We made the vowels look like consonants.  So let's call the consonant logic! */
 801   initial_reordering_consonant_syllable (plan, face, buffer, start, end);
 802 }
 803
 804 static void
 805 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
 806                                        hb_face_t *face,
 807                                        hb_buffer_t *buffer,
 808                                        unsigned int start, unsigned int end)
 809 {
 810   /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain.
 811    * Only if not in compatibility mode that is... */
 812
 813   if (indic_options ().uniscribe_bug_compatible)
 814   {
 815     /* For dotted-circle, this is what Uniscribe does:
 816      * If dotted-circle is the last glyph, it just does nothing.
 817      * Ie. It doesn't form Reph. */
 818     if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
 819       return;
 820   }
 821
 822   initial_reordering_consonant_syllable (plan, face, buffer, start, end);
 823 }
 824
 /* Initial reordering of a broken cluster at [start, end): forwards all of
  * its arguments unchanged to the standalone-cluster routine. */
 825 static void
 826 initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
 827                                    hb_face_t *face,
 828                                    hb_buffer_t *buffer,
 829                                    unsigned int start, unsigned int end)
 830 {
 831   /* We already inserted dotted-circles, so just call the standalone_cluster. */
 832   initial_reordering_standalone_cluster (plan, face, buffer, start, end);
 833 }
 834
 /* Initial reordering of a non-Indic cluster: intentionally empty.  It
  * exists so that initial_reordering_syllable() has a handler for every
  * syllable type. */
 835 static void
 836 initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
 837                                       hb_face_t *face HB_UNUSED,
 838                                       hb_buffer_t *buffer HB_UNUSED,
 839                                       unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
 840 {
 841   /* Nothing to do right now.  If we ever switch to using the output
 842    * buffer in the reordering process, we'd need to next_glyph() here. */
 843 }
 844
 845
 846 static void
 847 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 848                              hb_face_t *face,
 849                              hb_buffer_t *buffer,
 850                              unsigned int start, unsigned int end)
 851 {
 852   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 853   switch (syllable_type) {
 854   case consonant_syllable:      initial_reordering_consonant_syllable (plan, face, buffer, start, end); return;
 855   case vowel_syllable:          initial_reordering_vowel_syllable     (plan, face, buffer, start, end); return;
 856   case standalone_cluster:      initial_reordering_standalone_cluster (plan, face, buffer, start, end); return;
 857   case broken_cluster:          initial_reordering_broken_cluster     (plan, face, buffer, start, end); return;
 858   case non_indic_cluster:       initial_reordering_non_indic_cluster  (plan, face, buffer, start, end); return;
 859   }
 860 }
 861
 862 static inline void
 863 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 864                        hb_font_t *font,
 865                        hb_buffer_t *buffer)
 866 {
 867   /* Note: This loop is extra overhead, but should not be measurable. */
 868   bool has_broken_syllables = false;
 869   unsigned int count = buffer->len;
 870   for (unsigned int i = 0; i < count; i++)
 871     if ((buffer->info[i].syllable() & 0x0F) == broken_cluster) {
 872       has_broken_syllables = true;
 873       break;
 874     }
 875   if (likely (!has_broken_syllables))
 876     return;
 877
 878
 879   hb_codepoint_t dottedcircle_glyph;
 880   if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph))
 881     return;
 882
 883   hb_glyph_info_t dottedcircle = {0};
 884   dottedcircle.codepoint = 0x25CC;
 885   set_indic_properties (dottedcircle);
 886   dottedcircle.codepoint = dottedcircle_glyph;
 887
 888   buffer->clear_output ();
 889
 890   buffer->idx = 0;
 891   unsigned int last_syllable = 0;
 892   while (buffer->idx < buffer->len)
 893   {
 894     unsigned int syllable = buffer->cur().syllable();
 895     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 896     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 897     {
 898       last_syllable = syllable;
 899
 900       hb_glyph_info_t info = dottedcircle;
 901       info.cluster = buffer->cur().cluster;
 902       info.mask = buffer->cur().mask;
 903       info.syllable() = buffer->cur().syllable();
 904
 905       /* Insert dottedcircle after possible Repha. */
 906       while (buffer->idx < buffer->len &&
 907              last_syllable == buffer->cur().syllable() &&
 908              buffer->cur().indic_category() == OT_Repha)
 909         buffer->next_glyph ();
 910
 911       buffer->output_info (info);
 912     }
 913     else
 914       buffer->next_glyph ();
 915   }
 916
 917   buffer->swap_buffers ();
 918 }
 919
 920 static void
 921 initial_reordering (const hb_ot_shape_plan_t *plan,
 922                     hb_font_t *font,
 923                     hb_buffer_t *buffer)
 924 {
 925   update_consonant_positions (plan, font, buffer);
 926   insert_dotted_circles (plan, font, buffer);
 927
 928   hb_glyph_info_t *info = buffer->info;
 929   unsigned int count = buffer->len;
 930   if (unlikely (!count)) return;
 931   unsigned int last = 0;
 932   unsigned int last_syllable = info[0].syllable();
 933   for (unsigned int i = 1; i < count; i++)
 934     if (last_syllable != info[i].syllable()) {
 935       initial_reordering_syllable (plan, font->face, buffer, last, i);
 936       last = i;
 937       last_syllable = info[last].syllable();
 938     }
 939   initial_reordering_syllable (plan, font->face, buffer, last, count);
 940 }
 941
 942 static void
 943 final_reordering_syllable (const hb_ot_shape_plan_t *plan,
 944                            hb_buffer_t *buffer,
 945                            unsigned int start, unsigned int end)
 946 {
 947   const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
 948   hb_glyph_info_t *info = buffer->info;
 949
 950   /* 4. Final reordering:
 951    *
 952    * After the localized forms and basic shaping forms GSUB features have been
 953    * applied (see below), the shaping engine performs some final glyph
 954    * reordering before applying all the remaining font features to the entire
 955    * cluster.
 956    */
 957
 958   /* Find base again */
 959   unsigned int base;
 960   for (base = start; base < end; base++)
 961     if (info[base].indic_position() >= POS_BASE_C) {
 962       if (start < base && info[base].indic_position() > POS_BASE_C)
 963         base--;
 964       break;
 965     }
 966
 967
 968   /*   o Reorder matras:
 969    *
 970    *     If a pre-base matra character had been reordered before applying basic
 971    *     features, the glyph can be moved closer to the main consonant based on
 972    *     whether half-forms had been formed. Actual position for the matra is
 973    *     defined as “after last standalone halant glyph, after initial matra
 974    *     position and before the main consonant”. If ZWJ or ZWNJ follow this
 975    *     halant, position is moved after it.
 976    */
 977
 978   if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
 979   {
 980     /* If we lost track of base, alas, position before last thingy. */
 981     unsigned int new_pos = base == end ? base - 2 : base - 1;
 982
 983     /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
 984      * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
 985      * We want to position matra after them.
 986      */
 987     if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
 988     {
 989       while (new_pos > start &&
 990              !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))))
 991         new_pos--;
 992
 993       /* If we found no Halant we are done.
 994        * Otherwise only proceed if the Halant does
 995        * not belong to the Matra itself! */
 996       if (is_halant_or_coeng (info[new_pos]) &&
 997           info[new_pos].indic_position() != POS_PRE_M)
 998       {
 999         /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
1000         if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
1001           new_pos++;
1002       }
1003       else
1004         new_pos = start; /* No move. */
1005     }
1006
1007     if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M)
1008     {
1009       /* Now go see if there's actually any matras... */
1010       for (unsigned int i = new_pos; i > start; i--)
1011         if (info[i - 1].indic_position () == POS_PRE_M)
1012         {
1013           unsigned int old_pos = i - 1;
1014           hb_glyph_info_t tmp = info[old_pos];
1015           memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
1016           info[new_pos] = tmp;
1017           new_pos--;
1018         }
1019       buffer->merge_clusters (new_pos, MIN (end, base + 1));
1020     } else {
1021       for (unsigned int i = start; i < base; i++)
1022         if (info[i].indic_position () == POS_PRE_M) {
1023           buffer->merge_clusters (i, MIN (end, base + 1));
1024           break;
1025         }
1026     }
1027   }
1028
1029
1030   /*   o Reorder reph:
1031    *
1032    *     Reph’s original position is always at the beginning of the syllable,
1033    *     (i.e. it is not reordered at the character reordering stage). However,
1034    *     it will be reordered according to the basic-forms shaping results.
1035    *     Possible positions for reph, depending on the script, are; after main,
1036    *     before post-base consonant forms, and after post-base consonant forms.
1037    */
1038
1039   /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
1040    * Which means that the font has failed to ligate the Reph.  In which case, we
1041    * shouldn't move. */
1042   if (start + 1 < end &&
1043       info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
1044       info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH)
1045   {
1046     unsigned int new_reph_pos;
1047     reph_position_t reph_pos = indic_plan->config->reph_pos;
1048
1049     /* XXX Figure out old behavior too */
1050
1051     /*       1. If reph should be positioned after post-base consonant forms,
1052      *          proceed to step 5.
1053      */
1054     if (reph_pos == REPH_POS_AFTER_POST)
1055     {
1056       goto reph_step_5;
1057     }
1058
1059     /*       2. If the reph repositioning class is not after post-base: target
1060      *          position is after the first explicit halant glyph between the
1061      *          first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1062      *          are following this halant, position is moved after it. If such
1063      *          position is found, this is the target position. Otherwise,
1064      *          proceed to the next step.
1065      *
1066      *          Note: in old-implementation fonts, where classifications were
1067      *          fixed in shaping engine, there was no case where reph position
1068      *          will be found on this step.
1069      */
1070     {
1071       new_reph_pos = start + 1;
1072       while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos]))
1073         new_reph_pos++;
1074
1075       if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) {
1076         /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1077         if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
1078           new_reph_pos++;
1079         goto reph_move;
1080       }
1081     }
1082
1083     /*       3. If reph should be repositioned after the main consonant: find the
1084      *          first consonant not ligated with main, or find the first
1085      *          consonant that is not a potential pre-base reordering Ra.
1086      */
1087     if (reph_pos == REPH_POS_AFTER_MAIN)
1088     {
1089       new_reph_pos = base;
1090       /* XXX Skip potential pre-base reordering Ra. */
1091       while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)
1092         new_reph_pos++;
1093       if (new_reph_pos < end)
1094         goto reph_move;
1095     }
1096
1097     /*       4. If reph should be positioned before post-base consonant, find
1098      *          first post-base classified consonant not ligated with main. If no
1099      *          consonant is found, the target position should be before the
1100      *          first matra, syllable modifier sign or vedic sign.
1101      */
1102     /* This is our take on what step 4 is trying to say (and failing, BADLY). */
1103     if (reph_pos == REPH_POS_AFTER_SUB)
1104     {
1105       new_reph_pos = base;
1106       while (new_reph_pos < end &&
1107              !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
1108         new_reph_pos++;
1109       if (new_reph_pos < end)
1110         goto reph_move;
1111     }
1112
1113     /*       5. If no consonant is found in steps 3 or 4, move reph to a position
1114      *          immediately before the first post-base matra, syllable modifier
1115      *          sign or vedic sign that has a reordering class after the intended
1116      *          reph position. For example, if the reordering position for reph
1117      *          is post-main, it will skip above-base matras that also have a
1118      *          post-main position.
1119      */
1120     reph_step_5:
1121     {
1122       /* Copied from step 2. */
1123       new_reph_pos = start + 1;
1124       while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos]))
1125         new_reph_pos++;
1126
1127       if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) {
1128         /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1129         if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
1130           new_reph_pos++;
1131         goto reph_move;
1132       }
1133     }
1134
1135     /*       6. Otherwise, reorder reph to the end of the syllable.
1136      */
1137     {
1138       new_reph_pos = end - 1;
1139       while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
1140         new_reph_pos--;
1141
1142       /*
1143        * If the Reph is to be ending up after a Matra,Halant sequence,
1144        * position it before that Halant so it can interact with the Matra.
1145        * However, if it's a plain Consonant,Halant we shouldn't do that.
1146        * Uniscribe doesn't do this.
1147        * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1148        */
1149       if (!indic_options ().uniscribe_bug_compatible &&
1150           unlikely (is_halant_or_coeng (info[new_reph_pos]))) {
1151         for (unsigned int i = base + 1; i < new_reph_pos; i++)
1152           if (info[i].indic_category() == OT_M) {
1153             /* Ok, got it. */
1154             new_reph_pos--;
1155           }
1156       }
1157       goto reph_move;
1158     }
1159
1160     reph_move:
1161     {
1162       /* Yay, one big cluster! Merge before moving. */
1163       buffer->merge_clusters (start, end);
1164
1165       /* Move */
1166       hb_glyph_info_t reph = info[start];
1167       memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
1168       info[new_reph_pos] = reph;
1169     }
1170   }
1171
1172
1173   /*   o Reorder pre-base reordering consonants:
1174    *
1175    *     If a pre-base reordering consonant is found, reorder it according to
1176    *     the following rules:
1177    */
1178
1179   if (indic_plan->mask_array[PREF] && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */
1180   {
1181     for (unsigned int i = base + 1; i < end; i++)
1182       if ((info[i].mask & indic_plan->mask_array[PREF]) != 0)
1183       {
1184         /*       1. Only reorder a glyph produced by substitution during application
1185          *          of the <pref> feature. (Note that a font may shape a Ra consonant with
1186          *          the feature generally but block it in certain contexts.)
1187          */
1188         if (i + 1 == end || (info[i + 1].mask & indic_plan->mask_array[PREF]) == 0)
1189         {
1190           /*
1191            *       2. Try to find a target position the same way as for pre-base matra.
1192            *          If it is found, reorder pre-base consonant glyph.
1193            *
1194            *       3. If position is not found, reorder immediately before main
1195            *          consonant.
1196            */
1197
1198           unsigned int new_pos = base;
1199           /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
1200            * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1201            * We want to position matra after them.
1202            */
1203           if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
1204           {
1205             while (new_pos > start &&
1206                    !(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS)))
1207               new_pos--;
1208
1209             /* In Khmer coeng model, a V,Ra can go *after* matras.  If it goes after a
1210              * split matra, it should be reordered to *before* the left part of such matra. */
1211             if (new_pos > start && info[new_pos - 1].indic_category() == OT_M)
1212             {
1213               unsigned int old_pos = i;
1214               for (unsigned int i = base + 1; i < old_pos; i++)
1215                 if (info[i].indic_category() == OT_M)
1216                 {
1217                   new_pos--;
1218                   break;
1219                 }
1220             }
1221           }
1222
1223           if (new_pos > start && is_halant_or_coeng (info[new_pos - 1]))
1224             /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
1225             if (new_pos < end && is_joiner (info[new_pos]))
1226               new_pos++;
1227
1228           {
1229             unsigned int old_pos = i;
1230             buffer->merge_clusters (new_pos, old_pos + 1);
1231             hb_glyph_info_t tmp = info[old_pos];
1232             memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
1233             info[new_pos] = tmp;
1234           }
1235         }
1236
1237         break;
1238       }
1239   }
1240
1241
1242   /* Apply 'init' to the Left Matra if it's a word start. */
1243   if (info[start].indic_position () == POS_PRE_M &&
1244       (!start ||
1245        !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) &
1246          FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
1247     info[start].mask |= indic_plan->mask_array[INIT];
1248
1249
1250   /*
1251    * Finish off the clusters and go home!
1252    */
1253   if (indic_options ().uniscribe_bug_compatible)
1254   {
1255     /* Uniscribe merges the entire cluster.
1256      * This means, half forms are submerged into the main consonants cluster.
1257      * This is unnecessary, and makes cursor positioning harder, but that's what
1258      * Uniscribe does. */
1259     buffer->merge_clusters (start, end);
1260   }
1261 }
1262
1263
1264 static void
1265 final_reordering (const hb_ot_shape_plan_t *plan,
1266                   hb_font_t *font HB_UNUSED,
1267                   hb_buffer_t *buffer)
1268 {
1269   unsigned int count = buffer->len;
1270   if (unlikely (!count)) return;
1271
1272   hb_glyph_info_t *info = buffer->info;
1273   unsigned int last = 0;
1274   unsigned int last_syllable = info[0].syllable();
1275   for (unsigned int i = 1; i < count; i++)
1276     if (last_syllable != info[i].syllable()) {
1277       final_reordering_syllable (plan, buffer, last, i);
1278       last = i;
1279       last_syllable = info[last].syllable();
1280     }
1281   final_reordering_syllable (plan, buffer, last, count);
1282
1283   /* Zero syllables now... */
1284   for (unsigned int i = 0; i < count; i++)
1285     info[i].syllable() = 0;
1286
1287   HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
1288   HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
1289 }
1290
1291
1292 static hb_ot_shape_normalization_mode_t
1293 normalization_preference_indic (const hb_segment_properties_t *props HB_UNUSED)
1294 {
1295   return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
1296 }
1297
1298 static bool
1299 decompose_indic (const hb_ot_shape_normalize_context_t *c,
1300                  hb_codepoint_t  ab,
1301                  hb_codepoint_t *a,
1302                  hb_codepoint_t *b)
1303 {
1304   switch (ab)
1305   {
1306     /* Don't decompose these. */
1307     case 0x0931  : return false;
1308     case 0x0B94  : return false;
1309
1310
1311     /*
1312      * Decompose split matras that don't have Unicode decompositions.
1313      */
1314
1315     case 0x0F77  : *a = 0x0FB2; *b= 0x0F81; return true;
1316     case 0x0F79  : *a = 0x0FB3; *b= 0x0F81; return true;
1317     case 0x17BE  : *a = 0x17C1; *b= 0x17BE; return true;
1318     case 0x17BF  : *a = 0x17C1; *b= 0x17BF; return true;
1319     case 0x17C0  : *a = 0x17C1; *b= 0x17C0; return true;
1320     case 0x17C4  : *a = 0x17C1; *b= 0x17C4; return true;
1321     case 0x17C5  : *a = 0x17C1; *b= 0x17C5; return true;
1322     case 0x1925  : *a = 0x1920; *b= 0x1923; return true;
1323     case 0x1926  : *a = 0x1920; *b= 0x1924; return true;
1324     case 0x1B3C  : *a = 0x1B42; *b= 0x1B3C; return true;
1325     case 0x1112E  : *a = 0x11127; *b= 0x11131; return true;
1326     case 0x1112F  : *a = 0x11127; *b= 0x11132; return true;
1327 #if 0
1328     /* This one has no decomposition in Unicode, but needs no decomposition either. */
1329     /* case 0x0AC9  : return false; */
1330     case 0x0B57  : *a = no decomp, -> RIGHT; return true;
1331     case 0x1C29  : *a = no decomp, -> LEFT; return true;
1332     case 0xA9C0  : *a = no decomp, -> RIGHT; return true;
1333     case 0x111BF  : *a = no decomp, -> ABOVE; return true;
1334 #endif
1335   }
1336
1337   if ((ab == 0x0DDA || hb_in_range<hb_codepoint_t> (ab, 0x0DDC, 0x0DDE)))
1338   {
1339     /*
1340      * Sinhala split matras...  Let the fun begin.
1341      *
1342      * These four characters have Unicode decompositions.  However, Uniscribe
1343      * decomposes them "Khmer-style", that is, it uses the character itself to
1344      * get the second half.  The first half of all four decompositions is always
1345      * U+0DD9.
1346      *
1347      * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
1348      * broken with Uniscribe.  But we need to support them.  As such, we only
1349      * do the Uniscribe-style decomposition if the character is transformed into
1350      * its "sec.half" form by the 'pstf' feature.  Otherwise, we fall back to
1351      * Unicode decomposition.
1352      *
1353      * Note that we can't unconditionally use Unicode decomposition.  That would
1354      * break some other fonts, that are designed to work with Uniscribe, and
1355      * don't have positioning features for the Unicode-style decomposition.
1356      *
1357      * Argh...
1358      *
1359      * The Uniscribe behavior is now documented in the newly published Sinhala
1360      * spec in 2012:
1361      *
1362      *   http://www.microsoft.com/typography/OpenTypeDev/sinhala/intro.htm#shaping
1363      */
1364
1365     const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
1366
1367     hb_codepoint_t glyph;
1368
1369     if (indic_options ().uniscribe_bug_compatible ||
1370         (c->font->get_glyph (ab, 0, &glyph) &&
1371          indic_plan->pstf.would_substitute (&glyph, 1, true, c->font->face)))
1372     {
1373       /* Ok, safe to use Uniscribe-style decomposition. */
1374       *a = 0x0DD9;
1375       *b = ab;
1376       return true;
1377     }
1378   }
1379
1380   return c->unicode->decompose (ab, a, b);
1381 }
1382
1383 static bool
1384 compose_indic (const hb_ot_shape_normalize_context_t *c,
1385                hb_codepoint_t  a,
1386                hb_codepoint_t  b,
1387                hb_codepoint_t *ab)
1388 {
1389   /* Avoid recomposing split matras. */
1390   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
1391     return false;
1392
1393   /* Composition-exclusion exceptions that we want to recompose. */
1394   if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
1395
1396   return c->unicode->compose (a, b, ab);
1397 }
1398
1399
1400 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
1401 {
1402   "indic",
1403   collect_features_indic,
1404   override_features_indic,
1405   data_create_indic,
1406   data_destroy_indic,
1407   NULL, /* preprocess_text */
1408   normalization_preference_indic,
1409   decompose_indic,
1410   compose_indic,
1411   setup_masks_indic,
1412   false, /* zero_width_attached_marks */
1413   false, /* fallback_position */
1414 };