#ifndef HB_NO_OT_SHAPE
-#include "hb-ot-shape-complex-indic.hh"
-#include "hb-ot-shape-complex-indic-machine.hh"
-#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-shaper-indic.hh"
+#include "hb-ot-shaper-indic-machine.hh"
+#include "hb-ot-shaper-vowel-constraints.hh"
#include "hb-ot-layout.hh"
*/
+static inline void
+set_indic_properties (hb_glyph_info_t &info)
+{
+ hb_codepoint_t u = info.codepoint;
+ unsigned int type = hb_indic_get_categories (u);
+
+ info.indic_category() = (indic_category_t) (type & 0xFFu);
+ info.indic_position() = (indic_position_t) (type >> 8);
+}
+
+
+static inline bool
+is_one_of (const hb_glyph_info_t &info, unsigned int flags)
+{
+ /* If it ligated, all bets are off. */
+ if (_hb_glyph_info_ligated (&info)) return false;
+ return !!(FLAG_UNSAFE (info.indic_category()) & flags);
+}
+
+/* Note:
+ *
+ * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
+ * cannot happen in a consonant syllable. The plus side however is, we can call the
+ * consonant syllable logic from the vowel syllable function and get it all right!
+ *
+ * Keep in sync with consonant_categories in the generator. */
+#define CONSONANT_FLAGS_INDIC (FLAG (I_Cat(C)) | FLAG (I_Cat(CS)) | FLAG (I_Cat(Ra)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(V)) | FLAG (I_Cat(PLACEHOLDER)) | FLAG (I_Cat(DOTTEDCIRCLE)))
+
+static inline bool
+is_consonant (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, CONSONANT_FLAGS_INDIC);
+}
+
+#define JOINER_FLAGS (FLAG (I_Cat(ZWJ)) | FLAG (I_Cat(ZWNJ)))
+
+static inline bool
+is_joiner (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, JOINER_FLAGS);
+}
+
+static inline bool
+is_halant (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, FLAG (I_Cat(H)));
+}
+
+struct hb_indic_would_substitute_feature_t
+{
+ void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+ {
+ zero_context = zero_context_;
+ lookups = map->get_stage_lookups (0/*GSUB*/,
+ map->get_feature_stage (0/*GSUB*/, feature_tag));
+ }
+
+ bool would_substitute (const hb_codepoint_t *glyphs,
+ unsigned int glyphs_count,
+ hb_face_t *face) const
+ {
+ for (const auto &lookup : lookups)
+ if (hb_ot_layout_lookup_would_substitute (face, lookup.index, glyphs, glyphs_count, zero_context))
+ return true;
+ return false;
+ }
+
+ private:
+ hb_array_t<const hb_ot_map_t::lookup_map_t> lookups;
+ bool zero_context;
+};
+
+
/*
* Indic configurations. Note that we do not want to keep every single script-specific
* behavior in these tables necessarily. This should mainly be used for per-script
* instead of adding a new flag in these structs.
*/
-enum base_position_t {
- BASE_POS_LAST_SINHALA,
- BASE_POS_LAST
-};
enum reph_position_t {
REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
hb_script_t script;
bool has_old_spec;
hb_codepoint_t virama;
- base_position_t base_pos;
reph_position_t reph_pos;
reph_mode_t reph_mode;
blwf_mode_t blwf_mode;
static const indic_config_t indic_configs[] =
{
/* Default. Should be first. */
- {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
- {HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
- {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
- {HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA,
- REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_INVALID, false, 0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_BENGALI, true, 0x09CDu,REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_GUJARATI, true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_ORIYA, true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_TAMIL, true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_TELUGU, true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
+ {HB_SCRIPT_KANNADA, true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
+ {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
};
-
-/*
- * Indic shaper.
- */
-
static const hb_ot_map_feature_t
indic_features[] =
{
* These features are applied in order, one at a time, after initial_reordering,
* constrained to the syllable.
*/
- {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS},
- {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
- {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
- {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
- {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS},
- {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
- {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
/*
* Other features.
* These features are applied all at once, after final_reordering, constrained
* Default Bengali font in Windows for example has intermixed
* lookups for init,pres,abvs,blws features.
*/
- {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS},
- {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
- {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
+ {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
};
/*
INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! */
};
-static void
+static bool
setup_syllables_indic (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer);
-static void
+static bool
initial_reordering_indic (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer);
-static void
+static bool
final_reordering_indic (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer);
/* Do this before any lookups have been applied. */
map->add_gsub_pause (setup_syllables_indic);
- map->enable_feature (HB_TAG('l','o','c','l'));
+ map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE);
/* The Indic specs do not require ccmp, but we apply it here since if
* there is a use of it, it's typically at the beginning. */
- map->enable_feature (HB_TAG('c','c','m','p'));
+ map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE);
unsigned int i = 0;
for (; i < INDIC_NUM_FEATURES; i++)
map->add_feature (indic_features[i]);
-
- map->add_gsub_pause (_hb_clear_syllables);
}
static void
override_features_indic (hb_ot_shape_planner_t *plan)
{
plan->map.disable_feature (HB_TAG('l','i','g','a'));
+ plan->map.add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var
}
{
bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
{
- hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+ hb_codepoint_t glyph = virama_glyph;
if (unlikely (glyph == (hb_codepoint_t) -1))
{
if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
/* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
* during shape planning... Instead, overwrite it here. */
- virama_glyph.set_relaxed ((int) glyph);
+ virama_glyph = (int) glyph;
}
*pglyph = glyph;
#ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE
indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
#endif
- indic_plan->virama_glyph.set_relaxed (-1);
+ indic_plan->virama_glyph = -1;
/* Use zero-context would_substitute() matching for new-spec of the main
* Indic scripts, and scripts with one spec only, but not for old-specs.
set_indic_properties (info[i]);
}
-static void
+static bool
setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
hb_font_t *font HB_UNUSED,
hb_buffer_t *buffer)
{
+ HB_BUFFER_ALLOCATE_VAR (buffer, syllable);
find_syllables_indic (buffer);
foreach_syllable (buffer, start, end)
buffer->unsafe_to_break (start, end);
+ return false;
}
static int
int a = pa->indic_position();
int b = pb->indic_position();
- return a < b ? -1 : a == b ? 0 : +1;
+ return (int) a - (int) b;
}
{
const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
- if (indic_plan->config->base_pos != BASE_POS_LAST)
- return;
-
hb_codepoint_t virama;
if (indic_plan->load_virama_glyph (font, &virama))
{
*/
if (buffer->props.script == HB_SCRIPT_KANNADA &&
start + 3 <= end &&
- is_one_of (info[start ], FLAG (OT_Ra)) &&
- is_one_of (info[start+1], FLAG (OT_H)) &&
- is_one_of (info[start+2], FLAG (OT_ZWJ)))
+ is_one_of (info[start ], FLAG (I_Cat(Ra))) &&
+ is_one_of (info[start+1], FLAG (I_Cat(H))) &&
+ is_one_of (info[start+2], FLAG (I_Cat(ZWJ))))
{
buffer->merge_clusters (start+1, start+3);
- hb_glyph_info_t tmp = info[start+1];
- info[start+1] = info[start+2];
- info[start+2] = tmp;
+ hb_swap (info[start+1], info[start+2]);
}
/* 1. Find base consonant:
start + 3 <= end &&
(
(indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
- (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ)
+ (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == I_Cat(ZWJ))
))
{
/* See if it matches the 'rphf' feature. */
base = start;
has_reph = true;
}
- } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha)
+ } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == I_Cat(Repha))
{
limit += 1;
while (limit < end && is_joiner (info[limit]))
has_reph = true;
}
- switch (indic_plan->config->base_pos)
{
- case BASE_POS_LAST:
- {
- /* -> starting from the end of the syllable, move backwards */
- unsigned int i = end;
- bool seen_below = false;
- do {
- i--;
- /* -> until a consonant is found */
- if (is_consonant (info[i]))
+ /* -> starting from the end of the syllable, move backwards */
+ unsigned int i = end;
+ bool seen_below = false;
+ do {
+ i--;
+ /* -> until a consonant is found */
+ if (is_consonant (info[i]))
+ {
+ /* -> that does not have a below-base or post-base form
+ * (post-base forms have to follow below-base forms), */
+ if (info[i].indic_position() != POS_BELOW_C &&
+ (info[i].indic_position() != POS_POST_C || seen_below))
{
- /* -> that does not have a below-base or post-base form
- * (post-base forms have to follow below-base forms), */
- if (info[i].indic_position() != POS_BELOW_C &&
- (info[i].indic_position() != POS_POST_C || seen_below))
- {
- base = i;
- break;
- }
- if (info[i].indic_position() == POS_BELOW_C)
- seen_below = true;
-
- /* -> or that is not a pre-base-reordering Ra,
- *
- * IMPLEMENTATION NOTES:
- *
- * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
- * by the logic above already.
- */
-
- /* -> or arrive at the first consonant. The consonant stopped at will
- * be the base. */
base = i;
+ break;
}
- else
- {
- /* A ZWJ after a Halant stops the base search, and requests an explicit
- * half form.
- * A ZWJ before a Halant, requests a subjoined form instead, and hence
- * search continues. This is particularly important for Bengali
- * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
- if (start < i &&
- info[i].indic_category() == OT_ZWJ &&
- info[i - 1].indic_category() == OT_H)
- break;
- }
- } while (i > limit);
- }
- break;
+ if (info[i].indic_position() == POS_BELOW_C)
+ seen_below = true;
- case BASE_POS_LAST_SINHALA:
- {
- /* Sinhala base positioning is slightly different from main Indic, in that:
- * 1. Its ZWJ behavior is different,
- * 2. We don't need to look into the font for consonant positions.
- */
-
- if (!has_reph)
- base = limit;
-
- /* Find the last base consonant that is not blocked by ZWJ. If there is
- * a ZWJ right before a base consonant, that would request a subjoined form. */
- for (unsigned int i = limit; i < end; i++)
- if (is_consonant (info[i]))
- {
- if (limit < i && info[i - 1].indic_category() == OT_ZWJ)
- break;
- else
- base = i;
- }
+ /* -> or that is not a pre-base-reordering Ra,
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
+ * by the logic above already.
+ */
- /* Mark all subsequent consonants as below. */
- for (unsigned int i = base + 1; i < end; i++)
- if (is_consonant (info[i]))
- info[i].indic_position() = POS_BELOW_C;
- }
- break;
+ /* -> or arrive at the first consonant. The consonant stopped at will
+ * be the base. */
+ base = i;
+ }
+ else
+ {
+ /* A ZWJ after a Halant stops the base search, and requests an explicit
+ * half form.
+ * A ZWJ before a Halant, requests a subjoined form instead, and hence
+ * search continues. This is particularly important for Bengali
+ * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
+ if (start < i &&
+ info[i].indic_category() == I_Cat(ZWJ) &&
+ info[i - 1].indic_category() == I_Cat(H))
+ break;
+ }
+ } while (i > limit);
}
/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
if (base < end)
info[base].indic_position() = POS_BASE_C;
- /* Mark final consonants. A final consonant is one appearing after a matra.
- * Happens in Sinhala. */
- for (unsigned int i = base + 1; i < end; i++)
- if (info[i].indic_category() == OT_M) {
- for (unsigned int j = i + 1; j < end; j++)
- if (is_consonant (info[j])) {
- info[j].indic_position() = POS_FINAL_C;
- break;
- }
- break;
- }
-
/* Handle beginning Ra */
if (has_reph)
info[start].indic_position() = POS_RA_TO_BECOME_REPH;
{
bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
for (unsigned int i = base + 1; i < end; i++)
- if (info[i].indic_category() == OT_H)
+ if (info[i].indic_category() == I_Cat(H))
{
unsigned int j;
for (j = end - 1; j > i; j--)
if (is_consonant (info[j]) ||
- (disallow_double_halants && info[j].indic_category() == OT_H))
+ (disallow_double_halants && info[j].indic_category() == I_Cat(H)))
break;
- if (info[j].indic_category() != OT_H && j > i) {
+ if (info[j].indic_category() != I_Cat(H) && j > i) {
/* Move Halant to after last consonant. */
hb_glyph_info_t t = info[i];
memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
indic_position_t last_pos = POS_START;
for (unsigned int i = start; i < end; i++)
{
- if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+ if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (I_Cat(N)) | FLAG (I_Cat(RS)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(H)))))
{
info[i].indic_position() = last_pos;
- if (unlikely (info[i].indic_category() == OT_H &&
+ if (unlikely (info[i].indic_category() == I_Cat(H) &&
info[i].indic_position() == POS_PRE_M))
{
/*
* Uniscribe doesn't move the Halant with Left Matra.
- * TEST: U+092B,U+093F,U+094DE
- * We follow. This is important for the Sinhala
- * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
- * where U+0DD9 is a left matra and U+0DCA is the virama.
- * We don't want to move the virama with the left matra.
- * TEST: U+0D9A,U+0DDA
+ * TEST: U+092B,U+093F,U+094D
+ * We follow.
*/
for (unsigned int j = i; j > start; j--)
if (info[j - 1].indic_position() != POS_PRE_M) {
}
}
} else if (info[i].indic_position() != POS_SMVD) {
+ if (info[i].indic_category() == I_Cat(MPst) &&
+ i > start && info[i - 1].indic_category() == I_Cat(SM))
+ info[i - 1].indic_position() = info[i].indic_position();
last_pos = (indic_position_t) info[i].indic_position();
}
}
if (info[j].indic_position() < POS_SMVD)
info[j].indic_position() = info[i].indic_position();
last = i;
- } else if (info[i].indic_category() == OT_M)
+ } else if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
last = i;
}
/* Sit tight, rock 'n roll! */
hb_stable_sort (info + start, end - start, compare_indic_order);
- /* Find base again */
+
+ /* Find base again; also flip left-matra sequence. */
+ unsigned first_left_matra = end;
+ unsigned last_left_matra = end;
base = end;
for (unsigned int i = start; i < end; i++)
+ {
if (info[i].indic_position() == POS_BASE_C)
{
base = i;
break;
}
+ else if (info[i].indic_position() == POS_PRE_M)
+ {
+ if (first_left_matra == end)
+ first_left_matra = i;
+ last_left_matra = i;
+ }
+ }
+ /* https://github.com/harfbuzz/harfbuzz/issues/3863 */
+ if (first_left_matra < last_left_matra)
+ {
+ /* No need to merge clusters, handled later. */
+ buffer->reverse_range (first_left_matra, last_left_matra + 1);
+ /* Reverse back nuktas, etc. */
+ unsigned i = first_left_matra;
+ for (unsigned j = i; j <= last_left_matra; j++)
+ if (FLAG_UNSAFE (info[j].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
+ {
+ buffer->reverse_range (i, j + 1);
+ i = j + 1;
+ }
+ }
+
/* Things are out-of-control for post base positions, they may shuffle
* around like crazy. In old-spec mode, we move halants around, so in
* that case merge all clusters after base. Otherwise, check the sort
* Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
*/
for (unsigned int i = start; i + 1 < base; i++)
- if (info[i ].indic_category() == OT_Ra &&
- info[i+1].indic_category() == OT_H &&
+ if (info[i ].indic_category() == I_Cat(Ra) &&
+ info[i+1].indic_category() == I_Cat(H) &&
(i + 2 == base ||
- info[i+2].indic_category() != OT_ZWJ))
+ info[i+2].indic_category() != I_Cat(ZWJ)))
{
info[i ].mask |= indic_plan->mask_array[INDIC_BLWF];
info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF];
/* Apply ZWJ/ZWNJ effects */
for (unsigned int i = start + 1; i < end; i++)
if (is_joiner (info[i])) {
- bool non_joiner = info[i].indic_category() == OT_ZWNJ;
+ bool non_joiner = info[i].indic_category() == I_Cat(ZWNJ);
unsigned int j = i;
do {
/* For dotted-circle, this is what Uniscribe does:
* If dotted-circle is the last glyph, it just does nothing.
* Ie. It doesn't form Reph. */
- if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
+ if (buffer->info[end - 1].indic_category() == I_Cat(DOTTEDCIRCLE))
return;
}
}
}
-static void
+static bool
initial_reordering_indic (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer)
{
+ bool ret = false;
if (!buffer->message (font, "start reordering indic initial"))
- return;
+ return ret;
update_consonant_positions_indic (plan, font, buffer);
- hb_syllabic_insert_dotted_circles (font, buffer,
- indic_broken_cluster,
- OT_DOTTEDCIRCLE,
- OT_Repha,
- POS_END);
+ if (hb_syllabic_insert_dotted_circles (font, buffer,
+ indic_broken_cluster,
+ I_Cat(DOTTEDCIRCLE),
+ I_Cat(Repha),
+ POS_END))
+ ret = true;
foreach_syllable (buffer, start, end)
initial_reordering_syllable_indic (plan, font->face, buffer, start, end);
(void) buffer->message (font, "end reordering indic initial");
+
+ return ret;
}
static void
* and possibly multiple substitutions happened prior to this
* phase, and that might have messed up our properties. Recover
* from a particular case of that where we're fairly sure that a
- * class of OT_H is desired but has been lost. */
+ * class of I_Cat(H) is desired but has been lost. */
/* We don't call load_virama_glyph(), since we know it's already
* loaded. */
- hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+ hb_codepoint_t virama_glyph = indic_plan->virama_glyph;
if (virama_glyph)
{
for (unsigned int i = start; i < end; i++)
_hb_glyph_info_multiplied (&info[i]))
{
/* This will make sure that this glyph passes is_halant() test. */
- info[i].indic_category() = OT_H;
+ info[i].indic_category() = I_Cat(H);
_hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
}
}
base = i;
while (base < end && is_halant (info[base]))
base++;
- info[base].indic_position() = POS_BASE_C;
+ if (base < end)
+ info[base].indic_position() = POS_BASE_C;
try_pref = false;
}
break;
}
+ if (base == end)
+ break;
}
/* For Malayalam, skip over unformed below- (but NOT post-) forms. */
if (buffer->props.script == HB_SCRIPT_MALAYALAM)
break;
}
if (base == end && start < base &&
- is_one_of (info[base - 1], FLAG (OT_ZWJ)))
+ is_one_of (info[base - 1], FLAG (I_Cat(ZWJ))))
base--;
if (base < end)
while (start < base &&
- is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_H))))
+ is_one_of (info[base], (FLAG (I_Cat(N)) | FLAG (I_Cat(H)))))
base--;
{
search:
while (new_pos > start &&
- !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
+ !(is_one_of (info[new_pos], (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H))))))
new_pos--;
/* If we found no Halant we are done.
if (new_pos + 1 < end)
{
/* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
- if (info[new_pos + 1].indic_category() == OT_ZWJ)
+ if (info[new_pos + 1].indic_category() == I_Cat(ZWJ))
{
/* Keep searching. */
if (new_pos > start)
*/
if (start + 1 < end &&
info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
- ((info[start].indic_category() == OT_Repha) ^
+ ((info[start].indic_category() == I_Cat(Repha)) ^
_hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
{
unsigned int new_reph_pos;
unlikely (is_halant (info[new_reph_pos])))
{
for (unsigned int i = base + 1; i < new_reph_pos; i++)
- if (info[i].indic_category() == OT_M) {
+ if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
+ {
/* Ok, got it. */
new_reph_pos--;
}
if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
{
while (new_pos > start &&
- !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_H))))
+ !(is_one_of (info[new_pos - 1], FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H)))))
new_pos--;
}
switch ((hb_tag_t) plan->props.script)
{
case HB_SCRIPT_TAMIL:
- case HB_SCRIPT_SINHALA:
break;
default:
- /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
+ /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil.
* This means, half forms are submerged into the main consonant's cluster.
* This is unnecessary, and makes cursor positioning harder, but that's what
* Uniscribe does. */
}
-static void
+static bool
final_reordering_indic (const hb_ot_shape_plan_t *plan,
hb_font_t *font HB_UNUSED,
hb_buffer_t *buffer)
{
unsigned int count = buffer->len;
- if (unlikely (!count)) return;
+ if (unlikely (!count)) return false;
if (buffer->message (font, "start reordering indic final")) {
foreach_syllable (buffer, start, end)
HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
+
+ return false;
}
hb_buffer_t *buffer,
hb_font_t *font)
{
- _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+ if (!indic_plan->uniscribe_bug_compatible)
+ _hb_preprocess_text_vowel_constraints (plan, buffer, font);
}
static bool
#endif
}
- if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu)))
- {
- /*
- * Sinhala split matras... Let the fun begin.
- *
- * These four characters have Unicode decompositions. However, Uniscribe
- * decomposes them "Khmer-style", that is, it uses the character itself to
- * get the second half. The first half of all four decompositions is always
- * U+0DD9.
- *
- * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
- * broken with Uniscribe. But we need to support them. As such, we only
- * do the Uniscribe-style decomposition if the character is transformed into
- * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
- * Unicode decomposition.
- *
- * Note that we can't unconditionally use Unicode decomposition. That would
- * break some other fonts, that are designed to work with Uniscribe, and
- * don't have positioning features for the Unicode-style decomposition.
- *
- * Argh...
- *
- * The Uniscribe behavior is now documented in the newly published Sinhala
- * spec in 2012:
- *
- * https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
- */
-
-
- const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
- hb_codepoint_t glyph;
- if (indic_plan->uniscribe_bug_compatible ||
- (c->font->get_nominal_glyph (ab, &glyph) &&
- indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))
- {
- /* Ok, safe to use Uniscribe-style decomposition. */
- *a = 0x0DD9u;
- *b = ab;
- return true;
- }
- }
-
return (bool) c->unicode->decompose (ab, a, b);
}
}
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
+const hb_ot_shaper_t _hb_ot_shaper_indic =
{
collect_features_indic,
override_features_indic,
data_destroy_indic,
preprocess_text_indic,
nullptr, /* postprocess_glyphs */
- HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
decompose_indic,
compose_indic,
setup_masks_indic,
- HB_TAG_NONE, /* gpos_tag */
nullptr, /* reorder_marks */
+ HB_TAG_NONE, /* gpos_tag */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */
};