* Google Author(s): Behdad Esfahbod
*/
-#include "hb-ot-shape-complex-indic-private.hh"
-#include "hb-ot-layout-private.hh"
+#include "hb-ot-shape-complex-indic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-layout.hh"
/*
* Indic shaper.
*/
-struct feature_list_t {
- hb_tag_t tag;
- hb_ot_map_feature_flags_t flags;
-};
-
-static const feature_list_t
+static const hb_ot_map_feature_t
indic_features[] =
{
/*
* Basic features.
* These features are applied in order, one at a time, after initial_reordering.
*/
- {HB_TAG('n','u','k','t'), F_GLOBAL},
- {HB_TAG('a','k','h','n'), F_GLOBAL},
- {HB_TAG('r','p','h','f'), F_NONE},
- {HB_TAG('r','k','r','f'), F_GLOBAL},
- {HB_TAG('p','r','e','f'), F_NONE},
- {HB_TAG('b','l','w','f'), F_NONE},
- {HB_TAG('a','b','v','f'), F_NONE},
- {HB_TAG('h','a','l','f'), F_NONE},
- {HB_TAG('p','s','t','f'), F_NONE},
- {HB_TAG('v','a','t','u'), F_GLOBAL},
- {HB_TAG('c','j','c','t'), F_GLOBAL},
+ {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS},
+ {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
+ {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
/*
* Other features.
- * These features are applied all at once, after final_reordering.
+ * These features are applied all at once, after final_reordering
+ * but before clearing syllables.
* Default Bengali font in Windows for example has intermixed
* lookups for init,pres,abvs,blws features.
*/
- {HB_TAG('i','n','i','t'), F_NONE},
- {HB_TAG('p','r','e','s'), F_GLOBAL},
- {HB_TAG('a','b','v','s'), F_GLOBAL},
- {HB_TAG('b','l','w','s'), F_GLOBAL},
- {HB_TAG('p','s','t','s'), F_GLOBAL},
- {HB_TAG('h','a','l','n'), F_GLOBAL},
- /* Positioning features, though we don't care about the types. */
+ {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+ /*
+ * Positioning features.
+ * We don't care about the types.
+ */
{HB_TAG('d','i','s','t'), F_GLOBAL},
{HB_TAG('a','b','v','m'), F_GLOBAL},
{HB_TAG('b','l','w','m'), F_GLOBAL},
_BLWS,
_PSTS,
_HALN,
+
_DIST,
_ABVM,
_BLWM,
INDIC_NUM_FEATURES,
- INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
+ INDIC_BASIC_FEATURES = INIT, /* Don't forget to update this! */
};
static void
/* Do this before any lookups have been applied. */
map->add_gsub_pause (setup_syllables);
- map->add_global_bool_feature (HB_TAG('l','o','c','l'));
+ map->enable_feature (HB_TAG('l','o','c','l'));
/* The Indic specs do not require ccmp, but we apply it here since if
* there is a use of it, it's typically at the beginning. */
- map->add_global_bool_feature (HB_TAG('c','c','m','p'));
+ map->enable_feature (HB_TAG('c','c','m','p'));
unsigned int i = 0;
map->add_gsub_pause (initial_reordering);
+
for (; i < INDIC_BASIC_FEATURES; i++) {
- map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
+ map->add_feature (indic_features[i]);
map->add_gsub_pause (nullptr);
}
+
map->add_gsub_pause (final_reordering);
- for (; i < INDIC_NUM_FEATURES; i++) {
- map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
- }
- map->add_global_bool_feature (HB_TAG('c','a','l','t'));
- map->add_global_bool_feature (HB_TAG('c','l','i','g'));
+ for (; i < INDIC_NUM_FEATURES; i++)
+ map->add_feature (indic_features[i]);
+
+ map->enable_feature (HB_TAG('c','a','l','t'));
+ map->enable_feature (HB_TAG('c','l','i','g'));
map->add_gsub_pause (clear_syllables);
}
static void
override_features_indic (hb_ot_shape_planner_t *plan)
{
- plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
+ plan->map.disable_feature (HB_TAG('l','i','g','a'));
}
struct would_substitute_feature_t
{
- inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+ void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
{
zero_context = zero_context_;
map->get_stage_lookups (0/*GSUB*/,
&lookups, &count);
}
- inline bool would_substitute (const hb_codepoint_t *glyphs,
- unsigned int glyphs_count,
- hb_face_t *face) const
+ bool would_substitute (const hb_codepoint_t *glyphs,
+ unsigned int glyphs_count,
+ hb_face_t *face) const
{
for (unsigned int i = 0; i < count; i++)
if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
struct indic_shape_plan_t
{
- ASSERT_POD ();
-
- inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
+ bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
{
- hb_codepoint_t glyph = virama_glyph;
- if (unlikely (virama_glyph == (hb_codepoint_t) -1))
+ hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+ if (unlikely (glyph == (hb_codepoint_t) -1))
{
if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
glyph = 0;
* Maybe one day... */
/* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
- * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */
- virama_glyph = glyph;
+ * during shape planning... Instead, overwrite it here. */
+ virama_glyph.set_relaxed ((int) glyph);
}
*pglyph = glyph;
const indic_config_t *config;
bool is_old_spec;
- mutable hb_codepoint_t virama_glyph;
+ bool uniscribe_bug_compatible;
+ mutable hb_atomic_int_t virama_glyph;
would_substitute_feature_t rphf;
would_substitute_feature_t pref;
}
indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
- indic_plan->virama_glyph = (hb_codepoint_t) -1;
+ indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
+ indic_plan->virama_glyph.set_relaxed (-1);
/* Use zero-context would_substitute() matching for new-spec of the main
* Indic scripts, and scripts with one spec only, but not for old-specs.
return;
hb_codepoint_t virama;
- if (indic_plan->get_virama_glyph (font, &virama))
+ if (indic_plan->load_virama_glyph (font, &virama))
{
hb_face_t *face = font->face;
unsigned int count = buffer->len;
* last consonant.
*
* Reports suggest that in some scripts Uniscribe does this only if there
- * is *not* a Halant after last consonant already (eg. Kannada), while it
- * does it unconditionally in other scripts (eg. Malayalam). We don't
- * currently know about other scripts, so we single out Malayalam for now.
+ * is *not* a Halant after last consonant already. We know that is the
+ * case for Kannada, while it reorders unconditionally in other scripts,
+ * eg. Malayalam, Bengali, and Devanagari. We don't currently know about
+ * other scripts, so we blacklist Kannada.
*
* Kannada test case:
* U+0C9A,U+0CCD,U+0C9A,U+0CCD
* Malayalam test case:
* U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
* With lohit-ttf-20121122/Lohit-Malayalam.ttf
+ *
+ * Bengali test case:
+ * U+0998,U+09CD,U+09AF,U+09CD
+ * With Windows XP vrinda.ttf
+ * https://github.com/harfbuzz/harfbuzz/issues/1073
+ *
+ * Devanagari test case:
+ * U+091F,U+094D,U+0930,U+094D
+ * With chandas.ttf
+ * https://github.com/harfbuzz/harfbuzz/issues/1071
*/
if (indic_plan->is_old_spec)
{
- bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM;
+ bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
for (unsigned int i = base + 1; i < end; i++)
if (info[i].indic_category() == OT_H)
{
indic_position_t last_pos = POS_START;
for (unsigned int i = start; i < end; i++)
{
- if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+ if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
{
info[i].indic_position() = last_pos;
if (unlikely (info[i].indic_category() == OT_H &&
*
* We could use buffer->sort() for this, if there was no special
* reordering of pre-base stuff happening later...
+ * We don't want to merge_clusters all of that, which buffer->sort()
+ * would.
*/
- if (indic_plan->is_old_spec || end - base > 127)
+ if (indic_plan->is_old_spec || end - start > 127)
buffer->merge_clusters (base, end);
else
{
hb_buffer_t *buffer,
unsigned int start, unsigned int end)
{
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+
/* We treat placeholder/dotted-circle as if they are consonants, so we
* should just chain. Only if not in compatibility mode that is... */
- if (hb_options ().uniscribe_bug_compatible)
+ if (indic_plan->uniscribe_bug_compatible)
{
/* For dotted-circle, this is what Uniscribe does:
* If dotted-circle is the last glyph, it just does nothing.
hb_font_t *font,
hb_buffer_t *buffer)
{
- /* Note: This loop is extra overhead, but should not be measurable. */
+ /* Note: This loop is extra overhead, but should not be measurable.
+ * TODO Use a buffer scratch flag to remove the loop. */
bool has_broken_syllables = false;
unsigned int count = buffer->len;
hb_glyph_info_t *info = buffer->info;
else
buffer->next_glyph ();
}
-
buffer->swap_buffers ();
}
* phase, and that might have messed up our properties. Recover
* from a particular case of that where we're fairly sure that a
* class of OT_H is desired but has been lost. */
- if (indic_plan->virama_glyph)
+ /* We don't call load_virama_glyph(), since we know it's already
+ * loaded. */
+ hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+ if (virama_glyph)
{
- unsigned int virama_glyph = indic_plan->virama_glyph;
for (unsigned int i = start; i < end; i++)
if (info[i].codepoint == virama_glyph &&
_hb_glyph_info_ligated (&info[i]) &&
* defined as “after last standalone halant glyph, after initial matra
* position and before the main consonant”. If ZWJ or ZWNJ follow this
* halant, position is moved after it.
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
+ * and Devanagari shows that the behavior is best described as:
+ *
+ * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
+ * If ZWNJ follows this halant, position is moved after it."
+ *
+ * Test case, with Adobe Devanagari or Nirmala UI:
+ *
+ * U+091F,U+094D,U+200C,U+092F,U+093F
+ * (Matra moves to the middle, after ZWNJ.)
+ *
+ * U+091F,U+094D,U+200D,U+092F,U+093F
+ * (Matra does NOT move, stays to the left.)
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1070
*/
if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
*/
if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
{
+ search:
while (new_pos > start &&
!(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
new_pos--;
if (is_halant (info[new_pos]) &&
info[new_pos].indic_position() != POS_PRE_M)
{
+#if 0 // See comment above
/* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
new_pos++;
+#endif
+ if (new_pos + 1 < end)
+ {
+ /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
+ if (info[new_pos + 1].indic_category() == OT_ZWJ)
+ {
+ /* Keep searching. */
+ if (new_pos > start)
+ {
+ new_pos--;
+ goto search;
+ }
+ }
+ /* -> If ZWNJ follows this halant, position is moved after it. */
+ if (info[new_pos + 1].indic_category() == OT_ZWNJ)
+ new_pos++;
+ }
}
else
new_pos = start; /* No move. */
* Uniscribe doesn't do this.
* TEST: U+0930,U+094D,U+0915,U+094B,U+094D
*/
- if (!hb_options ().uniscribe_bug_compatible &&
+ if (!indic_plan->uniscribe_bug_compatible &&
unlikely (is_halant (info[new_reph_pos]))) {
for (unsigned int i = base + 1; i < new_reph_pos; i++)
if (info[i].indic_category() == OT_M) {
/*
* Finish off the clusters and go home!
*/
- if (hb_options ().uniscribe_bug_compatible)
+ if (indic_plan->uniscribe_bug_compatible)
{
switch ((hb_tag_t) plan->props.script)
{
}
+static void
+preprocess_text_indic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+}
+
static bool
decompose_indic (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t ab,
override_features_indic,
data_create_indic,
data_destroy_indic,
- nullptr, /* preprocess_text */
+ preprocess_text_indic,
nullptr, /* postprocess_glyphs */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
decompose_indic,
compose_indic,
setup_masks_indic,
- nullptr, /* disable_otl */
+ HB_TAG_NONE, /* gpos_tag */
nullptr, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */