From b85800f9de8976a7418ef9df467d3080c6ab0199 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 31 Aug 2012 18:12:01 -0400 Subject: [PATCH] [Indic] Implement dotted-circle insertion for broken clusters No panic, we really insert dotted circle when it's absolutely broken. Fixes most of the dotted-circle cases against Uniscribe. (for Devanagari fixes 80% of them, for Khmer 70%; the rest look like Uniscribe being really bogus...) I had to make a decision. Apparently Uniscribe adds one dotted circle to each broken character. I tried that, but that goes wrong easily with split matras. So I made it add only one dotted circle to an entire broken syllable tail. As in: "if there was a dotted circle here, this would have formed a correct cluster." That works better for split stuff, and I like it more. --- src/hb-buffer-private.hh | 1 + src/hb-buffer.cc | 10 +++++ src/hb-ot-shape-complex-indic-machine.rl | 4 +- src/hb-ot-shape-complex-indic-private.hh | 2 +- src/hb-ot-shape-complex-indic.cc | 67 +++++++++++++++++++++++++++++--- src/hb-ot-shape-normalize-private.hh | 1 + src/hb-ot-shape-normalize.cc | 11 +++--- 7 files changed, 83 insertions(+), 13 deletions(-) diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh index 9fa1e4b..456e1b8 100644 --- a/src/hb-buffer-private.hh +++ b/src/hb-buffer-private.hh @@ -152,6 +152,7 @@ struct hb_buffer_t { HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index); /* Makes a copy of the glyph at idx to output and replace glyph_index */ HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index); + HB_INTERNAL void output_info (hb_glyph_info_t &glyph_info); /* Copies glyph at idx to output but doesn't advance idx */ HB_INTERNAL void copy_glyph (void); /* Copies glyph at idx to output and advance idx. 
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 3f039d0..f25a8bc 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -268,6 +268,16 @@ hb_buffer_t::output_glyph (hb_codepoint_t glyph_index) } void +hb_buffer_t::output_info (hb_glyph_info_t &glyph_info) +{ + if (unlikely (!make_room_for (0, 1))) return; + + out_info[out_len] = glyph_info; + + out_len++; +} + +void hb_buffer_t::copy_glyph (void) { if (unlikely (!make_room_for (0, 1))) return; diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 6c76d24..c9309e9 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -75,12 +75,14 @@ halant_or_matra_group = (final_halant_group | matra_group{0,4}); consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail; vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail); standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail; +broken_cluster = n? 
(halant_group.cn){0,4} halant_or_matra_group syllable_tail; other = any; main := |* consonant_syllable => { found_syllable (consonant_syllable); }; vowel_syllable => { found_syllable (vowel_syllable); }; standalone_cluster => { found_syllable (standalone_cluster); }; + broken_cluster => { found_syllable (broken_cluster); *had_broken_cluster = true; }; other => { found_syllable (non_indic_cluster); }; *|; @@ -98,7 +100,7 @@ main := |* } HB_STMT_END static void -find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer) +find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, bool *had_broken_cluster) { unsigned int p, pe, eof, ts, te, act; int cs; diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 79daba5..91b0be5 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -300,7 +300,7 @@ is_halant_or_coeng (const hb_glyph_info_t &info) } static inline void -set_indic_properties (hb_glyph_info_t &info) +set_indic_properties (hb_glyph_info_t &info) { hb_codepoint_t u = info.codepoint; unsigned int type = get_indic_categories (u); diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 15b00b0..2417ab7 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -770,6 +770,15 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, } static void +initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + /* We already inserted dotted-circles, so just call the standalone_cluster. 
*/ + initial_reordering_standalone_cluster (plan, buffer, start, end); +} + +static void initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer HB_UNUSED, unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) @@ -799,23 +808,63 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan, case consonant_syllable: initial_reordering_consonant_syllable (plan, buffer, start, end); return; case vowel_syllable: initial_reordering_vowel_syllable (plan, buffer, start, end); return; case standalone_cluster: initial_reordering_standalone_cluster (plan, buffer, start, end); return; - case broken_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; + case broken_cluster: initial_reordering_broken_cluster (plan, buffer, start, end); return; case non_indic_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; } } static void +insert_dotted_circles (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_codepoint_t dottedcircle_glyph; + if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph)) + return; + + hb_glyph_info_t dottedcircle; + dottedcircle.codepoint = 0x25CC; + set_indic_properties (dottedcircle); + dottedcircle.codepoint = dottedcircle_glyph; + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len) + { + unsigned int syllable = buffer->cur().syllable(); + syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) + { + hb_glyph_info_t info = dottedcircle; + info.cluster = buffer->cur().cluster; + info.mask = buffer->cur().mask; + info.syllable() = buffer->cur().syllable(); + buffer->output_info (info); + last_syllable = syllable; + } + buffer->next_glyph (); + } + + buffer->swap_buffers (); +} + +static void initial_reordering (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t 
*buffer) { - unsigned int count = buffer->len; - if (unlikely (!count)) return; - update_consonant_positions (plan, font, buffer); - find_syllables (plan, buffer); + + bool had_broken_clusters = false; + find_syllables (plan, buffer, &had_broken_clusters); + if (unlikely (had_broken_clusters)) + insert_dotted_circles (plan, font, buffer); hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + if (unlikely (!count)) return; unsigned int last = 0; unsigned int last_syllable = info[0].syllable(); for (unsigned int i = 1; i < count; i++) @@ -1170,6 +1219,12 @@ final_reordering (const hb_ot_shape_plan_t *plan, } +static hb_ot_shape_normalization_mode_t +normalization_preference_indic (const hb_ot_shape_plan_t *plan) +{ + return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; +} + const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = { "indic", @@ -1178,7 +1233,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = data_create_indic, data_destroy_indic, NULL, /* preprocess_text */ - NULL, /* normalization_preference */ + normalization_preference_indic, setup_masks_indic, false, /* zero_width_attached_marks */ }; diff --git a/src/hb-ot-shape-normalize-private.hh b/src/hb-ot-shape-normalize-private.hh index 462b87d..c5fcbea 100644 --- a/src/hb-ot-shape-normalize-private.hh +++ b/src/hb-ot-shape-normalize-private.hh @@ -38,6 +38,7 @@ enum hb_ot_shape_normalization_mode_t { HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED, HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL, /* including base-to-base composition */ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index 93dd00c..f4d8330 100644 --- 
a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -414,10 +414,10 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int } static inline bool -decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool recompose, unsigned int end) +decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool short_circuit, unsigned int end) { if (likely (buffer->idx + 1 == end)) - return decompose_current_character (font, buffer, recompose); + return decompose_current_character (font, buffer, short_circuit); else return decompose_multi_char_cluster (font, buffer, end); } @@ -437,7 +437,8 @@ void _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, hb_ot_shape_normalization_mode_t mode) { - bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED && + mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; bool can_use_recompose = false; unsigned int count; @@ -459,7 +460,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, if (buffer->cur().cluster != buffer->info[end].cluster) break; - can_use_recompose = decompose_cluster (font, buffer, recompose, end) || can_use_recompose; + can_use_recompose = decompose_cluster (font, buffer, short_circuit, end) || can_use_recompose; } buffer->swap_buffers (); @@ -495,7 +496,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, } - if (!recompose) + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED) return; /* Third round, recompose */ -- 2.7.4