From 34c215036f5fcdc7599b1ab0591b56dbb3811902 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 23 Jul 2012 23:51:29 -0400 Subject: [PATCH] [Indic] Improve Sinhala base algorithm and reph positioning Sinhala does not have half forms. And most (all?) consonants can be base, except when preceded by ZWJ, which would request a subjoined form. Hence switch the base algorithm to categorize with Khmer, start search at start, and stop at a ZWJ. Also, mark all pos=base consonants after base to be subjoined. Mark base itself to have pos=base. Finally, adjust Sinhala's reph position to after-main. Brings down Sinhala failures from 455 to 328 (0.120656%). --- src/hb-ot-shape-complex-indic.cc | 18 +++++++++++++++++- .../shaper-indic/indic/script-sinhala/misc/misc.txt | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index e7b70c8..83d7ab5 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -501,6 +501,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff switch ((hb_tag_t) buffer->props.script) { + case HB_SCRIPT_SINHALA: case HB_SCRIPT_KHMER: base_pos = BASE_FIRST; break; @@ -557,6 +558,19 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff if (!has_reph) base = limit; + + /* Find the last base consonant that is not blocked by ZWJ. If there is + * a ZWJ before a base consonant, that would request a subjoined form. */ + for (unsigned int i = limit; i < end; i++) + if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C) + base = i; + else if (info[i].indic_category() == OT_ZWJ) + break; + + /* Mark all subsequent consonants as below. 
*/ + for (unsigned int i = base + 1; i < end; i++) + if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C) + info[i].indic_position() = POS_BELOW_C; } if (base < start) @@ -570,6 +584,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff has_reph = false; } } + if (base < end) + info[base].indic_position() = POS_BASE_C; /* 2. Decompose and reorder Matras: @@ -931,6 +947,7 @@ final_reordering_syllable (hb_buffer_t *buffer, { case HB_SCRIPT_MALAYALAM: case HB_SCRIPT_ORIYA: + case HB_SCRIPT_SINHALA: reph_pos = REPH_AFTER_MAIN; break; @@ -945,7 +962,6 @@ final_reordering_syllable (hb_buffer_t *buffer, default: case HB_SCRIPT_DEVANAGARI: case HB_SCRIPT_GUJARATI: - case HB_SCRIPT_SINHALA: reph_pos = REPH_BEFORE_POSTSCRIPT; break; diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt index 03a0fae..c4f6b6b 100644 --- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt @@ -30,3 +30,4 @@ ශී‍්‍ර ස්ට්‍රේ ග්‍යෙ +ර්‍ය්‍ය -- 2.7.4