From: Behdad Esfahbod Date: Thu, 21 Jul 2011 19:25:01 +0000 (-0400) Subject: Document normalization design X-Git-Tag: submit/2.0alpha-wayland/20121130.004132~9^2~49 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5d90a342e319068716429bf7af76c3896b61a0e5;p=profile%2Fivi%2Forg.tizen.video-player.git Document normalization design --- diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index a791e7c..6832779 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -25,20 +25,55 @@ */ #include "hb-ot-shape-private.hh" +#include "hb-ot-shape-complex-private.hh" HB_BEGIN_DECLS +/* + * HIGHLEVEL DESIGN: + * + * This file exports one main function: _hb_ot_shape_normalize(). + * + * This function closely reflects the Unicode Normalization Algorithm, + * yet it's different. The shaper can either prefer decomposed (NFD) or + * composed (NFC). + * + * In general what happens is that: each grapheme is decomposed in a chain + * of 1:2 decompositions, marks reordered, and then recomposed if desired, + * so far it's like Unicode Normalization. However, the decomposition and + * recomposition only happen if the font supports the resulting characters. + * + * The goals are: + * + * - Try to render all canonically equivalent strings similarly. To really + * achieve this we have to always do the full decomposition and then + * selectively recompose from there. It's kinda too expensive though, so + * we skip some cases. For example, if composed is desired, we simply + * don't touch 1-character clusters that are supported by the font, even + * though their NFC may be different. + * + * - When a font has a precomposed character for a sequence but the 'ccmp' + * feature in the font is not adequate, use the precomposed character + * which typically has better mark positioning. + * + * - When a font does not support a character but supports its decomposition, + * well, use the decomposition. 
+ * + * - The Indic shaper requests decomposed output. This will handle splitting + * matra for the Indic shaper. + */ + static bool get_glyph (hb_ot_shape_context_t *c, unsigned int i) { - hb_buffer_t *b = c->buffer; hb_codepoint_t glyph; - return hb_font_get_glyph (c->font, b->info[i].codepoint, 0, &glyph); + return hb_font_get_glyph (c->font, c->buffer->info[i].codepoint, 0, &glyph); } static bool decompose_single_char_cluster (hb_ot_shape_context_t *c, + bool recompose, unsigned int i) { return FALSE; @@ -46,22 +81,24 @@ decompose_single_char_cluster (hb_ot_shape_context_t *c, static bool handle_single_char_cluster (hb_ot_shape_context_t *c, + bool recompose, unsigned int i) { - /* If the single char is supported by the font, we're good. */ - if (get_glyph (c, i)) + /* If recomposing and the single char is supported by the font, we're good. */ + if (recompose && get_glyph (c, i)) return FALSE; /* Decompose */ - return decompose_single_char_cluster (c, i); + return decompose_single_char_cluster (c, recompose, i); } static bool handle_multi_char_cluster (hb_ot_shape_context_t *c, + bool recompose, unsigned int start, unsigned int end) { - /* If there's a variation-selector, give-up, it's just too hard. */ + /* TODO Currently if there's a variation-selector we give-up, it's just too hard. 
*/ for (unsigned int i = start; i < end; i++) if (unlikely (is_variation_selector (c->buffer->info[i].codepoint))) return FALSE; @@ -70,24 +107,33 @@ handle_multi_char_cluster (hb_ot_shape_context_t *c, } bool -_hb_normalize (hb_ot_shape_context_t *c) +_hb_ot_shape_normalize (hb_ot_shape_context_t *c) { - hb_buffer_t *b = c->buffer; + hb_buffer_t *buffer = c->buffer; bool changed = FALSE; + bool recompose = !hb_ot_shape_complex_prefer_decomposed (c->plan->shaper); + + buffer->clear_output (); + + unsigned int count = buffer->len; + for (buffer->i = 0; buffer->i < count;) + { - unsigned int count = b->len; - for (unsigned int i = 0; i < count;) { unsigned int end; - for (end = i + 1; end < count; end++) - if (b->info[i].cluster != b->info[end].cluster) + for (end = buffer->i + 1; end < count; end++) + if (buffer->info[buffer->i].cluster != buffer->info[end].cluster) break; - if (i + 1 == end) - changed |= handle_single_char_cluster (c, i); + + if (buffer->i + 1 == end) + changed |= handle_single_char_cluster (c, recompose, buffer->i); else - changed |= handle_multi_char_cluster (c, i, end); - i = end; + changed |= handle_multi_char_cluster (c, recompose, buffer->i, end); + while (buffer->i < end) + c->buffer->next_glyph (); } + buffer->swap (); + return changed; } diff --git a/src/hb-ot-shape-private.hh b/src/hb-ot-shape-private.hh index 96c436d..17b3c99 100644 --- a/src/hb-ot-shape-private.hh +++ b/src/hb-ot-shape-private.hh @@ -100,7 +100,7 @@ is_variation_selector (hb_codepoint_t unicode) } -HB_INTERNAL bool _hb_normalize (hb_ot_shape_context_t *c); +HB_INTERNAL bool _hb_ot_shape_normalize (hb_ot_shape_context_t *c); HB_END_DECLS diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index bffd075..d1c495f 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -254,11 +254,13 @@ static void hb_map_glyphs (hb_font_t *font, hb_buffer_t *buffer) { + hb_codepoint_t glyph; + if (unlikely (!buffer->len)) return; - hb_codepoint_t glyph; buffer->clear_output (); + 
unsigned int count = buffer->len - 1; for (buffer->i = 0; buffer->i < count;) { if (unlikely (is_variation_selector (buffer->info[buffer->i + 1].codepoint))) { @@ -363,7 +365,7 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c) hb_ensure_native_direction (c->buffer); - if (_hb_normalize (c)) + if (_hb_ot_shape_normalize (c)) /* Buffer contents changed, reset unicode_props */ hb_set_unicode_props (c->buffer); /* BUFFER: Set general_category and combining_class in var1 */