From 6ef939455f925be5e52d474105da740da6fafe21 Mon Sep 17 00:00:00 2001 From: Jiang Jiang Date: Wed, 16 Nov 2011 15:27:28 +0100 Subject: [PATCH] Fix HarfBuzz Thai character SARA AM handling This patch is contributed by Thanomsub Noppaburana from libthai. It added a special thai shaping function to handle SARA AM character for fonts without OpenType rules to support it, like Nokia Pure Text AS. With modification to logClusters assignment to make sure that QTextLine::glyphRuns(int from, int length) returns correct glyphs. Task-number: QTBUG-21206 Change-Id: I5a78ee1ab2b4c874c7d0df17d4ee6d264ed5a790 Reviewed-by: Lars Knoll --- .../harfbuzz/src/harfbuzz-shaper-private.h | 1 + src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp | 24 +- src/3rdparty/harfbuzz/src/harfbuzz-thai.c | 367 +++++++++++++++++++-- 3 files changed, 352 insertions(+), 40 deletions(-) diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper-private.h b/src/3rdparty/harfbuzz/src/harfbuzz-shaper-private.h index e1360c7..05214e7 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper-private.h +++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper-private.h @@ -111,6 +111,7 @@ extern HB_Bool HB_HangulShape(HB_ShaperItem *shaper_item); extern HB_Bool HB_MyanmarShape(HB_ShaperItem *shaper_item); extern HB_Bool HB_KhmerShape(HB_ShaperItem *shaper_item); extern HB_Bool HB_IndicShape(HB_ShaperItem *shaper_item); +extern HB_Bool HB_ThaiShape(HB_ShaperItem *shaper_item); extern void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes); diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp index 7f4bb0c..0734442 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp +++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp @@ -630,7 +630,7 @@ const HB_ScriptEngine HB_ScriptEngines[] = { // Sinhala { HB_IndicShape, HB_IndicAttributes }, // Thai - { HB_BasicShape, HB_ThaiAttributes }, + { HB_ThaiShape, HB_ThaiAttributes }, // Lao { HB_BasicShape, 0 }, // Tibetan @@ -1337,18 +1337,18 @@ HB_Bool HB_OpenTypePosition(HB_ShaperItem *item, int availableGlyphs, HB_Bool do #ifdef OT_DEBUG if (doLogClusters) { - DEBUG("log clusters after shaping:"); - for (int j = 0; j < length; j++) - DEBUG(" log[%d] = %d", j, item->log_clusters[j]); + DEBUG("log clusters after shaping:\n"); + for (unsigned int j = 0; j < item->item.length; j++) + DEBUG(" log[%d] = %d\n", j, item->log_clusters[j]); } - DEBUG("final glyphs:"); - for (int i = 0; i < (int)hb_buffer->in_length; ++i) - DEBUG(" glyph=%4x char_index=%d mark: %d cmp: %d, clusterStart: %d advance=%d/%d offset=%d/%d", - glyphs[i].glyph, hb_buffer->in_string[i].cluster, glyphs[i].attributes.mark, - glyphs[i].attributes.combiningClass, glyphs[i].attributes.clusterStart, - glyphs[i].advance.x.toInt(), glyphs[i].advance.y.toInt(), - glyphs[i].offset.x.toInt(), glyphs[i].offset.y.toInt()); - DEBUG("-----------------------------------------"); + DEBUG("final glyphs:\n"); + for (unsigned int i = 0; i < item->num_glyphs; ++i) + DEBUG(" glyph=%4x char_index=%d mark: %d cmp: %d, clusterStart: %d advance=%d offset=%d/%d\n", + glyphs[i], face->buffer->in_string[i].cluster, attributes[i].mark, + attributes[i].combiningClass, attributes[i].clusterStart, + item->advances[i] >> 6, + item->offsets[i].x >> 6, item->offsets[i].y >> 6); + DEBUG("-----------------------------------------\n"); #endif return true; } diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c index e80e2c5..262cee6 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c +++ b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c @@ -29,15 +29,41 @@ #include #include -typedef int (*th_brk_def)(const char*, int[], int); +#define LIBTHAI_MAJOR 0 + +/* + * if libthai changed please update these codes too. + */ +struct thcell_t { + unsigned char base; /**< base character */ + unsigned char hilo; /**< upper/lower vowel/diacritic */ + unsigned char top; /**< top-level mark */ +}; +typedef int (*th_brk_def) (const unsigned char*, int*, size_t); +typedef int (*th_render_cell_tis_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am); +typedef int (*th_render_cell_win_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am); +typedef int (*th_render_cell_mac_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am); +typedef size_t (*th_next_cell_def) (const unsigned char *, size_t, struct thcell_t *, int); + +/* libthai releated function handles */ static th_brk_def th_brk = 0; -static int libthai_resolved = 0; +static th_next_cell_def th_next_cell = 0; +static th_render_cell_tis_def th_render_cell_tis = 0; +static th_render_cell_win_def th_render_cell_win = 0; +static th_render_cell_mac_def th_render_cell_mac = 0; -static void resolve_libthai() -{ - if (!th_brk) - th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk"); - libthai_resolved = 1; +static int init_libthai() { + if (!th_brk || !th_next_cell || !th_render_cell_tis || !th_render_cell_win || !th_render_cell_mac) { + th_brk = (th_brk_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_brk"); + th_next_cell = (th_next_cell_def)HB_Library_Resolve("thai", LIBTHAI_MAJOR, "th_next_cell"); + th_render_cell_tis = (th_render_cell_tis_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_tis"); + th_render_cell_win = (th_render_cell_win_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_win"); + th_render_cell_mac = (th_render_cell_mac_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_mac"); + } + if (th_brk && th_next_cell && th_render_cell_tis && th_render_cell_win && th_render_cell_mac) + return 1; + else + return 0; } static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr) @@ -57,46 +83,329 @@ static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr) result[len] = 0; } -static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes) +/* + * --------------------------------------------------------------------------- + * Thai Shaper / Attributes + * --------------------------------------------------------------------------- + */ + +/* + * USe basic_features prepare for future adding. + */ +#ifndef NO_OPENTYPE +static const HB_OpenTypeFeature thai_features[] = { + { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { HB_MAKE_TAG('l', 'i', 'g', 'a'), CcmpProperty }, + { HB_MAKE_TAG('c', 'l', 'i', 'g'), CcmpProperty }, + {0, 0} +}; +#endif + +/* TIS-to-Unicode glyph maps for characters 0x80-0xff */ +static int tis620_0[128] = { + /**/ 0, 0, 0, 0, 0, 0, 0, 0, + /**/ 0, 0, 0, 0, 0, 0, 0, 0, + /**/ 0, 0, 0, 0, 0, 0, 0, 0, + /**/ 0, 0, 0, 0, 0, 0, 0, 0, + 0x0020, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, + 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, + 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, + 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, + 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, + 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, + 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, + 0x0e38, 0x0e39, 0x0e3a, 0, 0, 0, 0, 0x0e3f, + 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, + 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, + 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, + 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0, 0, 0, 0 +}; + +static int tis620_1[128] = { + 0xf89e, 0, 0, 0xf88c, 0xf88f, 0xf892, 0xf895, 0xf898, + 0xf88b, 0xf88e, 0xf891, 0xf894, 0xf897, 0, 0, 0xf899, + 0xf89a, 0, 0xf884, 0xf889, 0xf885, 0xf886, 0xf887, 0xf888, + 0xf88a, 0xf88d, 0xf890, 0xf893, 0xf896, 0, 0, 0, + /**/ 0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, + 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, + 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, + 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, + 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, + 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, + 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, + 0x0e38, 0x0e39, 0x0e3a, 0, 0, 0, 0, 0x0e3f, + 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, + 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0, 0x0e4f, + 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, + 0x0e58, 0x0e59, 0, 0, 0xf89b, 0xf89c, 0xf89d, 0 +}; + +static int tis620_2[128] = { + 0xf700, 0xf701, 0xf702, 0xf703, 0xf704, 0x2026, 0xf705, 0xf706, + 0xf707, 0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e, + 0xf70f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717, + 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, + 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, + 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, + 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, + 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, + 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, + 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, + 0x0e38, 0x0e39, 0x0e3a, 0, 0, 0, 0, 0x0e3f, + 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, + 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, + 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, + 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0xf718, 0xf719, 0xf71a, 0 +}; + +enum ThaiFontType { + TIS, + WIN, + MAC, +}; + +static int thai_get_glyph_index (ThaiFontType font_type, unsigned char c) +{ + switch (font_type){ + case TIS: return (c & 0x80) ? tis620_0[c & 0x7f] : c; + case WIN: return (c & 0x80) ? tis620_1[c & 0x7f] : c; + case MAC: return (c & 0x80) ? tis620_2[c & 0x7f] : c; + default: return 0; + } +} + +static int thai_contain_glyphs (HB_ShaperItem *shaper_item, const int glyph_map[128]) +{ + unsigned char c; + + for (c = 0; c < 0x80; c++) { + if ( glyph_map[c] ) { + if ( !shaper_item->font->klass->canRender (shaper_item->font, (HB_UChar16 *) &glyph_map[c], 1) ) + return 0; + } + } + return 1; +} + +static ThaiFontType getThaiFontType(HB_ShaperItem *shaper_item) +{ + if ( thai_contain_glyphs (shaper_item, tis620_2) ) + return MAC; + else if ( thai_contain_glyphs (shaper_item, tis620_1) ) + return WIN; + else + return TIS; +} + +/* + * convert to the correct display level of THAI vowels and marks. + */ +static HB_Bool HB_ThaiConvertStringToGlyphIndices (HB_ShaperItem *item) { char s[128]; char *cstr = s; - int brp[128]; - int *break_positions = brp; - hb_uint32 numbreaks; - hb_uint32 i; + const HB_UChar16 *string = item->string + item->item.pos; + const hb_uint32 len = item->item.length; + unsigned short *logClusters = item->log_clusters; + hb_uint32 i = 0, slen = 0; - if (!libthai_resolved) - resolve_libthai(); - - if (!th_brk) - return; + if (!init_libthai()) + return HB_BasicShape (item); if (len >= 128) cstr = (char *)malloc(len*sizeof(char) + 1); + if (!cstr) + return HB_BasicShape (item); + to_tis620(string, len, cstr); - numbreaks = th_brk(cstr, break_positions, 128); - if (numbreaks > 128) { - break_positions = (int *)malloc(numbreaks * sizeof(int)); - numbreaks = th_brk(cstr, break_positions, numbreaks); + /* Get font type */ + static ThaiFontType font_type; + static HB_Font itemFont; + if (itemFont != item->font) { + font_type = getThaiFontType (item); + itemFont = item->font; + } + + /* allocate temporary glyphs buffers */ + HB_STACKARRAY (HB_UChar16, glyphString, (item->item.length * 2)); + + while (i < item->item.length) { + struct thcell_t tis_cell; + unsigned char rglyphs[4]; + int cell_length; + int lgn = 0; + HB_Bool haveSaraAm = false; + + cell_length = th_next_cell ((const unsigned char *)cstr + i, len - i, &tis_cell, true); /* !item->fixedPitch); */ + haveSaraAm = (cstr[i + cell_length - 1] == (char)0xd3); + + /* set shaper item's log_clusters */ + logClusters[i] = slen; + for (int j = 1; j < cell_length; j++) { + logClusters[i + j] = logClusters[i]; + } + + /* Find Logical Glyphs by font type */ + switch (font_type) { + case TIS: lgn = th_render_cell_tis (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break; + case WIN: lgn = th_render_cell_mac (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break; + case MAC: lgn = th_render_cell_win (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break; + } + + /* Add glyphs to glyphs string and setting some attributes */ + for (int lgi = 0; lgi < lgn; lgi++) { + if ( rglyphs[lgi] == 0xdd/*TH_BLANK_BASE_GLYPH*/ ) { + //if ( !item->fixedPitch ) { + glyphString[slen++] = C_DOTTED_CIRCLE; + item->attributes[slen-1].dontPrint = true; // FIXME this will hide all dotted circle + //} + } + else { + glyphString[slen++] = (HB_UChar16) thai_get_glyph_index (font_type, rglyphs[lgi]); + } + } + + if (haveSaraAm) { + logClusters[i + cell_length - 1] = slen - 1; // Set logClusters before NIKAHIT + if (tis_cell.top != 0) + logClusters[i + cell_length - 2] = slen - 2; // Set logClusters before NIKAHIT when tis_cell has top + if (logClusters[i + cell_length - 1] > slen) + logClusters[i + cell_length - 1] = 0; + } + + i += cell_length; + } + glyphString[slen] = (HB_UChar16) '\0'; + + /* for check, should reallocate space or not */ + HB_Bool spaceOK = (item->num_glyphs >= slen); + + /* Convert to Glyph indices */ + HB_Bool haveGlyphs = item->font->klass->convertStringToGlyphIndices ( + item->font, + glyphString, slen, + item->glyphs, &item->num_glyphs, + item->shaperFlags); + + HB_FREE_STACKARRAY (glyphString); + + if (len >= 128) + free(cstr); + + return (haveGlyphs && spaceOK); +} + +/* + * set the glyph attributes heuristically. + */ +static void HB_ThaiHeuristicSetGlyphAttributes (HB_ShaperItem *item) +{ + /* Set Glyph Attributes */ + hb_uint32 iCluster = 0; + hb_uint32 length = item->item.length; + while (iCluster < length) { + int cluster_start = item->log_clusters[iCluster]; + ++iCluster; + while (iCluster < length && item->log_clusters[iCluster] == cluster_start) { + ++iCluster; + } + int cluster_end = (iCluster < length) ? item->log_clusters[iCluster] : item->num_glyphs; + item->attributes[cluster_start].clusterStart = true; + for (int i = cluster_start + 1; i < cluster_end; i++) { + item->attributes[i].clusterStart = false; + } + } +} + +/* + * THAI Shaping. + */ +HB_Bool HB_ThaiShape (HB_ShaperItem *shaper_item) +{ + if ( !HB_ThaiConvertStringToGlyphIndices (shaper_item) ) + return false; + + HB_ThaiHeuristicSetGlyphAttributes (shaper_item); + +#ifndef NO_OPENTYPE + const int availableGlyphs = shaper_item->num_glyphs; + if ( HB_SelectScript (shaper_item, thai_features) ) { + HB_OpenTypeShape (shaper_item, /*properties*/0); + return HB_OpenTypePosition (shaper_item, availableGlyphs, /*doLogClusters*/false); } +#endif + + HB_HeuristicPosition (shaper_item); + return true; +} + +/* + * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI. + */ +static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes) +{ + char s[128]; + char *cstr = s; + int *break_positions = 0; + int brp[128]; + int brp_size = 0; + hb_uint32 numbreaks, i, j, cell_length; + struct thcell_t tis_cell; + + if (!init_libthai()) + return ; + + if (len >= 128) + cstr = (char *)malloc(len*sizeof(char) + 1); + + to_tis620(string, len, cstr); for (i = 0; i < len; ++i) { attributes[i].lineBreakType = HB_NoBreak; attributes[i].wordBoundary = FALSE; } - for (i = 0; i < numbreaks; ++i) { - if (break_positions[i] > 0) { - attributes[break_positions[i]-1].lineBreakType = HB_Break; - attributes[break_positions[i]-1].wordBoundary = TRUE; + if (len > 128) { + break_positions = (int*) malloc (sizeof(int) * len); + memset (break_positions, 0, sizeof(int) * len); + brp_size = len; + } + else { + break_positions = brp; + brp_size = 128; + } + + if (break_positions) { + attributes[0].wordBoundary = TRUE; + numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size); + for (i = 0; i < numbreaks; ++i) { + attributes[break_positions[i]].wordBoundary = TRUE; + if (break_positions[i] > 0) + attributes[break_positions[i]-1].lineBreakType = HB_Break; } + + if (break_positions != brp) + free(break_positions); } - if (break_positions != brp) - free(break_positions); + /* manage charStop */ + i = 0; + while (i < len) { + cell_length = th_next_cell((const unsigned char *)cstr + i, len - i, &tis_cell, true); + + attributes[i].charStop = true; + for (j = 1; j < cell_length; j++) + attributes[i + j].charStop = false; + + /* Set charStop for SARA AM */ + if (cstr[i + cell_length - 1] == (char)0xd3) + attributes[i + cell_length - 1].charStop = true; + + i += cell_length; + } if (len >= 128) free(cstr); @@ -105,7 +414,9 @@ static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttri void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) { assert(script == HB_Script_Thai); + const HB_UChar16 *uc = text + from; attributes += from; - thaiWordBreaks(text + from, len, attributes); + HB_UNUSED(script); + HB_ThaiAssignAttributes(uc, len, attributes); } -- 2.7.4