From 92df9d5bc4328cf8c0743851d01ba01cddb15b2c Mon Sep 17 00:00:00 2001 From: Victor Cebollada Date: Fri, 29 Jul 2016 08:45:55 +0100 Subject: [PATCH] [3.0] TextAbstraction - Characters removed from the Latin script. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * There are many characters in the range of the Latin script which are not considered part of it. It causes font selection issues if the default platform font is not a Latin font. e.g. -The default platform font is one that supports the Telugu script. -The text is "-- తెలుగు లిపి hello world". -The "-- " was assigned the LATIN, "తెలుగు లిపి " TELUGU and "hello world" LATIN. -The "--" is not really part of the Latin script. -The "--" is supported by the default platform font which is a Telugu font. -The Telugu font has been cached as a font that supports Latin but it doesn't. -The "hello world" will be shaped with the Telugu font which doesn't support the Latin script. Change-Id: I2a7e1a720cf6c3e7fb766aa7d9aa98ecf7bf6453 Signed-off-by: Victor Cebollada --- text/dali/devel-api/text-abstraction/script.cpp | 264 ++++++++++++++++++++- text/dali/devel-api/text-abstraction/script.h | 198 ++++++++++------ .../internal/text-abstraction/shaping-impl.cpp | 21 ++ 3 files changed, 400 insertions(+), 83 deletions(-) diff --git a/text/dali/devel-api/text-abstraction/script.cpp b/text/dali/devel-api/text-abstraction/script.cpp index 2b534f8..265e9aa 100644 --- a/text/dali/devel-api/text-abstraction/script.cpp +++ b/text/dali/devel-api/text-abstraction/script.cpp @@ -51,24 +51,87 @@ bool IsRightToLeftScript( Script script ) Script GetCharacterScript( Character character ) { - // Latin script: + // Latin script: It contains punctuation characters and symbols which are not part of the latin script. 
https://en.wikipedia.org/wiki/Latin_script_in_Unicode // 0x0000 - 0x007f C0 Controls and Basic Latin + // + // ASCII digits (not part of LATIN script): + // 0x0030 - 0x0039 + // + // ASCII punctuation and symbols (not part of LATIN script): + // 0x0020 - 0x002F + // 0x003A - 0x0040 + // 0x005B - 0x0060 + // 0x007B - 0x007E + // + // Controls (not part of LATIN script): + // 0x007F + // // 0x0080 - 0x00ff C1 Controls and Latin-1 Supplement + // + // Controls (not part of LATIN script): + // 0x0080 - 0x009F + // + // Punctuations and symbols (not part of LATIN script): + // 0x00A0 - 0x00BF + // + // Mathematical operators (not part of LATIN script): + // 0x00D7 + // 0x00F7 + // // 0x0100 - 0x017f Latin Extended-A // 0x0180 - 0x024f Latin Extended-B // 0x0250 - 0x02af IPA Extensions // 0x02b0 - 0x02ff Spacing Modifier Letters + // + // Punctuation (not part of LATIN script): + // 0x02B9 - 0x02BF + // // 0x1d00 - 0x1d7f Phonetic Extensions + // + // Uralic Phonetic (not part of LATIN script): + // 0x1D26 - 0x1D2B + // + // Subscripts and superscripts + // 0x1D5D - 0x1D61 + // 0x1D66 - 0x1D6A + // 0x1D78 + // // 0x1d80 - 0x1dbf Phonetic Extensions Supplement + // + // 0x1DBF (subscript or superscript. 
Not part of LATIN script ) + // // 0x1e00 - 0x1eff Latin Extended Additional // 0x2070 - 0x209f Superscripts and Subscripts - // 0x2100 - 0x214f Letterlike symbols - // 0x2150 - 0x218f Number Forms + // + // 0x2070 (not part of LATIN script) + // 0x2074 - 0x207E (not part of LATIN script) + // + // 0x2100 - 0x214f Letterlike symbols (not part of LATIN script) + // + // 0x212A - 0x212B (are part of LATIN script) + // 0x2132 (are part of LATIN script) + // 0x214E (are part of LATIN script) + // + // 0x2150 - 0x2189 Number Forms + // + // 0x2150 - 0x215F Fractions (not part of LATIN script) + // 0x2189 Fractions (not part of LATIN script) + // // 0x2c60 - 0x2c7f Latin Extended-C // 0xa720 - 0xa7ff Latin Extended-D + // + // 0xA720 - 0xA721 Uralic Phonetic (not part of LATIN script) + // 0xA788 (not part of LATIN script) + // 0xA789 - 0xA78A Budu (not part of LATIN script) + // // 0xab30 - 0xab6f Latin Extended-E + // // 0xfb00 - 0xfb06 Latin Alphabetic Presentation Forms // 0xff00 - 0xffef Halfwidth and Fullwidth Forms + // + // 0xFF00 - 0xFF20 HWFW Symbols (not part of LATIN script) + // 0xFF3B - 0xFF40 HWFW Symbols (not part of LATIN script) + // 0xFF5B - 0xFFEF HWFW Symbols (not part of LATIN script) // Brahmic scripts: // 0x0900 - 0x097f Devanagari @@ -172,6 +235,19 @@ Script GetCharacterScript( Character character ) // 6b. Additional transport and map symbols ( 1F681 - 1F6C5 ) // 6c. Other additional symbols ( 1F30D - 1F567 ) + // Symbols. Work around for these symbols. + // 0x25cb + // 0x25cf + // 0x25a1 + // 0x25a0 + // 0x2664 + // 0x2661 + // 0x2662 + // 0x2667 + // 0x2606 + // 0x25aa + // 0x262a + if( IsCommonScript( character ) ) { return COMMON; @@ -183,16 +259,63 @@ Script GetCharacterScript( Character character ) { if( character <= 0x077f ) { - if( character == 0x00A9 ) + if( ( 0x0030 <= character ) && ( character <= 0x0039 ) ) + { + return ASCII_DIGITS; + } + if( character <= 0x007E ) { - return EMOJI; // 5. 
Uncategorized: copyright sign + if( ( 0x0020 <= character ) && ( character <= 0x002F ) ) + { + return ASCII_PS; + } + if( ( 0x003A <= character ) && ( character <= 0x0040 ) ) + { + return ASCII_PS; + } + if( ( 0x005B <= character ) && ( character <= 0x0060 ) ) + { + return ASCII_PS; + } + if( ( 0x007B <= character ) && ( character <= 0x007E ) ) + { + return ASCII_PS; + } } - if( character == 0x00AE ) + if( ( 0x007F <= character ) && ( character <= 0x009F ) ) { - return EMOJI; // 5. Uncategorized: registered sign + // 0x007F is actually part of C0 Controls and Basic Latin. However, is the last and only control character of its block + // and the following characters of the next block are consecutive. + return C1_CONTROLS; + } + if( ( 0x00A0 <= character ) && ( character <= 0x00BF ) ) + { + if( character == 0x00A9 ) + { + return EMOJI; // 5. Uncategorized: copyright sign + } + if( character == 0x00AE ) + { + return EMOJI; // 5. Uncategorized: registered sign + } + + return C1_PS; + } + if( character == 0x00D7 ) + { + return C1_MATH; + } + if( character == 0x00F7 ) + { + return C1_MATH; } if( character <= 0x02ff ) { + if( ( 0x02B9 <= character ) && ( character <= 0x02BF ) ) + { + return SML_P; + } + return LATIN; } if( ( 0x0370 <= character ) && ( character <= 0x03ff ) ) @@ -318,6 +441,27 @@ Script GetCharacterScript( Character character ) } if( ( 0x1d00 <= character ) && ( character <= 0x1eff ) ) { + if( ( 0x1D26 <= character ) && ( character <= 0x1D2B ) ) + { + return PHONETIC_U; + } + if( ( 0x1D5D <= character ) && ( character <= 0x1D61 ) ) + { + return PHONETIC_SS; + } + if( ( 0x1D66 <= character ) && ( character <= 0x1D6A ) ) + { + return PHONETIC_SS; + } + if( character == 0x1D78 ) + { + return PHONETIC_SS; + } + if( character == 0x1DBF) + { + return PHONETIC_SS; + } + return LATIN; } } @@ -337,6 +481,15 @@ Script GetCharacterScript( Character character ) } if( ( 0x2070 <= character ) && ( character <= 0x209f ) ) { + if( character == 0x2070 ) + { + return 
NUMERIC_SS; + } + if( ( 0x2074 <= character ) && ( character <= 0x207E ) ) + { + return NUMERIC_SS; + } + return LATIN; } if( character == 0x20e3 ) @@ -351,10 +504,69 @@ Script GetCharacterScript( Character character ) { return EMOJI; // 5. Uncategorized: information source } - if( ( 0x2100 <= character ) && ( character <= 0x218f ) ) - { + if( ( 0x2100 <= character ) && ( character <= 0x2189 ) ) + { + if( ( 0x2100 <= character ) && ( character <= 0x214f ) ) + { + if( ( 0x212A <= character ) && ( character <= 0x212B ) ) + { + return LATIN; + } + if( character == 0x2132 ) + { + return LATIN; + } + if( character == 0x214E ) + { + return LATIN; + } + + return LETTER_LIKE; + } + if( ( 0x2150 <= character ) && ( character <= 0x215F ) ) + { + return FRACTIONS_NF; + } + if( character == 0x2189 ) + { + return FRACTIONS_NF; + } + return LATIN; } + + // Symbols + if( ( 0x25cb == character ) || + ( 0x25cf == character ) || + ( 0x25a1 == character ) ) + { + return SYMBOLS1; + } + + if( 0x25a0 == character ) + { + return SYMBOLS2; + } + + if( ( 0x2664 == character ) || + ( 0x2661 == character ) || + ( 0x2662 == character ) || + ( 0x2667 == character ) ) + { + return SYMBOLS3; + } + + if( ( 0x2606 == character ) || + ( 0x25aa == character ) ) + { + return SYMBOLS4; + } + + if( 0x262a == character ) + { + return SYMBOLS5; + } + // U+2194 5. Uncategorized: left right arrow // U+2B55 5. 
Uncategorized: heavy large circle if( ( 0x2194 <= character ) && ( character <= 0x2B55 ) ) @@ -441,6 +653,27 @@ Script GetCharacterScript( Character character ) } if( ( 0xa720 <= character ) && ( character <= 0xa7ff ) ) { + if( character == 0xA720 ) + { + return PHONETIC_U; + } + if( character == 0xA721 ) + { + return PHONETIC_U; + } + if( character == 0xA788 ) + { + return NON_LATIN_LED; + } + if( character == 0xA789 ) + { + return NON_LATIN_LED; + } + if( character == 0xA78A ) + { + return NON_LATIN_LED; + } + return LATIN; } if( ( 0xa960 <= character ) && ( character <= 0xa97f ) ) @@ -484,6 +717,19 @@ Script GetCharacterScript( Character character ) } if( ( 0xff00 <= character ) && ( character <= 0xffef ) ) { + if( ( 0xFF00 <= character ) && ( character <= 0xFF20 ) ) + { + return HWFW_S; + } + if( ( 0xFF3B <= character ) && ( character <= 0xFF40 ) ) + { + return HWFW_S; + } + if( ( 0xFF5B <= character ) && ( character <= 0xFFEF ) ) + { + return HWFW_S; + } + return LATIN; } if( ( 0x1ee00 <= character ) && ( character <= 0x1eeff ) ) diff --git a/text/dali/devel-api/text-abstraction/script.h b/text/dali/devel-api/text-abstraction/script.h index ee08ab2..753563e 100644 --- a/text/dali/devel-api/text-abstraction/script.h +++ b/text/dali/devel-api/text-abstraction/script.h @@ -36,84 +36,134 @@ namespace TextAbstraction */ enum Script { - COMMON, ///< Valid for all scripts. i.e white space or '\n'. - - CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... - GREEK, ///< The Greek script. Used by Greek. - LATIN, ///< The latin script. Used by many western languages and others around the world. - - ARABIC, ///< The arabic script. Used by Arab and Urdu among others. - HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - - ARMENIAN, ///< The Armenian script. Used by Armenian. - GEORGIAN, ///< The Georgian script. Used by Georgian. - - CJK, ///< The CJK script. 
Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - HANGUL, ///< The Hangul jamo script. Used by Korean. - HIRAGANA, ///< The Hiragana script. Used by the Japanese. - KATAKANA, ///< The Katakana script. Used by the Japanese. - BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. - - BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. - DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. - GUJARATI, ///< The Gujarati script. Used by Gujarati. - GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. - KANNADA, ///< The Kannada script. Used by Kannada and Tulu. - MALAYALAM, ///< The Malayalam script. Used by Malayalam. - ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - SINHALA, ///< The Sinhala script. Used by Sinhala and Pali. - TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. - - LAO, ///< The Lao script. Used by the Lao language. - THAI, ///< The Thai script. Used by the Thai language - KHMER, ///< The Khmer script. Used by the Khmer language. - - EMOJI, ///< The Emoji which map to standardized Unicode characters. - - UNKNOWN ///< The script is unknown. + COMMON, ///< Valid for all scripts. i.e white space or '\n'. + + ASCII_DIGITS, ///< ASCII digits. + ASCII_PS, ///< ASCII punctuation and symbols. + + C1_CONTROLS, ///< Controls of the C1 Controls and Latin-1 Supplement unicode block. + C1_PS, ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement unicode block. + C1_MATH, ///< Math symbols of the C1 Controls and Latin-1 Supplement unicode block. 
+ + SML_P, ///< Punctuation symbols of the Spacing Modifier Letters unicode block. + PHONETIC_U, ///< Uralic phonetic symbols of the Phonetic Extensions unicode block. + PHONETIC_SS, ///< Subscripts and superscripts of the Phonetic Extensions unicode block. + + NUMERIC_SS, ///< Numeric subscripts and superscripts. + + LETTER_LIKE, ///< Symbols of the Letterlike unicode block. + NUMBER_FORMS, ///< Number Forms unicode block. + FRACTIONS_NF, ///< Numeric fraction symbols of the Number Forms unicode block. + NON_LATIN_LED, ///< Non latin symbols within the Latin Extended D unicode block. + HWFW_S, ///< Non latin symbols within the Halfwidth and fullwidth unicode block. + + CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + GREEK, ///< The Greek script. Used by Greek. + LATIN, ///< The latin script. Used by many western languages and others around the world. + + ARABIC, ///< The arabic script. Used by Arab and Urdu among others. + HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + ARMENIAN, ///< The Armenian script. Used by Armenian. + GEORGIAN, ///< The Georgian script. Used by Georgian. + + CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + HANGUL, ///< The Hangul jamo script. Used by Korean. + HIRAGANA, ///< The Hiragana script. Used by the Japanese. + KATAKANA, ///< The Katakana script. Used by the Japanese. + BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. + + BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. + BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. + DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + GUJARATI, ///< The Gujarati script. Used by Gujarati. 
+ GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. + KANNADA, ///< The Kannada script. Used by Kannada and Tulu. + MALAYALAM, ///< The Malayalam script. Used by Malayalam. + ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. + SINHALA, ///< The Sinhala script. Used by Sinhala and Pali. + TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. + TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + + LAO, ///< The Lao script. Used by the Lao language. + THAI, ///< The Thai script. Used by the Thai language + KHMER, ///< The Khmer script. Used by the Khmer language. + + EMOJI, ///< The Emoji which map to standardized Unicode characters. + + SYMBOLS1, ///< Some symbols. + SYMBOLS2, ///< Some symbols. + SYMBOLS3, ///< Some symbols. + SYMBOLS4, ///< Some symbols. + SYMBOLS5, ///< Some symbols. + + UNKNOWN ///< The script is unknown. }; const char* const ScriptName[] = { - "COMMON", ///< Valid for all scripts. i.e white space or '\n'. - - "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... - "GREEK", ///< The Greek script. Used by Greek. - "LATIN", ///< The latin script. Used by many western languages and others around the world. - - "ARABIC", ///< The arabic script. Used by Arab and Urdu among others. - "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - - "ARMENIAN", ///< The Armenian script. Used by Armenian. - "GEORGIAN", ///< The Georgian script. Used by Georgian. - - "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - "HANGUL", ///< The Hangul jamo script. Used by Korean. - "HIRAGANA", ///< The Hiragana script. Used by the Japanese. - "KATAKANA", ///< The Katakana script. Used by the Japanese. - "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. - - "BENGALI", ///< The Bengali script. 
Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. - "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. - "GUJARATI", ///< The Gujarati script. Used by Gujarati. - "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. - "KANNADA", ///< The Kannada script. Used by Kannada and Tulu. - "MALAYALAM", ///< The Malayalam script. Used by Malayalam. - "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali. - "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. - - "LAO", ///< The Lao script. Used by the Lao language. - "THAI", ///< The Thai script. Used by the Thai language - "KHMER", ///< The Khmer script. Used by the Khmer language. - - "EMOJI", ///< The Emoji which map to standardized Unicode characters. - - "UNKNOWN" ///< The script is unknown. + "COMMON", ///< Valid for all scripts. i.e white space or '\n'. + + "ASCII_DIGITS", ///< ASCII digits. + "ASCII_PS", ///< ASCII punctuation and symbols. + + "C1_CONTROLS", ///< Controls of the C1 Controls and Latin-1 Supplement unicode block. + "C1_PS", ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement unicode block. + "C1_MATH", ///< Math symbols of the C1 Controls and Latin-1 Supplement unicode block. + + "SML_P", ///< Punctuation symbols of the Spacing Modifier Letters unicode block. + "PHONETIC_U", ///< Uralic phonetic symbols of the Phonetic Extensions unicode block. + "PHONETIC_SS", ///< Subscripts and superscripts of the Phonetic Extensions unicode block. + + "NUMERIC_SS", ///< Numeric subscripts and superscripts. + + "LETTER_LIKE", ///< Symbols of the Letterlike unicode block. + "NUMBER_FORMS", ///< Number Forms unicode block. 
+ "FRACTIONS_NF", ///< Numeric fraction symbols of the Number Forms unicode block. + "NON_LATIN_LED", ///< Non latin symbols within the Latin Extended D unicode block. + "HWFW_S", ///< Non latin symbols within the Halfwidth and fullwidth unicode block. + + "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + "GREEK", ///< The Greek script. Used by Greek. + "LATIN", ///< The latin script. Used by many western languages and others around the world. + + "ARABIC", ///< The arabic script. Used by Arab and Urdu among others. + "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + "ARMENIAN", ///< The Armenian script. Used by Armenian. + "GEORGIAN", ///< The Georgian script. Used by Georgian. + + "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + "HANGUL", ///< The Hangul jamo script. Used by Korean. + "HIRAGANA", ///< The Hiragana script. Used by the Japanese. + "KATAKANA", ///< The Katakana script. Used by the Japanese. + "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. + + "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. + "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. + "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + "GUJARATI", ///< The Gujarati script. Used by Gujarati. + "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. + "KANNADA", ///< The Kannada script. Used by Kannada and Tulu. + "MALAYALAM", ///< The Malayalam script. Used by Malayalam. + "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. + "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali. + "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. 
+ "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + + "LAO", ///< The Lao script. Used by the Lao language. + "THAI", ///< The Thai script. Used by the Thai language + "KHMER", ///< The Khmer script. Used by the Khmer language. + + "EMOJI", ///< The Emoji which map to standardized Unicode characters. + + "SYMBOLS1", ///< Some symbols. + "SYMBOLS2", ///< Some symbols. + "SYMBOLS3", ///< Some symbols. + "SYMBOLS4", ///< Some symbols. + "SYMBOLS5", ///< Some symbols. + + "UNKNOWN" ///< The script is unknown. }; /** diff --git a/text/dali/internal/text-abstraction/shaping-impl.cpp b/text/dali/internal/text-abstraction/shaping-impl.cpp index ccdf4bc..ae87d94 100644 --- a/text/dali/internal/text-abstraction/shaping-impl.cpp +++ b/text/dali/internal/text-abstraction/shaping-impl.cpp @@ -47,6 +47,22 @@ const hb_script_t SCRIPT_TO_HARFBUZZ[] = { HB_SCRIPT_COMMON, + HB_SCRIPT_COMMON, // ASCII_DIGITS + HB_SCRIPT_COMMON, // ASCII_PS + + HB_SCRIPT_COMMON, // C1_CONTROLS + HB_SCRIPT_COMMON, // C1_PS + HB_SCRIPT_COMMON, // C1_MATH + HB_SCRIPT_COMMON, // SML_P + HB_SCRIPT_COMMON, // PHONETIC_U + HB_SCRIPT_COMMON, // PHONETIC_SS + HB_SCRIPT_COMMON, // NUMERIC_SS + HB_SCRIPT_COMMON, // LETTER_LIKE + HB_SCRIPT_COMMON, // NUMBER_FORMS + HB_SCRIPT_COMMON, // FRACTIONS_NF + HB_SCRIPT_COMMON, // NON_LATIN_LED + HB_SCRIPT_COMMON, // HWFW_S + HB_SCRIPT_CYRILLIC, HB_SCRIPT_GREEK, HB_SCRIPT_LATIN, @@ -80,6 +96,11 @@ const hb_script_t SCRIPT_TO_HARFBUZZ[] = HB_SCRIPT_KHMER, HB_SCRIPT_UNKNOWN, // EMOJI + HB_SCRIPT_UNKNOWN, // SYMBOLS1 + HB_SCRIPT_UNKNOWN, // SYMBOLS2 + HB_SCRIPT_UNKNOWN, // SYMBOLS3 + HB_SCRIPT_UNKNOWN, // SYMBOLS4 + HB_SCRIPT_UNKNOWN, // SYMBOLS5 HB_SCRIPT_UNKNOWN }; -- 2.7.4