From 94cf71e121ca58c2ea738ae0711a564f1f8eb11a Mon Sep 17 00:00:00 2001 From: Victor Cebollada Date: Fri, 22 May 2015 15:17:05 +0100 Subject: [PATCH] Scripts added. Cyrillic, Georgian, Greek and Armenian. Change-Id: Ie8ca2ea98fc142ba0a4ce727264f91b1374d526c Signed-off-by: Victor Cebollada --- text/dali/devel-api/text-abstraction/script.cpp | 58 ++++++++++++++++++ text/dali/devel-api/text-abstraction/script.h | 70 ++++++++++++++-------- .../internal/text-abstraction/shaping-impl.cpp | 32 +++++++--- 3 files changed, 127 insertions(+), 33 deletions(-) diff --git a/text/dali/devel-api/text-abstraction/script.cpp b/text/dali/devel-api/text-abstraction/script.cpp index 4985049..fe817d6 100644 --- a/text/dali/devel-api/text-abstraction/script.cpp +++ b/text/dali/devel-api/text-abstraction/script.cpp @@ -139,6 +139,24 @@ Script GetCharacterScript( Character character ) // 0x0591 - 0x05f4 Hebrew // 0xfb1d - 0xfb4f Hebrew subset of Alphabetic Presentation Forms + // Cyrillic script + // 0x0400 - 0x04ff Cyrillic + // 0x0500 - 0x052f Cyrillic Supplement + // 0x2de0 - 0x2dff Cyrillic Extended-A + // 0xa640 - 0xa69f Cyrillic Extended-B + + // Georgian script + // 0x10a0 - 0x10ff Georgian + // 0x2d00 - 0x2d2f Georgian Supplement + + // Greek script + // 0x0370 - 0x03ff Greek & Coptic + // 0x1f00 - 0x1fff Greek Extended + + // Armenian script + // 0x0530 - 0x058f Armenian + // 0xfb13 - 0xfb17 Armenian subset of Alphabetic Presentation Forms + // The Emoji which map to standardized Unicode characters // 1. Emoticons ( 1F601 - 1F64F ) // 2. 
Dingbats ( 2702 - 27B0 ) @@ -167,6 +185,22 @@ Script GetCharacterScript( Character character ) { return LATIN; } + if( ( 0x0370 <= character ) && ( character <= 0x03ff ) ) + { + return GREEK; + } + if( ( 0x0400 <= character ) && ( character <= 0x04ff ) ) + { + return CYRILLIC; + } + if( ( 0x0500 <= character ) && ( character <= 0x052f ) ) + { + return CYRILLIC; + } + if( ( 0x0530 <= character ) && ( character <= 0x058f ) ) + { + return ARMENIAN; + } if( ( 0x0591 <= character ) && ( character <= 0x05f4 ) ) { return HEBREW; @@ -256,6 +290,10 @@ Script GetCharacterScript( Character character ) { return BURMESE; } + if( ( 0x10a0 <= character ) && ( character <= 0x10ff ) ) + { + return GEORGIAN; + } if( ( 0x1100 <= character ) && ( character <= 0x11ff ) ) { return HANGUL; @@ -275,6 +313,10 @@ Script GetCharacterScript( Character character ) } else // > 0x1eff { + if( ( 0x1f00 <= character ) && ( character <= 0x1fff ) ) + { + return GREEK; + } if( character == 0x203c ) { return EMOJI; // 5. Uncategorized: double exclamation mark @@ -319,6 +361,14 @@ Script GetCharacterScript( Character character ) { if( character <= 0xfdff ) { + if( ( 0x2d00 <= character ) && ( character <= 0x2d2f ) ) + { + return GEORGIAN; + } + if( ( 0x2de0 <= character ) && ( character <= 0x2dff ) ) + { + return CYRILLIC; + } if( ( 0x2e80 <= character ) && ( character <= 0x2eff ) ) { return CJK; @@ -367,6 +417,10 @@ Script GetCharacterScript( Character character ) { return CJK; } + if( ( 0xa640 <= character ) && ( character <= 0xa69f ) ) + { + return CYRILLIC; + } if( ( 0xa720 <= character ) && ( character <= 0xa7ff ) ) { return LATIN; @@ -391,6 +445,10 @@ Script GetCharacterScript( Character character ) { return LATIN; } + if( ( 0xfb13 <= character ) && ( character <= 0xfb17 ) ) + { + return ARMENIAN; + } if( ( 0xfb1d <= character ) && ( character <= 0xfb4f ) ) { return HEBREW; diff --git a/text/dali/devel-api/text-abstraction/script.h b/text/dali/devel-api/text-abstraction/script.h index 
3f33208..9341f98 100644 --- a/text/dali/devel-api/text-abstraction/script.h +++ b/text/dali/devel-api/text-abstraction/script.h @@ -36,55 +36,77 @@ namespace TextAbstraction */ enum Script { + CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + GREEK, ///< The Greek script. Used by Greek. LATIN, ///< The latin script. Used by many western languages and others around the world. + ARABIC, ///< The arabic script. Used by Arab and Urdu among others. - DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + ARMENIAN, ///< The Armenian script. Used by Armenian. + GEORGIAN, ///< The Georgian script. Used by Georgian. + + CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + HANGUL, ///< The Hangul jamo script. Used by Korean. + HIRAGANA, ///< The Hiragana script. Used by the Japanese. + KATAKANA, ///< The Katakana script. Used by the Japanese. + BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. + BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. + DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. GUJARATI, ///< The Gujarati script. Used by Gujarati. - ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. KANNADA, ///< The Kannada script. Used by Kannada and Tulu. MALAYALAM, ///< The Malayalam script. Used by Malayalam. + ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. SINHALA, ///< The Sinhala script. 
Used by Sinhala and Pali. - CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - HANGUL, ///< The Hangul jamo script. Used by Korean. - KHMER, ///< The Khmer script. Used by the Khmer language. + TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. + TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + LAO, ///< The Lao script. Used by the Lao language. THAI, ///< The Thai script. Used by the Thai language - BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. - HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - HIRAGANA, ///< The Hiragana script. Used by the Japanese. - KATAKANA, ///< The Katakana script. Used by the Japanese. + KHMER, ///< The Khmer script. Used by the Khmer language. + EMOJI, ///< The Emoji which map to standardized Unicode characters. + UNKNOWN ///< The script is unknown. }; const char* const ScriptName[] = { + "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + "GREEK", ///< The Greek script. Used by Greek. "LATIN", ///< The latin script. Used by many western languages and others around the world. + "ARABIC", ///< The arabic script. Used by Arab and Urdu among others. - "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + "ARMENIAN", ///< The Armenian script. Used by Armenian. + "GEORGIAN", ///< The Georgian script. Used by Georgian. + + "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + "HANGUL", ///< The Hangul jamo script. Used by Korean. + "HIRAGANA", ///< The Hiragana script. Used by the Japanese. + "KATAKANA", ///< The Katakana script. Used by the Japanese. + "BENGALI", ///< The Bengali script. 
Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. + "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. + "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. "GUJARATI", ///< The Gujarati script. Used by Gujarati. - "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. "KANNADA", ///< The Kannada script. Used by Kannada and Tulu. "MALAYALAM", ///< The Malayalam script. Used by Malayalam. + "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali. - "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - "HANGUL", ///< The Hangul jamo script. Used by Korean. - "KHMER", ///< The Khmer script. Used by the Khmer language. + "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. + "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + "LAO", ///< The Lao script. Used by the Lao language. "THAI", ///< The Thai script. Used by the Thai language - "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. - "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - "HIRAGANA", ///< The Hiragana script. Used by the Japanese. - "KATAKANA", ///< The Katakana script. Used by the Japanese. + "KHMER", ///< The Khmer script. Used by the Khmer language. + "EMOJI", ///< The Emoji which map to standardized Unicode characters. + "UNKNOWN" ///< The script is unknown. 
}; diff --git a/text/dali/internal/text-abstraction/shaping-impl.cpp b/text/dali/internal/text-abstraction/shaping-impl.cpp index 6f7d5c9..fd6e8dc 100644 --- a/text/dali/internal/text-abstraction/shaping-impl.cpp +++ b/text/dali/internal/text-abstraction/shaping-impl.cpp @@ -45,24 +45,38 @@ const float FROM_266 = 1.0f / 64.0f; const hb_script_t SCRIPT_TO_HARFBUZZ[] = { + HB_SCRIPT_CYRILLIC, + HB_SCRIPT_GREEK, HB_SCRIPT_LATIN, + HB_SCRIPT_ARABIC, - HB_SCRIPT_DEVANAGARI, + HB_SCRIPT_HEBREW, + + HB_SCRIPT_ARMENIAN, + HB_SCRIPT_GEORGIAN, + + HB_SCRIPT_HAN, + HB_SCRIPT_HANGUL, + HB_SCRIPT_HIRAGANA, + HB_SCRIPT_KATAKANA, + HB_SCRIPT_BENGALI, - HB_SCRIPT_GURMUKHI, + HB_SCRIPT_MYANMAR, + HB_SCRIPT_DEVANAGARI, HB_SCRIPT_GUJARATI, - HB_SCRIPT_ORIYA, - HB_SCRIPT_TAMIL, - HB_SCRIPT_TELUGU, + HB_SCRIPT_GURMUKHI, HB_SCRIPT_KANNADA, HB_SCRIPT_MALAYALAM, + HB_SCRIPT_ORIYA, HB_SCRIPT_SINHALA, - HB_SCRIPT_HAN, - HB_SCRIPT_HANGUL, - HB_SCRIPT_KHMER, + HB_SCRIPT_TAMIL, + HB_SCRIPT_TELUGU, + HB_SCRIPT_LAO, HB_SCRIPT_THAI, - HB_SCRIPT_MYANMAR, + HB_SCRIPT_KHMER, + + HB_SCRIPT_UNKNOWN, // EMOJI HB_SCRIPT_UNKNOWN }; -- 2.7.4