X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=text%2Fdali%2Fdevel-api%2Ftext-abstraction%2Fscript.h;h=163aae24e09591043255df04e8f7656043e6f627;hb=108122c6a960d27bc0354391a22189bfd376f72f;hp=06b13017d5c649c8aa32c3eef37f25386cfd31aa;hpb=1f710892b95139f9001b86f27d9f4a00bf08fc01;p=platform%2Fcore%2Fuifw%2Fdali-adaptor.git diff --git a/text/dali/devel-api/text-abstraction/script.h b/text/dali/devel-api/text-abstraction/script.h index 06b1301..163aae2 100644 --- a/text/dali/devel-api/text-abstraction/script.h +++ b/text/dali/devel-api/text-abstraction/script.h @@ -36,82 +36,138 @@ namespace TextAbstraction */ enum Script { - COMMON, ///< Valid for all scripts. i.e white space or '\n'. - - CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... - GREEK, ///< The Greek script. Used by Greek. - LATIN, ///< The latin script. Used by many western languages and others around the world. - - ARABIC, ///< The arabic script. Used by Arab and Urdu among others. - HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - - ARMENIAN, ///< The Armenian script. Used by Armenian. - GEORGIAN, ///< The Georgian script. Used by Georgian. - - CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - HANGUL, ///< The Hangul jamo script. Used by Korean. - HIRAGANA, ///< The Hiragana script. Used by the Japanese. - KATAKANA, ///< The Katakana script. Used by the Japanese. - - BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. - DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. - GUJARATI, ///< The Gujarati script. Used by Gujarati. - GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. - KANNADA, ///< The Kannada script. Used by Kannada and Tulu. 
- MALAYALAM, ///< The Malayalam script. Used by Malayalam. - ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - SINHALA, ///< The Sinhala script. Used by Sinhala and Pali. - TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. - - LAO, ///< The Lao script. Used by the Lao language. - THAI, ///< The Thai script. Used by the Thai language - KHMER, ///< The Khmer script. Used by the Khmer language. - - EMOJI, ///< The Emoji which map to standardized Unicode characters. - - UNKNOWN ///< The script is unknown. + COMMON, ///< Valid for all scripts. i.e. white space or '\n'. + + ASCII_DIGITS, ///< ASCII digits. + ASCII_PS, ///< ASCII punctuation and symbols. + + C1_CONTROLS, ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block. + C1_PS, ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block. + C1_MATH, ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block. + + SML_P, ///< Punctuation symbols of the Spacing Modifier Letters Unicode block. + PHONETIC_U, ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block. + PHONETIC_SS, ///< Subscripts and superscripts of the Phonetic Extensions Unicode block. + + NUMERIC_SS, ///< Numeric subscripts and superscripts. + + LETTER_LIKE, ///< Symbols of the Letterlike Unicode block. + NUMBER_FORMS, ///< Number Forms Unicode block. + FRACTIONS_NF, ///< Numeric fraction symbols of the Number Forms Unicode block. + NON_LATIN_LED, ///< Non-Latin symbols within the Latin Extended D Unicode block. + HWFW_S, ///< Non-Latin symbols within the Halfwidth and Fullwidth Forms Unicode block. + + CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + GREEK, ///< The Greek script. Used by Greek. + LATIN, ///< The Latin script. Used by many western languages and others around the world. + + ARABIC, ///< The Arabic script. 
Used by Arab and Urdu among others. + HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + ARMENIAN, ///< The Armenian script. Used by Armenian. + GEORGIAN, ///< The Georgian script. Used by Georgian. + + CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + HANGUL, ///< The Hangul jamo script. Used by Korean. + HIRAGANA, ///< The Hiragana script. Used by the Japanese. + KATAKANA, ///< The Katakana script. Used by the Japanese. + BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. + + BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. + BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language. + DEVANAGARI, ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + GUJARATI, ///< The Gujarati script. Used by Gujarati. + GURMUKHI, ///< The Gurmukhi script. Used by Punjabi. + KANNADA, ///< The Kannada script. Used by Kannada and Tulu. + MALAYALAM, ///< The Malayalam script. Used by Malayalam. + ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. + SINHALA, ///< The Sinhala script. Used by Sinhala and Pali. + TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. + TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + + LAO, ///< The Lao script. Used by the Lao language. + THAI, ///< The Thai script. Used by the Thai language. + KHMER, ///< The Khmer script. Used by the Khmer language. + JAVANESE, ///< The Javanese script. Used by the Javanese language. + SUNDANESE, ///< The Sundanese script. Used by the Sundanese language. + + EMOJI, ///< The Emoji which map to standardized Unicode characters. + + SYMBOLS1, ///< Some symbols. + SYMBOLS2, ///< Some symbols. + SYMBOLS3, ///< Some symbols. + SYMBOLS4, ///< Some symbols. 
+ SYMBOLS5, ///< Some symbols. + + UNKNOWN ///< The script is unknown. }; const char* const ScriptName[] = { - "COMMON", ///< Valid for all scripts. i.e white space or '\n'. - - "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... - "GREEK", ///< The Greek script. Used by Greek. - "LATIN", ///< The latin script. Used by many western languages and others around the world. - - "ARABIC", ///< The arabic script. Used by Arab and Urdu among others. - "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. - - "ARMENIAN", ///< The Armenian script. Used by Armenian. - "GEORGIAN", ///< The Georgian script. Used by Georgian. - - "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). - "HANGUL", ///< The Hangul jamo script. Used by Korean. - "HIRAGANA", ///< The Hiragana script. Used by the Japanese. - "KATAKANA", ///< The Katakana script. Used by the Japanese. - - "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. - "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. - "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. - "GUJARATI", ///< The Gujarati script. Used by Gujarati. - "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. - "KANNADA", ///< The Kannada script. Used by Kannada and Tulu. - "MALAYALAM", ///< The Malayalam script. Used by Malayalam. - "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. - "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali. - "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. - "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. - - "LAO", ///< The Lao script. Used by the Lao language. - "THAI", ///< The Thai script. Used by the Thai language - "KHMER", ///< The Khmer script. 
Used by the Khmer language. - - "EMOJI", ///< The Emoji which map to standardized Unicode characters. - - "UNKNOWN" ///< The script is unknown. + "COMMON", ///< Valid for all scripts. i.e. white space or '\n'. + + "ASCII_DIGITS", ///< ASCII digits. + "ASCII_PS", ///< ASCII punctuation and symbols. + + "C1_CONTROLS", ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block. + "C1_PS", ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block. + "C1_MATH", ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block. + + "SML_P", ///< Punctuation symbols of the Spacing Modifier Letters Unicode block. + "PHONETIC_U", ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block. + "PHONETIC_SS", ///< Subscripts and superscripts of the Phonetic Extensions Unicode block. + + "NUMERIC_SS", ///< Numeric subscripts and superscripts. + + "LETTER_LIKE", ///< Symbols of the Letterlike Unicode block. + "NUMBER_FORMS", ///< Number Forms Unicode block. + "FRACTIONS_NF", ///< Numeric fraction symbols of the Number Forms Unicode block. + "NON_LATIN_LED", ///< Non-Latin symbols within the Latin Extended D Unicode block. + "HWFW_S", ///< Non-Latin symbols within the Halfwidth and Fullwidth Forms Unicode block. + + "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ... + "GREEK", ///< The Greek script. Used by Greek. + "LATIN", ///< The Latin script. Used by many western languages and others around the world. + + "ARABIC", ///< The Arabic script. Used by Arab and Urdu among others. + "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic. + + "ARMENIAN", ///< The Armenian script. Used by Armenian. + "GEORGIAN", ///< The Georgian script. Used by Georgian. + + "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system). + "HANGUL", ///< The Hangul jamo script. Used by Korean. + "HIRAGANA", ///< The Hiragana script. 
Used by the Japanese. + "KATAKANA", ///< The Katakana script. Used by the Japanese. + "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese. + + "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali. + "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language. + "DEVANAGARI", ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit. + "GUJARATI", ///< The Gujarati script. Used by Gujarati. + "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi. + "KANNADA", ///< The Kannada script. Used by Kannada and Tulu. + "MALAYALAM", ///< The Malayalam script. Used by Malayalam. + "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali. + "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali. + "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra. + "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi. + + "LAO", ///< The Lao script. Used by the Lao language. + "THAI", ///< The Thai script. Used by the Thai language. + "KHMER", ///< The Khmer script. Used by the Khmer language. + "JAVANESE", ///< The Javanese script. Used by the Javanese language. + "SUNDANESE", ///< The Sundanese script. Used by the Sundanese language. + + "EMOJI", ///< The Emoji which map to standardized Unicode characters. + + "SYMBOLS1", ///< Some symbols. + "SYMBOLS2", ///< Some symbols. + "SYMBOLS3", ///< Some symbols. + "SYMBOLS4", ///< Some symbols. + "SYMBOLS5", ///< Some symbols. + + "UNKNOWN" ///< The script is unknown. }; /** @@ -213,6 +269,17 @@ DALI_IMPORT_API bool IsThinSpace( Character character ); */ DALI_IMPORT_API bool IsCommonScript( Character character ); +/** + * @brief Whether the script contains ligatures that must be 'broken' for selection or cursor position. 
+ * + * e.g. the Latin script has the 'ff' or 'fi' ligatures that need to be broken to position the cursor + * between the two characters. Equally the Arabic script has the 'ﻻ' ligature that needs to be broken. + * + * @param[in] script The script. + * + * @return @e true if the script has ligatures that must be 'broken'. + */ +DALI_IMPORT_API bool HasLigatureMustBreak( Script script ); } // namespace TextAbstraction } // namespace Dali