*/
enum Script
{
- COMMON, ///< Valid for all scripts. i.e white space or '\n'.
-
- CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
- GREEK, ///< The Greek script. Used by Greek.
- LATIN, ///< The latin script. Used by many western languages and others around the world.
-
- ARABIC, ///< The arabic script. Used by Arab and Urdu among others.
- HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
-
- ARMENIAN, ///< The Armenian script. Used by Armenian.
- GEORGIAN, ///< The Georgian script. Used by Georgian.
-
- CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
- HANGUL, ///< The Hangul jamo script. Used by Korean.
- HIRAGANA, ///< The Hiragana script. Used by the Japanese.
- KATAKANA, ///< The Katakana script. Used by the Japanese.
- BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
-
- BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
- BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
- DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
- GUJARATI, ///< The Gujarati script. Used by Gujarati.
- GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
- KANNADA, ///< The Kannada script. Used by Kannada and Tulu.
- MALAYALAM, ///< The Malayalam script. Used by Malayalam.
- ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
- SINHALA, ///< The Sinhala script. Used by Sinhala and Pali.
- TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
- TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
-
- LAO, ///< The Lao script. Used by the Lao language.
- THAI, ///< The Thai script. Used by the Thai language
- KHMER, ///< The Khmer script. Used by the Khmer language.
-
- EMOJI, ///< The Emoji which map to standardized Unicode characters.
-
- UNKNOWN ///< The script is unknown.
+ COMMON, ///< Valid for all scripts. i.e white space or '\n'.
+
+ ASCII_DIGITS, ///< ASCII digits.
+ ASCII_PS, ///< ASCII punctuation and symbols.
+
+ C1_CONTROLS, ///< Controls of the C1 Controls and Latin-1 Supplement unicode block.
+ C1_PS, ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement unicode block.
+ C1_MATH, ///< Math symbols of the C1 Controls and Latin-1 Supplement unicode block.
+
+ SML_P, ///< Punctuation symbols of the Spacing Modifier Letters unicode block.
+ PHONETIC_U, ///< Uralic phonetic symbols of the Phonetic Extensions unicode block.
+ PHONETIC_SS, ///< Subscripts and superscripts of the Phonetic Extensions unicode block.
+
+ NUMERIC_SS, ///< Numeric subscripts and superscripts.
+
+ LETTER_LIKE, ///< Symbols of the Letterlike unicode block.
+ NUMBER_FORMS, ///< Number Forms unicode block.
+ FRACTIONS_NF, ///< Numeric fraction symbols of the Number Forms unicode block.
+ NON_LATIN_LED, ///< Non latin symbols within the Latin Extended D unicode block.
+ HWFW_S, ///< Non latin symbols within the Halfwidth and fullwidth unicode block.
+
+ CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+ GREEK, ///< The Greek script. Used by Greek.
+ LATIN, ///< The latin script. Used by many western languages and others around the world.
+
+ ARABIC, ///< The arabic script. Used by Arab and Urdu among others.
+ HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+ ARMENIAN, ///< The Armenian script. Used by Armenian.
+ GEORGIAN, ///< The Georgian script. Used by Georgian.
+
+ CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
+ HANGUL, ///< The Hangul jamo script. Used by Korean.
+ HIRAGANA, ///< The Hiragana script. Used by the Japanese.
+ KATAKANA, ///< The Katakana script. Used by the Japanese.
+ BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
+
+ BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
+ BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
+ DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+ GUJARATI, ///< The Gujarati script. Used by Gujarati.
+ GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
+ KANNADA, ///< The Kannada script. Used by Kannada and Tulu.
+ MALAYALAM, ///< The Malayalam script. Used by Malayalam.
+ ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
+ SINHALA, ///< The Sinhala script. Used by Sinhala and Pali.
+ TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+ TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
+ LAO, ///< The Lao script. Used by the Lao language.
+ THAI, ///< The Thai script. Used by the Thai language
+ KHMER, ///< The Khmer script. Used by the Khmer language.
+ JAVANESE, ///< The Javanese script. Used by the Javanese language.
+ SUNDANESE, ///< The Sundanese script. Used by the Sundanese language.
+
+ EMOJI, ///< The Emoji which map to standardized Unicode characters.
+
+ SYMBOLS1, ///< Some symbols.
+ SYMBOLS2, ///< Some symbols.
+ SYMBOLS3, ///< Some symbols.
+ SYMBOLS4, ///< Some symbols.
+ SYMBOLS5, ///< Some symbols.
+
+ UNKNOWN ///< The script is unknown.
};
const char* const ScriptName[] =
{
- "COMMON", ///< Valid for all scripts. i.e white space or '\n'.
-
- "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
- "GREEK", ///< The Greek script. Used by Greek.
- "LATIN", ///< The latin script. Used by many western languages and others around the world.
-
- "ARABIC", ///< The arabic script. Used by Arab and Urdu among others.
- "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
-
- "ARMENIAN", ///< The Armenian script. Used by Armenian.
- "GEORGIAN", ///< The Georgian script. Used by Georgian.
-
- "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
- "HANGUL", ///< The Hangul jamo script. Used by Korean.
- "HIRAGANA", ///< The Hiragana script. Used by the Japanese.
- "KATAKANA", ///< The Katakana script. Used by the Japanese.
- "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
-
- "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
- "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
- "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
- "GUJARATI", ///< The Gujarati script. Used by Gujarati.
- "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
- "KANNADA", ///< The Kannada script. Used by Kannada and Tulu.
- "MALAYALAM", ///< The Malayalam script. Used by Malayalam.
- "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
- "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali.
- "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
- "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
-
- "LAO", ///< The Lao script. Used by the Lao language.
- "THAI", ///< The Thai script. Used by the Thai language
- "KHMER", ///< The Khmer script. Used by the Khmer language.
-
- "EMOJI", ///< The Emoji which map to standardized Unicode characters.
-
- "UNKNOWN" ///< The script is unknown.
+ "COMMON", ///< Valid for all scripts. i.e white space or '\n'.
+
+ "ASCII_DIGITS", ///< ASCII digits.
+ "ASCII_PS", ///< ASCII punctuation and symbols.
+
+ "C1_CONTROLS", ///< Controls of the C1 Controls and Latin-1 Supplement unicode block.
+ "C1_PS", ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement unicode block.
+ "C1_MATH", ///< Math symbols of the C1 Controls and Latin-1 Supplement unicode block.
+
+ "SML_P", ///< Punctuation symbols of the Spacing Modifier Letters unicode block.
+ "PHONETIC_U", ///< Uralic phonetic symbols of the Phonetic Extensions unicode block.
+ "PHONETIC_SS", ///< Subscripts and superscripts of the Phonetic Extensions unicode block.
+
+ "NUMERIC_SS", ///< Numeric subscripts and superscripts.
+
+ "LETTER_LIKE", ///< Symbols of the Letterlike unicode block.
+ "NUMBER_FORMS", ///< Number Forms unicode block.
+ "FRACTIONS_NF", ///< Numeric fraction symbols of the Number Forms unicode block.
+ "NON_LATIN_LED", ///< Non latin symbols within the Latin Extended D unicode block.
+ "HWFW_S", ///< Non latin symbols within the Halfwidth and fullwidth unicode block.
+
+ "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+ "GREEK", ///< The Greek script. Used by Greek.
+ "LATIN", ///< The latin script. Used by many western languages and others around the world.
+
+ "ARABIC", ///< The arabic script. Used by Arab and Urdu among others.
+ "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+ "ARMENIAN", ///< The Armenian script. Used by Armenian.
+ "GEORGIAN", ///< The Georgian script. Used by Georgian.
+
+ "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
+ "HANGUL", ///< The Hangul jamo script. Used by Korean.
+ "HIRAGANA", ///< The Hiragana script. Used by the Japanese.
+ "KATAKANA", ///< The Katakana script. Used by the Japanese.
+ "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
+
+ "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
+ "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
+ "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+ "GUJARATI", ///< The Gujarati script. Used by Gujarati.
+ "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
+ "KANNADA", ///< The Kannada script. Used by Kannada and Tulu.
+ "MALAYALAM", ///< The Malayalam script. Used by Malayalam.
+ "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
+ "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali.
+ "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+ "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
+ "LAO", ///< The Lao script. Used by the Lao language.
+ "THAI", ///< The Thai script. Used by the Thai language
+ "KHMER", ///< The Khmer script. Used by the Khmer language.
+ "JAVANESE", ///< The Javanese script. Used by the Javanese language.
+ "SUNDANESE", ///< The Sundanese script. Used by the Sundanese language.
+
+ "EMOJI", ///< The Emoji which map to standardized Unicode characters.
+
+ "SYMBOLS1", ///< Some symbols.
+ "SYMBOLS2", ///< Some symbols.
+ "SYMBOLS3", ///< Some symbols.
+ "SYMBOLS4", ///< Some symbols.
+ "SYMBOLS5", ///< Some symbols.
+
+ "UNKNOWN" ///< The script is unknown.
};
/**
*/
DALI_IMPORT_API bool IsCommonScript( Character character );
+/**
+ * @brief Whether the script contains ligatures that must be 'broken' for selection or cursor position.
+ *
+ * i.e The latin script has the 'ff' or 'fi' ligatures that need to be broken to position the cursor
+ * between the two characters. Equally the arabic script has the 'ﻻ' ligature that needs to be broken.
+ *
+ * @param[in] script The script.
+ *
+ * @return @e true if the script has ligatures that must be 'broken'.
+ */
+DALI_IMPORT_API bool HasLigatureMustBreak( Script script );
} // namespace TextAbstraction
} // namespace Dali