dali/devel-api/text-abstraction/script.h

   1 #ifndef DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H
   2 #define DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H
   3
   4 /*
   5  * Copyright (c) 2019 Samsung Electronics Co., Ltd.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  * http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  */
  20
  21 // INTERNAL INCLUDES
  22 #include <dali/public-api/dali-adaptor-common.h>
  23 #include <dali/devel-api/text-abstraction/text-abstraction-definitions.h>
  24
  25 namespace Dali
  26 {
  27
  28 namespace TextAbstraction
  29 {
  30
  31 /**
  32  * @brief Script is the writing system used by a language.
  33  * Typically one script can be used to write several languages, and one language may be written in several scripts.
      *
      * @note The enumerators have no explicit values, so each one's value is its position in this list.
      * ScriptName lists the names in the same order; keep both in sync when adding or reordering scripts.
  34  */
  35 enum Script
  36 {
  37   COMMON,        ///< Valid for all scripts, e.g. white space or '\n'.
  38
  39   ASCII_DIGITS,  ///< ASCII digits.
  40   ASCII_PS,      ///< ASCII punctuation and symbols.
  41
  42   C1_CONTROLS,   ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block.
  43   C1_PS,         ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
  44   C1_MATH,       ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
  45
  46   SML_P,         ///< Punctuation symbols of the Spacing Modifier Letters Unicode block.
  47   PHONETIC_U,    ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block.
  48   PHONETIC_SS,   ///< Subscripts and superscripts of the Phonetic Extensions Unicode block.
  49
  50   NUMERIC_SS,    ///< Numeric subscripts and superscripts.
  51
  52   LETTER_LIKE,   ///< Symbols of the Letterlike Unicode block.
  53   NUMBER_FORMS,  ///< Number Forms Unicode block.
  54   FRACTIONS_NF,  ///< Numeric fraction symbols of the Number Forms Unicode block.
  55   NON_LATIN_LED, ///< Non-Latin symbols within the Latin Extended D Unicode block.
  56   HWFW_S,        ///< Non-Latin symbols within the Halfwidth and Fullwidth Unicode block.
  57
  58   CYRILLIC,      ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
  59   GREEK,         ///< The Greek script. Used by Greek.
  60   LATIN,         ///< The Latin script. Used by many western languages and others around the world.
  61
  62   ARABIC,        ///< The Arabic script. Used by Arabic and Urdu among others.
  63   HEBREW,        ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
  64
  65   ARMENIAN,      ///< The Armenian script. Used by Armenian.
  66   GEORGIAN,      ///< The Georgian script. Used by Georgian.
  67
  68   CJK,           ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
  69   HANGUL,        ///< The Hangul jamo script. Used by Korean.
  70   HIRAGANA,      ///< The Hiragana script. Used by Japanese.
  71   KATAKANA,      ///< The Katakana script. Used by Japanese.
  72   BOPOMOFO,      ///< The Bopomofo script, also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
  73
  74   BENGALI,       ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
  75   BURMESE,       ///< The Burmese script. Used by the Burmese (Myanmar) language.
  76   DEVANAGARI,    ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
  77   GUJARATI,      ///< The Gujarati script. Used by Gujarati.
  78   GURMUKHI,      ///< The Gurmukhi script. Used by Punjabi.
  79   KANNADA,       ///< The Kannada script. Used by Kannada and Tulu.
  80   MALAYALAM,     ///< The Malayalam script. Used by Malayalam.
  81   ORIYA,         ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
  82   SINHALA,       ///< The Sinhala script. Used by Sinhala and Pali.
  83   TAMIL,         ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
  84   TELUGU,        ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
  85
  86   LAO,           ///< The Lao script. Used by the Lao language.
  87   THAI,          ///< The Thai script. Used by the Thai language.
  88   KHMER,         ///< The Khmer script. Used by the Khmer language.
  89   JAVANESE,      ///< The Javanese script. Used by the Javanese language.
  90   SUNDANESE,     ///< The Sundanese script. Used by the Sundanese language.
  91
  92   GEEZ,          ///< The Ge'ez script, also known as Ethiopic. Used by Amharic, Tigrinya and other languages in Ethiopia and Eritrea.
  93   OL_CHIKI,      ///< The Ol Chiki script. Used by Santali.
  94   BAYBAYIN,      ///< The Baybayin script. Used by Tagalog, the Bikol languages, Ilocano, Pangasinan, Visayan and other languages in the Philippines.
  95   MEITEI,        ///< The Meitei script. Used for the Meitei language in Manipur, India.
  96
  97   EMOJI,         ///< Emoji that map to standardized Unicode characters.
  98
  99   SYMBOLS1,      ///< Some symbols.
 100   SYMBOLS2,      ///< Some symbols.
 101   SYMBOLS3,      ///< Some symbols.
 102   SYMBOLS4,      ///< Some symbols.
 103   SYMBOLS5,      ///< Some symbols.
 104
 105   UNKNOWN        ///< The script is unknown.
 106 };
 107
 108 const char* const ScriptName[] = // Printable name of every Script enumerator, listed in the enum's declaration order; keep in sync with the Script enum.
 109 {
 110   "COMMON",        ///< Valid for all scripts, e.g. white space or '\n'.
 111
 112   "ASCII_DIGITS",  ///< ASCII digits.
 113   "ASCII_PS",      ///< ASCII punctuation and symbols.
 114
 115   "C1_CONTROLS",   ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block.
 116   "C1_PS",         ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
 117   "C1_MATH",       ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
 118
 119   "SML_P",         ///< Punctuation symbols of the Spacing Modifier Letters Unicode block.
 120   "PHONETIC_U",    ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block.
 121   "PHONETIC_SS",   ///< Subscripts and superscripts of the Phonetic Extensions Unicode block.
 122
 123   "NUMERIC_SS",    ///< Numeric subscripts and superscripts.
 124
 125   "LETTER_LIKE",   ///< Symbols of the Letterlike Unicode block.
 126   "NUMBER_FORMS",  ///< Number Forms Unicode block.
 127   "FRACTIONS_NF",  ///< Numeric fraction symbols of the Number Forms Unicode block.
 128   "NON_LATIN_LED", ///< Non-Latin symbols within the Latin Extended D Unicode block.
 129   "HWFW_S",        ///< Non-Latin symbols within the Halfwidth and Fullwidth Unicode block.
 130
 131   "CYRILLIC",      ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
 132   "GREEK",         ///< The Greek script. Used by Greek.
 133   "LATIN",         ///< The Latin script. Used by many western languages and others around the world.
 134
 135   "ARABIC",        ///< The Arabic script. Used by Arabic and Urdu among others.
 136   "HEBREW",        ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
 137
 138   "ARMENIAN",      ///< The Armenian script. Used by Armenian.
 139   "GEORGIAN",      ///< The Georgian script. Used by Georgian.
 140
 141   "CJK",           ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
 142   "HANGUL",        ///< The Hangul jamo script. Used by Korean.
 143   "HIRAGANA",      ///< The Hiragana script. Used by Japanese.
 144   "KATAKANA",      ///< The Katakana script. Used by Japanese.
 145   "BOPOMOFO",      ///< The Bopomofo script, also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
 146
 147   "BENGALI",       ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
 148   "BURMESE",       ///< The Burmese script. Used by the Burmese (Myanmar) language.
 149   "DEVANAGARI",    ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
 150   "GUJARATI",      ///< The Gujarati script. Used by Gujarati.
 151   "GURMUKHI",      ///< The Gurmukhi script. Used by Punjabi.
 152   "KANNADA",       ///< The Kannada script. Used by Kannada and Tulu.
 153   "MALAYALAM",     ///< The Malayalam script. Used by Malayalam.
 154   "ORIYA",         ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
 155   "SINHALA",       ///< The Sinhala script. Used by Sinhala and Pali.
 156   "TAMIL",         ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
 157   "TELUGU",        ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
 158
 159   "LAO",           ///< The Lao script. Used by the Lao language.
 160   "THAI",          ///< The Thai script. Used by the Thai language.
 161   "KHMER",         ///< The Khmer script. Used by the Khmer language.
 162   "JAVANESE",      ///< The Javanese script. Used by the Javanese language.
 163   "SUNDANESE",     ///< The Sundanese script. Used by the Sundanese language.
 164
 165   "GEEZ",          ///< The Ge'ez script, also known as Ethiopic. Used by Amharic, Tigrinya and other languages in Ethiopia and Eritrea.
 166   "OL_CHIKI",      ///< The Ol Chiki script. Used by Santali.
 167   "BAYBAYIN",      ///< The Baybayin script. Used by Tagalog, the Bikol languages, Ilocano, Pangasinan, Visayan and other languages in the Philippines.
 168   "MEITEI",        ///< The Meitei script. Used for the Meitei language in Manipur, India.
 169
 170   "EMOJI",         ///< Emoji that map to standardized Unicode characters.
 171
 172   "SYMBOLS1",      ///< Some symbols.
 173   "SYMBOLS2",      ///< Some symbols.
 174   "SYMBOLS3",      ///< Some symbols.
 175   "SYMBOLS4",      ///< Some symbols.
 176   "SYMBOLS5",      ///< Some symbols.
 177
 178   "UNKNOWN"        ///< The script is unknown.
 179 };
 180
 181 /**
 182  * @brief Checks whether a script is written from right to left.
 183  *
 184  * @param[in] script The script to check.
 185  *
 186  * @return @e true if the script is right to left, @e false otherwise.
 187  */
 188 DALI_ADAPTOR_API bool IsRightToLeftScript( Script script );
 189
 190 /**
 191  * @brief Retrieves the script a character belongs to.
 192  *
 193  * @param[in] character The character to look up.
 194  *
 195  * @return The character's script.
 196  */
 197 DALI_ADAPTOR_API Script GetCharacterScript( Character character );
 198
 199 /**
 200  * @brief Checks whether a character is a white space.
 201  *
 202  * @param[in] character The character to check.
 203  *
 204  * @return @e true if the character is a white space, @e false otherwise.
 205  */
 206 DALI_ADAPTOR_API bool IsWhiteSpace( Character character );
 207
 208 /**
 209  * @brief Checks whether a character is a new paragraph character.
 210  *
 211  * @param[in] character The character to check.
 212  *
 213  * @return @e true if the character is a new paragraph character, @e false otherwise.
 214  */
 215 DALI_ADAPTOR_API bool IsNewParagraph( Character character );
 216
 217 /**
 218  * @brief Checks whether a character is a zero-width non-joiner.
 219  *
 220  * @param[in] character The character to check.
 221  *
 222  * @return @e true if the character is a zero-width non-joiner, @e false otherwise.
 223  */
 224 DALI_ADAPTOR_API bool IsZeroWidthNonJoiner( Character character );
 225
 226 /**
 227  * @brief Checks whether a character is a zero-width joiner.
 228  *
 229  * @param[in] character The character to check.
 230  *
 231  * @return @e true if the character is a zero-width joiner, @e false otherwise.
 232  */
 233 DALI_ADAPTOR_API bool IsZeroWidthJoiner( Character character );
 234
 235 /**
 236  * @brief Checks whether a character is a zero-width space.
 237  *
 238  * @param[in] character The character to check.
 239  *
 240  * @return @e true if the character is a zero-width space, @e false otherwise.
 241  */
 242 DALI_ADAPTOR_API bool IsZeroWidthSpace( Character character );
 243
 244 /**
 245  * @brief Checks whether a character is a left-to-right mark.
 246  *
 247  * @param[in] character The character to check.
 248  *
 249  * @return @e true if the character is a left-to-right mark, @e false otherwise.
 250  */
 251 DALI_ADAPTOR_API bool IsLeftToRightMark( Character character );
 252
 253 /**
 254  * @brief Checks whether a character is a right-to-left mark.
 255  *
 256  * @param[in] character The character to check.
 257  *
 258  * @return @e true if the character is a right-to-left mark, @e false otherwise.
 259  */
 260 DALI_ADAPTOR_API bool IsRightToLeftMark( Character character );
 261
 262 /**
 263  * @brief Checks whether a character is a thin space.
 264  *
 265  * @param[in] character The character to check.
 266  *
 267  * @return @e true if the character is a thin space, @e false otherwise.
 268  */
 269 DALI_ADAPTOR_API bool IsThinSpace( Character character );
 270
 271 /**
 272  * @brief Checks whether a character is common to all scripts.
 273  *
 274  * @param[in] character The character to check.
 275  *
 276  * @return @e true if the character is common to all scripts, @e false otherwise.
 277  */
 278 DALI_ADAPTOR_API bool IsCommonScript( Character character );
 279
 280 /**
 281  * @brief Checks whether a script contains ligatures that must be 'broken' for selection or cursor positioning.
 282  *
 283  * E.g. the Latin script has the 'ff' and 'fi' ligatures, which need to be broken to position the cursor
 284  * between the two characters. Likewise the Arabic script has the 'ﻻ' ligature, which needs to be broken.
 285  *
 286  * @param[in] script The script to check.
 287  *
 288  * @return @e true if the script has ligatures that must be 'broken', @e false otherwise.
 289  */
 290 DALI_ADAPTOR_API bool HasLigatureMustBreak( Script script );
 291 } // namespace TextAbstraction
 292
 293 } // namespace Dali
 294
 295 #endif // DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H