text/dali/devel-api/text-abstraction/script.h

   1 #ifndef __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
   2 #define __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
   3
   4 /*
   5  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  * http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  */
  20
  21 // EXTERNAL INCLUDES
  22 #include <dali/public-api/common/dali-common.h>
  23
  24 // INTERNAL INCLUDES
  25 #include <dali/devel-api/text-abstraction/text-abstraction-definitions.h>
  26
  27 namespace Dali
  28 {
  29
  30 namespace TextAbstraction
  31 {
  32
  33 /**
  34  * @brief Script is the writing system used by a language. One script can typically be used to write several languages, and one language may be written in several scripts.
  35  * @note ScriptName, declared right after this enum, lists the same identifiers in the same order; keep the two in sync when adding, removing or reordering values.
  36  */
  37 enum Script
  38 {
  39   COMMON,        ///< Valid for all scripts, e.g. white space or '\n'.
  40
  41   ASCII_DIGITS,  ///< ASCII digits.
  42   ASCII_PS,      ///< ASCII punctuation and symbols.
  43
  44   C1_CONTROLS,   ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block.
  45   C1_PS,         ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
  46   C1_MATH,       ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
  47
  48   SML_P,         ///< Punctuation symbols of the Spacing Modifier Letters Unicode block.
  49   PHONETIC_U,    ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block.
  50   PHONETIC_SS,   ///< Subscripts and superscripts of the Phonetic Extensions Unicode block.
  51
  52   NUMERIC_SS,    ///< Numeric subscripts and superscripts.
  53
  54   LETTER_LIKE,   ///< Symbols of the Letterlike Unicode block.
  55   NUMBER_FORMS,  ///< Number Forms Unicode block.
  56   FRACTIONS_NF,  ///< Numeric fraction symbols of the Number Forms Unicode block.
  57   NON_LATIN_LED, ///< Non-Latin symbols within the Latin Extended-D Unicode block.
  58   HWFW_S,        ///< Non-Latin symbols within the Halfwidth and Fullwidth Forms Unicode block.
  59
  60   CYRILLIC,      ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
  61   GREEK,         ///< The Greek script. Used by Greek.
  62   LATIN,         ///< The Latin script. Used by many western languages and others around the world.
  63
  64   ARABIC,        ///< The Arabic script. Used by Arabic and Urdu among others.
  65   HEBREW,        ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
  66
  67   ARMENIAN,      ///< The Armenian script. Used by Armenian.
  68   GEORGIAN,      ///< The Georgian script. Used by Georgian.
  69
  70   CJK,           ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
  71   HANGUL,        ///< The Hangul jamo script. Used by Korean.
  72   HIRAGANA,      ///< The Hiragana script. Used by Japanese.
  73   KATAKANA,      ///< The Katakana script. Used by Japanese.
  74   BOPOMOFO,      ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
  75
  76   BENGALI,       ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
  77   BURMESE,       ///< The Burmese script. Used by the Burmese (Myanmar) language.
  78   DEVANAGARI,    ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
  79   GUJARATI,      ///< The Gujarati script. Used by Gujarati.
  80   GURMUKHI,      ///< The Gurmukhi script. Used by Punjabi.
  81   KANNADA,       ///< The Kannada script. Used by Kannada and Tulu.
  82   MALAYALAM,     ///< The Malayalam script. Used by Malayalam.
  83   ORIYA,         ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
  84   SINHALA,       ///< The Sinhala script. Used by Sinhala and Pali.
  85   TAMIL,         ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
  86   TELUGU,        ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
  87
  88   LAO,           ///< The Lao script. Used by the Lao language.
  89   THAI,          ///< The Thai script. Used by the Thai language.
  90   KHMER,         ///< The Khmer script. Used by the Khmer language.
  91   JAVANESE,      ///< The Javanese script. Used by the Javanese language.
  92   SUNDANESE,     ///< The Sundanese script. Used by the Sundanese language.
  93
  94   EMOJI,         ///< Emoji which map to standardized Unicode characters.
  95
  96   SYMBOLS1,      ///< Some symbols.
  97   SYMBOLS2,      ///< Some symbols.
  98   SYMBOLS3,      ///< Some symbols.
  99   SYMBOLS4,      ///< Some symbols.
 100   SYMBOLS5,      ///< Some symbols.
 101
 102   UNKNOWN        ///< The script is unknown.
 103 };
 104 /** @brief The Script identifiers as strings, one entry per Script enumerator and in the same order as the enum. Presumably indexed by Script value (TODO confirm with callers); keep in sync with the enum. */
 105 const char* const ScriptName[] =
 106 {
 107   "COMMON",        // Valid for all scripts, e.g. white space or '\n'.
 108
 109   "ASCII_DIGITS",  // ASCII digits.
 110   "ASCII_PS",      // ASCII punctuation and symbols.
 111
 112   "C1_CONTROLS",   // Controls of the C1 Controls and Latin-1 Supplement Unicode block.
 113   "C1_PS",         // Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
 114   "C1_MATH",       // Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
 115
 116   "SML_P",         // Punctuation symbols of the Spacing Modifier Letters Unicode block.
 117   "PHONETIC_U",    // Uralic phonetic symbols of the Phonetic Extensions Unicode block.
 118   "PHONETIC_SS",   // Subscripts and superscripts of the Phonetic Extensions Unicode block.
 119
 120   "NUMERIC_SS",    // Numeric subscripts and superscripts.
 121
 122   "LETTER_LIKE",   // Symbols of the Letterlike Unicode block.
 123   "NUMBER_FORMS",  // Number Forms Unicode block.
 124   "FRACTIONS_NF",  // Numeric fraction symbols of the Number Forms Unicode block.
 125   "NON_LATIN_LED", // Non-Latin symbols within the Latin Extended-D Unicode block.
 126   "HWFW_S",        // Non-Latin symbols within the Halfwidth and Fullwidth Forms Unicode block.
 127
 128   "CYRILLIC",      // The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
 129   "GREEK",         // The Greek script. Used by Greek.
 130   "LATIN",         // The Latin script. Used by many western languages and others around the world.
 131
 132   "ARABIC",        // The Arabic script. Used by Arabic and Urdu among others.
 133   "HEBREW",        // The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
 134
 135   "ARMENIAN",      // The Armenian script. Used by Armenian.
 136   "GEORGIAN",      // The Georgian script. Used by Georgian.
 137
 138   "CJK",           // The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
 139   "HANGUL",        // The Hangul jamo script. Used by Korean.
 140   "HIRAGANA",      // The Hiragana script. Used by Japanese.
 141   "KATAKANA",      // The Katakana script. Used by Japanese.
 142   "BOPOMOFO",      // The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
 143
 144   "BENGALI",       // The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
 145   "BURMESE",       // The Burmese script. Used by the Burmese (Myanmar) language.
 146   "DEVANAGARI",    // The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
 147   "GUJARATI",      // The Gujarati script. Used by Gujarati.
 148   "GURMUKHI",      // The Gurmukhi script. Used by Punjabi.
 149   "KANNADA",       // The Kannada script. Used by Kannada and Tulu.
 150   "MALAYALAM",     // The Malayalam script. Used by Malayalam.
 151   "ORIYA",         // The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
 152   "SINHALA",       // The Sinhala script. Used by Sinhala and Pali.
 153   "TAMIL",         // The Tamil script. Used by Tamil, Badaga, and Saurashtra.
 154   "TELUGU",        // The Telugu script. Used by Telugu, Gondi, and Lambadi.
 155
 156   "LAO",           // The Lao script. Used by the Lao language.
 157   "THAI",          // The Thai script. Used by the Thai language.
 158   "KHMER",         // The Khmer script. Used by the Khmer language.
 159   "JAVANESE",      // The Javanese script. Used by the Javanese language.
 160   "SUNDANESE",     // The Sundanese script. Used by the Sundanese language.
 161
 162   "EMOJI",         // Emoji which map to standardized Unicode characters.
 163
 164   "SYMBOLS1",      // Some symbols.
 165   "SYMBOLS2",      // Some symbols.
 166   "SYMBOLS3",      // Some symbols.
 167   "SYMBOLS4",      // Some symbols.
 168   "SYMBOLS5",      // Some symbols.
 169
 170   "UNKNOWN"        // The script is unknown.
 171 };
 172
 173 /**
 174  * @brief Tells whether the given script is a right-to-left script.
 175  *
 176  * @param[in] script The script to query.
 177  *
 178  * @return @e true if the script is right to left, @e false otherwise.
 179  */
 180 DALI_IMPORT_API bool IsRightToLeftScript( Script script );
 181
 182 /**
 183  * @brief Retrieves the script a character belongs to.
 184  *
 185  * @param[in] character The character to classify.
 186  *
 187  * @return The character's script.
 188  */
 189 DALI_IMPORT_API Script GetCharacterScript( Character character );
 190
 191 /**
 192  * @brief Tells whether the character is a white space character.
 193  *
 194  * @param[in] character The character to test.
 195  *
 196  * @return @e true if the character is a white space, @e false otherwise.
 197  */
 198 DALI_IMPORT_API bool IsWhiteSpace( Character character );
 199
 200 /**
 201  * @brief Tells whether the character is a new paragraph character.
 202  *
 203  * @param[in] character The character to test.
 204  *
 205  * @return @e true if the character is a new paragraph character, @e false otherwise.
 206  */
 207 DALI_IMPORT_API bool IsNewParagraph( Character character );
 208
 209 /**
 210  * @brief Tells whether the character is a zero-width non-joiner (ZWNJ).
 211  *
 212  * @param[in] character The character to test.
 213  *
 214  * @return @e true if the character is a zero-width non-joiner, @e false otherwise.
 215  */
 216 DALI_IMPORT_API bool IsZeroWidthNonJoiner( Character character );
 217
 218 /**
 219  * @brief Tells whether the character is a zero-width joiner (ZWJ).
 220  *
 221  * @param[in] character The character to test.
 222  *
 223  * @return @e true if the character is a zero-width joiner, @e false otherwise.
 224  */
 225 DALI_IMPORT_API bool IsZeroWidthJoiner( Character character );
 226
 227 /**
 228  * @brief Tells whether the character is a zero-width space (ZWSP).
 229  *
 230  * @param[in] character The character to test.
 231  *
 232  * @return @e true if the character is a zero-width space, @e false otherwise.
 233  */
 234 DALI_IMPORT_API bool IsZeroWidthSpace( Character character );
 235
 236 /**
 237  * @brief Tells whether the character is a left-to-right mark (LRM).
 238  *
 239  * @param[in] character The character to test.
 240  *
 241  * @return @e true if the character is a left-to-right mark, @e false otherwise.
 242  */
 243 DALI_IMPORT_API bool IsLeftToRightMark( Character character );
 244
 245 /**
 246  * @brief Tells whether the character is a right-to-left mark (RLM).
 247  *
 248  * @param[in] character The character to test.
 249  *
 250  * @return @e true if the character is a right-to-left mark, @e false otherwise.
 251  */
 252 DALI_IMPORT_API bool IsRightToLeftMark( Character character );
 253
 254 /**
 255  * @brief Tells whether the character is a thin space.
 256  *
 257  * @param[in] character The character to test.
 258  *
 259  * @return @e true if the character is a thin space, @e false otherwise.
 260  */
 261 DALI_IMPORT_API bool IsThinSpace( Character character );
 262
 263 /**
 264  * @brief Tells whether the character is common to all scripts.
 265  *
 266  * @param[in] character The character to test.
 267  *
 268  * @return @e true if the character is common to all scripts, @e false otherwise.
 269  */
 270 DALI_IMPORT_API bool IsCommonScript( Character character );
 271
 272 /**
 273  * @brief Tells whether the script contains ligatures that must be 'broken' for selection or cursor positioning.
 274  *
 275  * e.g. the Latin script has the 'ff' and 'fi' ligatures, which need to be broken to position the cursor
 276  * between the two characters. Likewise, the Arabic script has the 'ﻻ' ligature that needs to be broken.
 277  *
 278  * @param[in] script The script to query.
 279  *
 280  * @return @e true if the script has ligatures that must be 'broken', @e false otherwise.
 281  */
 282 DALI_IMPORT_API bool HasLigatureMustBreak( Script script );
 283 } // namespace TextAbstraction
 284
 285 } // namespace Dali
 286
 287 #endif // __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__