text/dali/devel-api/text-abstraction/script.h

   1 #ifndef __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
   2 #define __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
   3
   4 /*
   5  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  * http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  */
  20
  21 // EXTERNAL INCLUDES
  22 #include <dali/public-api/common/dali-common.h>
  23
  24 // INTERNAL INCLUDES
  25 #include <dali/devel-api/text-abstraction/text-abstraction-definitions.h>
  26
  27 namespace Dali
  28 {
  29
  30 namespace TextAbstraction
  31 {
  32
  33 /**
  34  * @brief Script is the writing system used by a language.
  35  * Typically one script can be used to write different languages, although one language could be written in different scripts. ScriptName lists one name per enumerator in this same order.
  36  */
  37 enum Script
  38 {
  39   COMMON,     ///< Valid for all scripts, e.g. white space or '\n'.
  40
  41   CYRILLIC,   ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
  42   GREEK,      ///< The Greek script. Used by Greek.
  43   LATIN,      ///< The Latin script. Used by many western languages and others around the world.
  44
  45   ARABIC,     ///< The Arabic script. Used by Arabic and Urdu among others.
  46   HEBREW,     ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
  47
  48   ARMENIAN,   ///< The Armenian script. Used by Armenian.
  49   GEORGIAN,   ///< The Georgian script. Used by Georgian.
  50
  51   CJK,        ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
  52   HANGUL,     ///< The Hangul jamo script. Used by Korean.
  53   HIRAGANA,   ///< The Hiragana script. Used by the Japanese.
  54   KATAKANA,   ///< The Katakana script. Used by the Japanese.
  55
  56   BENGALI,    ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
  57   BURMESE,    ///< The Burmese script. Used by the Burmese (Myanmar) language.
  58   DEVANAGARI, ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
  59   GUJARATI,   ///< The Gujarati script. Used by Gujarati.
  60   GURMUKHI,   ///< The Gurmukhi script. Used by Punjabi.
  61   KANNADA,    ///< The Kannada script. Used by Kannada and Tulu.
  62   MALAYALAM,  ///< The Malayalam script. Used by Malayalam.
  63   ORIYA,      ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
  64   SINHALA,    ///< The Sinhala script. Used by Sinhala and Pali.
  65   TAMIL,      ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
  66   TELUGU,     ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
  67
  68   LAO,        ///< The Lao script. Used by the Lao language.
  69   THAI,       ///< The Thai script. Used by the Thai language.
  70   KHMER,      ///< The Khmer script. Used by the Khmer language.
  71
  72   EMOJI,      ///< The Emoji which map to standardized Unicode characters.
  73
  74   UNKNOWN     ///< The script is unknown.
  75 };
  76 /** @brief Name of every Script enumerator, listed in the enum's order so that ScriptName[script] is the name of @e script. */
  77 const char* const ScriptName[] =
  78 {
  79   "COMMON",     // Valid for all scripts, e.g. white space or '\n'.
  80
  81   "CYRILLIC",   // The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
  82   "GREEK",      // The Greek script. Used by Greek.
  83   "LATIN",      // The Latin script. Used by many western languages and others around the world.
  84
  85   "ARABIC",     // The Arabic script. Used by Arabic and Urdu among others.
  86   "HEBREW",     // The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
  87
  88   "ARMENIAN",   // The Armenian script. Used by Armenian.
  89   "GEORGIAN",   // The Georgian script. Used by Georgian.
  90
  91   "CJK",        // The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
  92   "HANGUL",     // The Hangul jamo script. Used by Korean.
  93   "HIRAGANA",   // The Hiragana script. Used by the Japanese.
  94   "KATAKANA",   // The Katakana script. Used by the Japanese.
  95
  96   "BENGALI",    // The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
  97   "BURMESE",    // The Burmese script. Used by the Burmese (Myanmar) language.
  98   "DEVANAGARI", // The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
  99   "GUJARATI",   // The Gujarati script. Used by Gujarati.
 100   "GURMUKHI",   // The Gurmukhi script. Used by Punjabi.
 101   "KANNADA",    // The Kannada script. Used by Kannada and Tulu.
 102   "MALAYALAM",  // The Malayalam script. Used by Malayalam.
 103   "ORIYA",      // The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
 104   "SINHALA",    // The Sinhala script. Used by Sinhala and Pali.
 105   "TAMIL",      // The Tamil script. Used by Tamil, Badaga, and Saurashtra.
 106   "TELUGU",     // The Telugu script. Used by Telugu, Gondi, and Lambadi.
 107
 108   "LAO",        // The Lao script. Used by the Lao language.
 109   "THAI",       // The Thai script. Used by the Thai language.
 110   "KHMER",      // The Khmer script. Used by the Khmer language.
 111
 112   "EMOJI",      // The Emoji which map to standardized Unicode characters.
 113
 114   "UNKNOWN"     // The script is unknown.
 115 };
 116
 117 /**
 118  * @brief Checks whether the given script is a right to left script.
 119  *
 120  * @param[in] script The script to check.
 121  *
 122  * @return @e true if the script is a right to left script.
 123  */
 124 DALI_IMPORT_API bool IsRightToLeftScript( Script script );
 125
 126 /**
 127  * @brief Retrieves the script of a character.
 128  *
 129  * @param[in] character The character whose script is requested.
 130  *
 131  * @return The character's script.
 132  */
 133 DALI_IMPORT_API Script GetCharacterScript( Character character );
 134
 135 /**
 136  * @brief Checks whether the character is a white space.
 137  *
 138  * @param[in] character The character to check.
 139  *
 140  * @return @e true if the character is a white space.
 141  */
 142 DALI_IMPORT_API bool IsWhiteSpace( Character character );
 143
 144 /**
 145  * @brief Checks whether the character is a new paragraph character.
 146  *
 147  * @param[in] character The character to check.
 148  *
 149  * @return @e true if the character is a new paragraph character.
 150  */
 151 DALI_IMPORT_API bool IsNewParagraph( Character character );
 152
 153 /**
 154  * @brief Checks whether the character is a zero width non joiner.
 155  *
 156  * @param[in] character The character to check.
 157  *
 158  * @return @e true if the character is a zero width non joiner.
 159  */
 160 DALI_IMPORT_API bool IsZeroWidthNonJoiner( Character character );
 161
 162 /**
 163  * @brief Checks whether the character is a zero width joiner.
 164  *
 165  * @param[in] character The character to check.
 166  *
 167  * @return @e true if the character is a zero width joiner.
 168  */
 169 DALI_IMPORT_API bool IsZeroWidthJoiner( Character character );
 170
 171 /**
 172  * @brief Checks whether the character is a zero width space.
 173  *
 174  * @param[in] character The character to check.
 175  *
 176  * @return @e true if the character is a zero width space.
 177  */
 178 DALI_IMPORT_API bool IsZeroWidthSpace( Character character );
 179
 180 /**
 181  * @brief Checks whether the character is a left to right mark.
 182  *
 183  * @param[in] character The character to check.
 184  *
 185  * @return @e true if the character is a left to right mark.
 186  */
 187 DALI_IMPORT_API bool IsLeftToRightMark( Character character );
 188
 189 /**
 190  * @brief Checks whether the character is a right to left mark.
 191  *
 192  * @param[in] character The character to check.
 193  *
 194  * @return @e true if the character is a right to left mark.
 195  */
 196 DALI_IMPORT_API bool IsRightToLeftMark( Character character );
 197
 198 /**
 199  * @brief Checks whether the character is a thin space.
 200  *
 201  * @param[in] character The character to check.
 202  *
 203  * @return @e true if the character is a thin space.
 204  */
 205 DALI_IMPORT_API bool IsThinSpace( Character character );
 206
 207 /**
 208  * @brief Checks whether the character is common within all scripts.
 209  *
 210  * @param[in] character The character to check.
 211  *
 212  * @return @e true if the character is common within all scripts.
 213  */
 214 DALI_IMPORT_API bool IsCommonScript( Character character );
 215
 216 } // namespace TextAbstraction
 217
 218 } // namespace Dali
 219
 220 #endif // __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__