1 #ifndef __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
2 #define __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
5 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
22 #include <dali/public-api/common/dali-common.h>
25 #include <dali/devel-api/text-abstraction/text-abstraction-definitions.h>
30 namespace TextAbstraction
34 * @brief Script is the writing system used by a language.
35 * Typically one script can be used to write different languages although one language could be written in different scripts.
// NOTE: the ScriptName[] table defined after this enumerator list contains the same names, as strings, in the same order.
//       Update both together when adding, removing or reordering a script.
39 COMMON, ///< Valid for all scripts, e.g. white space or '\n'.
41 ASCII_DIGITS, ///< ASCII digits.
42 ASCII_PS, ///< ASCII punctuation and symbols.
44 C1_CONTROLS, ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block.
45 C1_PS, ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
46 C1_MATH, ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
48 SML_P, ///< Punctuation symbols of the Spacing Modifier Letters Unicode block.
49 PHONETIC_U, ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block.
50 PHONETIC_SS, ///< Subscripts and superscripts of the Phonetic Extensions Unicode block.
52 NUMERIC_SS, ///< Numeric subscripts and superscripts.
54 LETTER_LIKE, ///< Symbols of the Letterlike Unicode block.
55 NUMBER_FORMS, ///< Number Forms Unicode block.
56 FRACTIONS_NF, ///< Numeric fraction symbols of the Number Forms Unicode block.
57 NON_LATIN_LED, ///< Non-Latin symbols within the Latin Extended D Unicode block.
58 HWFW_S, ///< Non-Latin symbols within the Halfwidth and fullwidth Unicode block.
60 CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
61 GREEK, ///< The Greek script. Used by Greek.
62 LATIN, ///< The Latin script. Used by many western languages and others around the world.
64 ARABIC, ///< The Arabic script. Used by Arabic and Urdu among others.
65 HEBREW, ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
67 ARMENIAN, ///< The Armenian script. Used by Armenian.
68 GEORGIAN, ///< The Georgian script. Used by Georgian.
70 CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
71 HANGUL, ///< The Hangul jamo script. Used by Korean.
72 HIRAGANA, ///< The Hiragana script. Used by Japanese.
73 KATAKANA, ///< The Katakana script. Used by Japanese.
74 BOPOMOFO, ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
76 BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
77 BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
78 DEVANAGARI, ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
79 GUJARATI, ///< The Gujarati script. Used by Gujarati.
80 GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
81 KANNADA, ///< The Kannada script. Used by Kannada and Tulu.
82 MALAYALAM, ///< The Malayalam script. Used by Malayalam.
83 ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
84 SINHALA, ///< The Sinhala script. Used by Sinhala and Pali.
85 TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
86 TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
88 LAO, ///< The Lao script. Used by the Lao language.
89 THAI, ///< The Thai script. Used by the Thai language.
90 KHMER, ///< The Khmer script. Used by the Khmer language.
92 EMOJI, ///< The Emoji which map to standardized Unicode characters.
94 SYMBOLS1, ///< Some symbols.
95 SYMBOLS2, ///< Some symbols.
96 SYMBOLS3, ///< Some symbols.
97 SYMBOLS4, ///< Some symbols.
98 SYMBOLS5, ///< Some symbols.
100 UNKNOWN ///< The script is unknown.
/**
 * @brief Names of the scripts as strings, listed in the same order as the Script enumerators.
 *
 * NOTE(review): presumably meant to be indexed with a Script value (e.g. ScriptName[LATIN]) - confirm against the callers.
 * Keep this table in sync with the Script enumerator list when adding, removing or reordering a script.
 */
103 const char* const ScriptName[] =
105 "COMMON", ///< Valid for all scripts, e.g. white space or '\n'.
107 "ASCII_DIGITS", ///< ASCII digits.
108 "ASCII_PS", ///< ASCII punctuation and symbols.
110 "C1_CONTROLS", ///< Controls of the C1 Controls and Latin-1 Supplement Unicode block.
111 "C1_PS", ///< Punctuation and symbols of the C1 Controls and Latin-1 Supplement Unicode block.
112 "C1_MATH", ///< Math symbols of the C1 Controls and Latin-1 Supplement Unicode block.
114 "SML_P", ///< Punctuation symbols of the Spacing Modifier Letters Unicode block.
115 "PHONETIC_U", ///< Uralic phonetic symbols of the Phonetic Extensions Unicode block.
116 "PHONETIC_SS", ///< Subscripts and superscripts of the Phonetic Extensions Unicode block.
118 "NUMERIC_SS", ///< Numeric subscripts and superscripts.
120 "LETTER_LIKE", ///< Symbols of the Letterlike Unicode block.
121 "NUMBER_FORMS", ///< Number Forms Unicode block.
122 "FRACTIONS_NF", ///< Numeric fraction symbols of the Number Forms Unicode block.
123 "NON_LATIN_LED", ///< Non-Latin symbols within the Latin Extended D Unicode block.
124 "HWFW_S", ///< Non-Latin symbols within the Halfwidth and fullwidth Unicode block.
126 "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
127 "GREEK", ///< The Greek script. Used by Greek.
128 "LATIN", ///< The Latin script. Used by many western languages and others around the world.
130 "ARABIC", ///< The Arabic script. Used by Arabic and Urdu among others.
131 "HEBREW", ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
133 "ARMENIAN", ///< The Armenian script. Used by Armenian.
134 "GEORGIAN", ///< The Georgian script. Used by Georgian.
136 "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
137 "HANGUL", ///< The Hangul jamo script. Used by Korean.
138 "HIRAGANA", ///< The Hiragana script. Used by Japanese.
139 "KATAKANA", ///< The Katakana script. Used by Japanese.
140 "BOPOMOFO", ///< The Bopomofo script. Also called Zhuyin fuhao or Zhuyin. A phonetic notation used for the transcription of spoken Chinese.
142 "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
143 "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
144 "DEVANAGARI", ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
145 "GUJARATI", ///< The Gujarati script. Used by Gujarati.
146 "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
147 "KANNADA", ///< The Kannada script. Used by Kannada and Tulu.
148 "MALAYALAM", ///< The Malayalam script. Used by Malayalam.
149 "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
150 "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali.
151 "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
152 "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
154 "LAO", ///< The Lao script. Used by the Lao language.
155 "THAI", ///< The Thai script. Used by the Thai language.
156 "KHMER", ///< The Khmer script. Used by the Khmer language.
158 "EMOJI", ///< The Emoji which map to standardized Unicode characters.
160 "SYMBOLS1", ///< Some symbols.
161 "SYMBOLS2", ///< Some symbols.
162 "SYMBOLS3", ///< Some symbols.
163 "SYMBOLS4", ///< Some symbols.
164 "SYMBOLS5", ///< Some symbols.
166 "UNKNOWN" ///< The script is unknown.
170 * @brief Whether the script is a right to left script.
172 * @param[in] script The script.
174 * @return @e true if the script is right to left.
// NOTE(review): which Script values are treated as right to left is decided by the implementation, which is not visible in this header.
176 DALI_IMPORT_API bool IsRightToLeftScript( Script script );
179 * @brief Retrieves a character's script.
181 * @param[in] character The character.
183 * @return The character's script.
// NOTE(review): the result is one of the Script enumerators; presumably UNKNOWN when no script matches the character - confirm against the implementation.
185 DALI_IMPORT_API Script GetCharacterScript( Character character );
188 * @brief Whether the character is a white space.
190 * @param[in] character The character.
192 * @return @e true if the character is a white space.
// NOTE(review): the exact set of code points counted as white space is defined by the implementation - confirm before relying on it.
194 DALI_IMPORT_API bool IsWhiteSpace( Character character );
197 * @brief Whether the character is a new paragraph character.
199 * @param[in] character The character.
201 * @return @e true if the character is a new paragraph character.
// NOTE(review): presumably covers line and paragraph breaking characters such as '\n'; the exact set is defined by the implementation - confirm.
203 DALI_IMPORT_API bool IsNewParagraph( Character character );
206 * @brief Whether the character is a zero width non joiner.
208 * @param[in] character The character.
210 * @return @e true if the character is a zero width non joiner.
// NOTE(review): presumably tests for U+200C ZERO WIDTH NON-JOINER - confirm against the implementation.
212 DALI_IMPORT_API bool IsZeroWidthNonJoiner( Character character );
215 * @brief Whether the character is a zero width joiner.
217 * @param[in] character The character.
219 * @return @e true if the character is a zero width joiner.
// NOTE(review): presumably tests for U+200D ZERO WIDTH JOINER - confirm against the implementation.
221 DALI_IMPORT_API bool IsZeroWidthJoiner( Character character );
224 * @brief Whether the character is a zero width space.
226 * @param[in] character The character.
228 * @return @e true if the character is a zero width space.
// NOTE(review): presumably tests for U+200B ZERO WIDTH SPACE - confirm against the implementation.
230 DALI_IMPORT_API bool IsZeroWidthSpace( Character character );
233 * @brief Whether the character is a left to right mark.
235 * @param[in] character The character.
237 * @return @e true if the character is a left to right mark.
// NOTE(review): presumably tests for U+200E LEFT-TO-RIGHT MARK - confirm against the implementation.
239 DALI_IMPORT_API bool IsLeftToRightMark( Character character );
242 * @brief Whether the character is a right to left mark.
244 * @param[in] character The character.
246 * @return @e true if the character is a right to left mark.
// NOTE(review): presumably tests for U+200F RIGHT-TO-LEFT MARK - confirm against the implementation.
248 DALI_IMPORT_API bool IsRightToLeftMark( Character character );
251 * @brief Whether the character is a thin space.
253 * @param[in] character The character.
255 * @return @e true if the character is a thin space.
// NOTE(review): presumably tests for U+2009 THIN SPACE - confirm against the implementation.
257 DALI_IMPORT_API bool IsThinSpace( Character character );
260 * @brief Whether the character is common within all scripts.
262 * @param[in] character The character.
264 * @return @e true if the character is common within all scripts.
// NOTE(review): how this relates to GetCharacterScript() returning COMMON is not visible in this header - confirm against the implementation.
266 DALI_IMPORT_API bool IsCommonScript( Character character );
269 * @brief Whether the script contains ligatures that must be 'broken' for selection or cursor position.
271 * e.g. the Latin script has the 'ff' or 'fi' ligatures that need to be broken to position the cursor
272 * between the two characters. Equally the Arabic script has the 'ﻻ' ligature that needs to be broken.
274 * @param[in] script The script.
276 * @return @e true if the script has ligatures that must be 'broken'.
// NOTE(review): the doc comment gives Latin and Arabic as examples; whether any other script returns true is decided by the implementation - confirm.
278 DALI_IMPORT_API bool HasLigatureMustBreak( Script script );
279 } // namespace TextAbstraction
283 #endif // __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__