1 #ifndef __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
2 #define __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__
5 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
22 #include <dali/public-api/common/dali-common.h>
25 #include <dali/devel-api/text-abstraction/text-abstraction-definitions.h>
30 namespace TextAbstraction
34 * @brief Script is the writing system used by a language.
35 * Typically one script can be used to write different languages, although one language could be written in different scripts.
// Enumerators of the writing systems distinguished by the text abstraction.
// NOTE(review): ScriptName[] below lists the same names in the same order, presumably so it can be
// indexed by a Script value - keep both lists in sync when adding or reordering a script.
39 COMMON, ///< Valid for all scripts, e.g. white space or '\n'.
41 CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
42 GREEK, ///< The Greek script. Used by Greek.
43 LATIN, ///< The Latin script. Used by many western languages and others around the world.
45 ARABIC, ///< The Arabic script. Used by Arabic and Urdu among others.
46 HEBREW, ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
48 ARMENIAN, ///< The Armenian script. Used by Armenian.
49 GEORGIAN, ///< The Georgian script. Used by Georgian.
51 CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
52 HANGUL, ///< The Hangul jamo script. Used by Korean.
53 HIRAGANA, ///< The Hiragana script. Used by Japanese.
54 KATAKANA, ///< The Katakana script. Used by Japanese.
56 BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
57 BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
58 DEVANAGARI, ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
59 GUJARATI, ///< The Gujarati script. Used by Gujarati.
60 GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
61 KANNADA, ///< The Kannada script. Used by Kannada and Tulu.
62 MALAYALAM, ///< The Malayalam script. Used by Malayalam.
63 ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
64 SINHALA, ///< The Sinhala script. Used by Sinhala and Pali.
65 TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
66 TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
68 LAO, ///< The Lao script. Used by the Lao language.
69 THAI, ///< The Thai script. Used by the Thai language.
70 KHMER, ///< The Khmer script. Used by the Khmer language.
72 EMOJI, ///< Emoji, which map to standardized Unicode characters.
74 UNKNOWN ///< The script is unknown.
// Printable name of each script: one string per enumerator listed above, in the same order.
// NOTE(review): presumably indexed by a Script value - confirm against callers, and keep this table
// in sync with the enumerators. As a const array at namespace scope it has internal linkage, so every
// translation unit that includes this header gets its own copy.
77 const char* const ScriptName[] =
79 "COMMON", ///< Valid for all scripts, e.g. white space or '\n'.
81 "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
82 "GREEK", ///< The Greek script. Used by Greek.
83 "LATIN", ///< The Latin script. Used by many western languages and others around the world.
85 "ARABIC", ///< The Arabic script. Used by Arabic and Urdu among others.
86 "HEBREW", ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
88 "ARMENIAN", ///< The Armenian script. Used by Armenian.
89 "GEORGIAN", ///< The Georgian script. Used by Georgian.
91 "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
92 "HANGUL", ///< The Hangul jamo script. Used by Korean.
93 "HIRAGANA", ///< The Hiragana script. Used by Japanese.
94 "KATAKANA", ///< The Katakana script. Used by Japanese.
96 "BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
97 "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
98 "DEVANAGARI", ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
99 "GUJARATI", ///< The Gujarati script. Used by Gujarati.
100 "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
101 "KANNADA", ///< The Kannada script. Used by Kannada and Tulu.
102 "MALAYALAM", ///< The Malayalam script. Used by Malayalam.
103 "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
104 "SINHALA", ///< The Sinhala script. Used by Sinhala and Pali.
105 "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
106 "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
108 "LAO", ///< The Lao script. Used by the Lao language.
109 "THAI", ///< The Thai script. Used by the Thai language.
110 "KHMER", ///< The Khmer script. Used by the Khmer language.
112 "EMOJI", ///< Emoji, which map to standardized Unicode characters.
114 "UNKNOWN" ///< The script is unknown.
118 * @brief Whether the script is a right to left script.
120 * @param[in] script The script.
122 * @return @e true if the script is right to left.
// Declaration only: no definition is visible in this header. Takes the Script by value.
// NOTE(review): presumably true for ARABIC and HEBREW - confirm against the implementation.
124 DALI_IMPORT_API bool IsRightToLeftScript( Script script );
127 * @brief Retrieves a character's script.
129 * @param[in] character The character.
131 * @return The character's script.
// Declaration only: no definition is visible in this header. Maps one Character to a Script enumerator.
// NOTE(review): presumably yields UNKNOWN for characters outside the listed scripts - confirm against the implementation.
133 DALI_IMPORT_API Script GetCharacterScript( Character character );
136 * @brief Whether the character is a white space.
138 * @param[in] character The character.
140 * @return @e true if the character is a white space.
// Declaration only: no definition is visible in this header.
// NOTE(review): the exact set of code points treated as white space is decided by the implementation - confirm before relying on it.
142 DALI_IMPORT_API bool IsWhiteSpace( Character character );
145 * @brief Whether the character is a new paragraph character.
147 * @param[in] character The character.
149 * @return @e true if the character is a new paragraph character.
// Declaration only: no definition is visible in this header.
// NOTE(review): which code points count as a new paragraph character is decided by the implementation - confirm before relying on it.
151 DALI_IMPORT_API bool IsNewParagraph( Character character );
154 * @brief Whether the character is a zero width non joiner.
156 * @param[in] character The character.
158 * @return @e true if the character is a zero width non joiner.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+200C ZERO WIDTH NON-JOINER - confirm against the implementation.
160 DALI_IMPORT_API bool IsZeroWidthNonJoiner( Character character );
163 * @brief Whether the character is a zero width joiner.
165 * @param[in] character The character.
167 * @return @e true if the character is a zero width joiner.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+200D ZERO WIDTH JOINER - confirm against the implementation.
169 DALI_IMPORT_API bool IsZeroWidthJoiner( Character character );
172 * @brief Whether the character is a zero width space.
174 * @param[in] character The character.
176 * @return @e true if the character is a zero width space.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+200B ZERO WIDTH SPACE - confirm against the implementation.
178 DALI_IMPORT_API bool IsZeroWidthSpace( Character character );
181 * @brief Whether the character is a left to right mark.
183 * @param[in] character The character.
185 * @return @e true if the character is a left to right mark.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+200E LEFT-TO-RIGHT MARK - confirm against the implementation.
187 DALI_IMPORT_API bool IsLeftToRightMark( Character character );
190 * @brief Whether the character is a right to left mark.
192 * @param[in] character The character.
194 * @return @e true if the character is a right to left mark.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+200F RIGHT-TO-LEFT MARK - confirm against the implementation.
196 DALI_IMPORT_API bool IsRightToLeftMark( Character character );
199 * @brief Whether the character is a thin space.
201 * @param[in] character The character.
203 * @return @e true if the character is a thin space.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably tests for U+2009 THIN SPACE - confirm against the implementation.
205 DALI_IMPORT_API bool IsThinSpace( Character character );
208 * @brief Whether the character is common within all scripts.
210 * @param[in] character The character.
212 * @return @e true if the character is common within all scripts.
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably reports the characters classified under the COMMON enumerator (documented
// above as valid for all scripts, such as white space or '\n') - confirm against the implementation.
214 DALI_IMPORT_API bool IsCommonScript( Character character );
216 } // namespace TextAbstraction
220 #endif // __DALI_TOOLKIT_TEXT_ABSTRACTION_SCRIPT_H__