// 0x0591 - 0x05f4 Hebrew
// 0xfb1d - 0xfb4f Hebrew subset of Alphabetic Presentation Forms
+ // Cyrillic script
+ // 0x0400 - 0x04ff Cyrillic
+ // 0x0500 - 0x052f Cyrillic Supplement
+ // 0x2de0 - 0x2dff Cyrillic Extended-A
+ // 0xa640 - 0xa69f Cyrillic Extended-B
+
+ // Georgian script
+ // 0x10a0 - 0x10ff Georgian
+ // 0x2d00 - 0x2d2f Georgian Supplement
+
+ // Greek script
+ // 0x0370 - 0x03ff Greek & Coptic
+ // 0x1f00 - 0x1fff Greek Extended
+
+ // Armenian script
+ // 0x0530 - 0x058f Armenian
+ // 0xfb13 - 0xfb17 Armenian subset of Alphabetic Presentation Forms
+
// The Emoji which map to standardized Unicode characters
// 1. Emoticons ( 1F601 - 1F64F )
// 2. Dingbats ( 2702 - 27B0 )
{
return LATIN;
}
+ if( ( 0x0370 <= character ) && ( character <= 0x03ff ) )
+ {
+ return GREEK;
+ }
+ if( ( 0x0400 <= character ) && ( character <= 0x04ff ) )
+ {
+ return CYRILLIC;
+ }
+ if( ( 0x0500 <= character ) && ( character <= 0x052f ) )
+ {
+ return CYRILLIC;
+ }
+ if( ( 0x0530 <= character ) && ( character <= 0x058f ) )
+ {
+ return ARMENIAN;
+ }
if( ( 0x0591 <= character ) && ( character <= 0x05f4 ) )
{
return HEBREW;
{
return BURMESE;
}
+ if( ( 0x10a0 <= character ) && ( character <= 0x10ff ) )
+ {
+ return GEORGIAN;
+ }
if( ( 0x1100 <= character ) && ( character <= 0x11ff ) )
{
return HANGUL;
}
else // > 0x1eff
{
+ if( ( 0x1f00 <= character ) && ( character <= 0x1fff ) )
+ {
+ return GREEK;
+ }
if( character == 0x203c )
{
return EMOJI; // 5. Uncategorized: double exclamation mark
{
if( character <= 0xfdff )
{
+ if( ( 0x2d00 <= character ) && ( character <= 0x2d2f ) )
+ {
+ return GEORGIAN;
+ }
+ if( ( 0x2de0 <= character ) && ( character <= 0x2dff ) )
+ {
+ return CYRILLIC;
+ }
if( ( 0x2e80 <= character ) && ( character <= 0x2eff ) )
{
return CJK;
{
return CJK;
}
+ if( ( 0xa640 <= character ) && ( character <= 0xa69f ) )
+ {
+ return CYRILLIC;
+ }
if( ( 0xa720 <= character ) && ( character <= 0xa7ff ) )
{
return LATIN;
{
return LATIN;
}
+ if( ( 0xfb13 <= character ) && ( character <= 0xfb17 ) )
+ {
+ return ARMENIAN;
+ }
if( ( 0xfb1d <= character ) && ( character <= 0xfb4f ) )
{
return HEBREW;
*/
enum Script
{
+ CYRILLIC, ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+ GREEK, ///< The Greek script. Used by Greek.
LATIN, ///< The Latin script. Used by many western languages and others around the world.
+
ARABIC, ///< The Arabic script. Used by Arabic and Urdu among others.
- DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+ HEBREW, ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+ ARMENIAN, ///< The Armenian script. Used by Armenian.
+ GEORGIAN, ///< The Georgian script. Used by Georgian.
+
+ CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
+ HANGUL, ///< The Hangul jamo script. Used by Korean.
+ HIRAGANA, ///< The Hiragana script. Used by Japanese.
+ KATAKANA, ///< The Katakana script. Used by Japanese.
+
BENGALI, ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
- GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
+ BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
+ DEVANAGARI, ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
GUJARATI, ///< The Gujarati script. Used by Gujarati.
- ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
- TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
- TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+ GURMUKHI, ///< The Gurmukhi script. Used by Punjabi.
KANNADA, ///< The Kannada script. Used by Kannada and Tulu.
MALAYALAM, ///< The Malayalam script. Used by Malayalam.
+ ORIYA, ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
SINHALA, ///< The Sinhala script. Used by Sinhala and Pali.
- CJK, ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
- HANGUL, ///< The Hangul jamo script. Used by Korean.
- KHMER, ///< The Khmer script. Used by the Khmer language.
+ TAMIL, ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+ TELUGU, ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
LAO, ///< The Lao script. Used by the Lao language.
THAI, ///< The Thai script. Used by the Thai language.
- BURMESE, ///< The Burmese script. Used by the Burmese (Myanmar) language.
- HEBREW, ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
- HIRAGANA, ///< The Hiragana script. Used by the Japanese.
- KATAKANA, ///< The Katakana script. Used by the Japanese.
+ KHMER, ///< The Khmer script. Used by the Khmer language.
+
EMOJI, ///< The Emoji which map to standardized Unicode characters.
+
UNKNOWN ///< The script is unknown.
};
// Printable names of the scripts, listed in the same order as the enumerators of enum Script.
// NOTE(review): presumably indexed by a Script value - confirm against callers and keep both lists in sync.
const char* const ScriptName[] =
{
+ "CYRILLIC", ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+ "GREEK", ///< The Greek script. Used by Greek.
"LATIN", ///< The Latin script. Used by many western languages and others around the world.
+
"ARABIC", ///< The Arabic script. Used by Arabic and Urdu among others.
- "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+ "HEBREW", ///< The Hebrew script. Used by Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+ "ARMENIAN", ///< The Armenian script. Used by Armenian.
+ "GEORGIAN", ///< The Georgian script. Used by Georgian.
+
+ "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese (old writing system).
+ "HANGUL", ///< The Hangul jamo script. Used by Korean.
+ "HIRAGANA", ///< The Hiragana script. Used by Japanese.
+ "KATAKANA", ///< The Katakana script. Used by Japanese.
+
"BENGALI", ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
- "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
+ "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
+ "DEVANAGARI", ///< The Devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
"GUJARATI", ///< The Gujarati script. Used by Gujarati.
- "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
- "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
- "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+ "GURMUKHI", ///< The Gurmukhi script. Used by Punjabi.
"KANNADA", ///< The Kannada script. Used by Kannada and Tulu.
"MALAYALAM", ///< The Malayalam script. Used by Malayalam.
+ "ORIYA", ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
"SINHALA", ///< The Sinhala script. Used by Sinhala and Pali.
- "CJK", ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
- "HANGUL", ///< The Hangul jamo script. Used by Korean.
- "KHMER", ///< The Khmer script. Used by the Khmer language.
+ "TAMIL", ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+ "TELUGU", ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
"LAO", ///< The Lao script. Used by the Lao language.
"THAI", ///< The Thai script. Used by the Thai language.
- "BURMESE", ///< The Burmese script. Used by the Burmese (Myanmar) language.
- "HEBREW", ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
- "HIRAGANA", ///< The Hiragana script. Used by the Japanese.
- "KATAKANA", ///< The Katakana script. Used by the Japanese.
+ "KHMER", ///< The Khmer script. Used by the Khmer language.
+
"EMOJI", ///< The Emoji which map to standardized Unicode characters.
+
"UNKNOWN" ///< The script is unknown.
};