4 * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 #include <unicode/ustring.h>
20 #include <unicode/unorm.h>
21 #include <unicode/ucol.h>
22 #include <unicode/uset.h>
24 #include "ctsvc_internal.h"
25 #include "ctsvc_localize_utils.h"
27 int ctsvc_check_utf8(char c)
29 if ((c & 0xff) < (128 & 0xff))
31 else if ((c & (char)0xe0) == (char)0xc0)
33 else if ((c & (char)0xf0) == (char)0xe0)
35 else if ((c & (char)0xf8) == (char)0xf0)
37 else if ((c & (char)0xfc) == (char)0xf8)
39 else if ((c & (char)0xfe) == (char)0xfc)
42 return CONTACTS_ERROR_INVALID_PARAMETER;
46 * check language type with first word(UChar) using UBLOCK
48 int ctsvc_check_language(UChar *word)
52 if (u_isdigit(word[0])) {
53 type = CTSVC_LANG_NUMBER;
54 } else if (u_isalpha(word[0])) {
56 * refer to the uchar.h
57 * #define U_GC_L_MASK (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
58 * U_GC_LU_MASK : U_UPPERCASE_LETTER
59 * U_GC_LL_MASK : U_LOWERCASE_LETTER
60 * U_GC_LT_MASK : U_TITLECASE_LETTER
61 * U_GC_LM_MASK : U_MODIFIER_LETTER
62 * U_GC_LO_MASK : U_OTHER_LETTER
65 UBlockCode code = ublock_getCode(word[0]);
66 DBG("Character unicode block is %d", code);
70 case UBLOCK_BASIC_LATIN: /* =1, [0000] */
71 case UBLOCK_LATIN_1_SUPPLEMENT: /* =2, [0080] */
72 case UBLOCK_LATIN_EXTENDED_A: /* =3, [0100] */
73 case UBLOCK_LATIN_EXTENDED_B: /* =4, [0180] */
74 case UBLOCK_LATIN_EXTENDED_ADDITIONAL: /* =38, [1E00] */
75 type = CTSVC_LANG_ENGLISH;
76 /* type = CTSVC_LANG_CATALAN; */ /* ca, Spain - Catalan */
77 /* type = CTSVC_LANG_GERMAN; */ /* de, Germany - German */
78 /* type = CTSVC_LANG_BASQUE; */ /* eu, Spain - Basque */
79 /* type = CTSVC_LANG_DUTCH; */ /* nl_Nl, Netherlands Dutch */
80 /* type = CTSVC_LANG_FRENCH; */ /* fr_CA, fr_FR */
81 /* type = CTSVC_LANG_ITALIAN; */ /* it_IT, Italy - Italian */
82 /* type = CTSVC_LANG_PORTUGUESE; *//* pt_BR, pt_PT, Portugal */
83 /* type = CTSVC_LANG_SPANISH; */ /*/ es_ES, es_US, El Salvador - Spanish */
84 /* type = CTSVC_LANG_NORWAY; */ /* nb, Norway */
85 /* type = CTSVC_LANG_DANISH; */ /* da, Denmark - Danish */
86 /* type = CTSVC_LANG_AZERBAIJAN; *//* az, Azerbaijan */
87 /* type = CTSVC_LANG_ROMANIA; */ /* ro, Romania */
88 /* type = CTSVC_LANG_CZECH; */ /* cs, Czech Republic - Czech */
89 /* type = CTSVC_LANG_ESTONIAN; */ /* et, Estonia - Estonian */
90 /* type = CTSVC_LANG_FINNISH; */ /* fi, Finland - Finnish */
91 /* type = CTSVC_LANG_IRISH; */ /* ga, Ireland - Irish */
92 /* type = CTSVC_LANG_GALICIAN; */ /* gl, Spain - Galician */
93 /* type = CTSVC_LANG_HUNGARIAN; */ /* hu, Hungary - Hungarian */
94 /* type = CTSVC_LANG_SWEDISH; */ /* sv, Finland - Swedish */
95 /* type = CTSVC_LANG_SLOVENIAN; */ /* sl, Slovenia - Slovenian */
96 /* type = CTSVC_LANG_SLOVAK; */ /* sk, Slovakia - Slovak */
97 /* type = CTSVC_LANG_LITHUANIAN; *//* lt, Lithuania - Lithuanian */
98 /* type = CTSVC_LANG_POLISH; */ /* pl, Polish */
99 /* type = CTSVC_LANG_LATVIAN; */ /* lv, Latvia - Latvian */
100 /* type = CTSVC_LANG_CROATIAN; */ /* hr, Bosnia and Herzegovina - Croatian */
101 /* type = CTSVC_LANG_ICELANDIC; */ /* is, Iceland - Icelandic */
105 case UBLOCK_HANGUL_JAMO: /* =30, [1100] */
106 case UBLOCK_HANGUL_COMPATIBILITY_JAMO: /* =65, [3130] */
107 case UBLOCK_HANGUL_SYLLABLES: /* =74, [AC00] */
108 case UBLOCK_HANGUL_JAMO_EXTENDED_A: /* =180, [A960] */
109 case UBLOCK_HANGUL_JAMO_EXTENDED_B: /* =185, [D7B0] */
110 type = CTSVC_LANG_KOREAN;
114 case UBLOCK_CJK_RADICALS_SUPPLEMENT: /* =58, [2E80] */
115 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: /* =61, [3000] */
116 case UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS: /* =68, [3200] */
117 case UBLOCK_CJK_STROKES: /* =130, [31C0] */
118 case UBLOCK_CJK_COMPATIBILITY: /* =69, [3300] */
119 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: /* =70, [3400] */
120 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: /* =71, [4E00] */
121 case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: /* =79, [F900] */
122 case UBLOCK_CJK_COMPATIBILITY_FORMS: /* =83, [FE30] */
123 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B: /* =94, [20000] */
124 case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT: /* =95, [2F800] */
125 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C: /* =197, [2A700] */
126 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D: /* =209, [2B740] */
127 /* LCOV_EXCL_START */
128 type = CTSVC_LANG_CHINESE;
132 case UBLOCK_HIRAGANA: /* =62, [3040] */
133 case UBLOCK_KATAKANA: /* =63, [30A0] */
134 case UBLOCK_KATAKANA_PHONETIC_EXTENSIONS: /* =107, [31F0] */
135 case UBLOCK_JAVANESE: /* =181, [A980] */
136 type = CTSVC_LANG_JAPANESE;
139 case UBLOCK_GREEK: /* =8, [0370] */
140 case UBLOCK_GREEK_EXTENDED: /* =39, [1F00] */
141 type = CTSVC_LANG_GREEK;
144 case UBLOCK_CYRILLIC: /* =9, [0400] */
145 case UBLOCK_CYRILLIC_EXTENDED_A: /* =158, [2DE0] */
146 case UBLOCK_CYRILLIC_EXTENDED_B: /* =160, [A640] */
147 case UBLOCK_CYRILLIC_SUPPLEMENTARY: /* =97, UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, [0500] */
148 type = CTSVC_LANG_RUSSIAN;
149 /* type = CTSVC_LANG_BULGARIAN: */ /* bg, Bulgaria - Bulgarian */
150 /* type = CTSVC_LANG_MACEDONIA: */ /* mk, Macedonia */
151 /* type = CTSVC_LANG_KAZAKHSTAN: */ /* kk, Kazakhstan */
152 /* type = CTSVC_LANG_SERBIAN: */ /* sr, Serbia - Serbian */
153 /* type = CTSVC_LANG_UKRAINE: */ /* uk, Ukraine */
156 case UBLOCK_ARMENIAN: /* =10, [0530] */
157 type = CTSVC_LANG_ARMENIAN;
159 case UBLOCK_ARABIC: /* =12, [0600] */
160 type = CTSVC_LANG_ARABIC;
162 case UBLOCK_DEVANAGARI: /* =15, [0900] */
163 case UBLOCK_DEVANAGARI_EXTENDED: /* =179, [A8E0] */
164 type = CTSVC_LANG_HINDI;
166 case UBLOCK_GEORGIAN: /* =29, [10A0] */
167 case UBLOCK_GEORGIAN_SUPPLEMENT: /* =135, [2D00] */
168 type = CTSVC_LANG_GEORGIAN;
170 case UBLOCK_OLD_TURKIC: /* =191, [10C00] */
171 type = CTSVC_LANG_TURKISH;
173 case UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS: /* =87, [FF00] hangul : FFA0 ~ FFDC */
174 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF21, word[0], (UChar)0xFF3A)
175 || CTSVC_COMPARE_BETWEEN((UChar)0xFF41, word[0], (UChar)0xFF5A))
176 type = CTSVC_LANG_ENGLISH;
177 else if (CTSVC_COMPARE_BETWEEN((UChar)0xFF10, word[0], (UChar)0xFF19))
178 type = CTSVC_LANG_NUMBER;
179 else if (CTSVC_COMPARE_BETWEEN((UChar)0xFF65, word[0], (UChar)0xFF9F))
180 type = CTSVC_LANG_JAPANESE;
181 else if (CTSVC_COMPARE_BETWEEN((UChar)0xFFA0, word[0], (UChar)0xFFDC))
182 type = CTSVC_LANG_KOREAN;
184 type = CTSVC_LANG_OTHERS;
187 type = CTSVC_LANG_OTHERS;
191 type = CTSVC_LANG_OTHERS;
194 DBG("language type = %d", type);
198 /* check language type by first word(char*) using UBLOCK */
199 int ctsvc_check_language_type(const char *src)
205 UErrorCode status = 0;
208 length = ctsvc_check_utf8(src[0]);
209 RETVM_IF(length <= 0, CONTACTS_ERROR_INTERNAL, "check_utf8() Fail");
211 strncpy(temp, src, length);
213 DBG("temp(%s) src(%s) length(%d)", temp, src, length);
215 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, temp, -1, &status);
216 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
217 "u_strFromUTF8() Fail(%s)", u_errorName(status));
219 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
220 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
221 "u_strToLower() Fail(%s)", u_errorName(status));
223 unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
224 (UChar *)result, array_sizeof(result), &status);
225 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
226 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
228 DBG("0x%x%x", (0xFF00 & (tmp_result[0])) >> 8, (0xFF & (tmp_result[0])));
230 return ctsvc_check_language(result);
233 return CONTACTS_ERROR_INVALID_PARAMETER;