4 * Copyright (c) 2010 - 2012 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 #include <unicode/ustring.h>
20 #include <unicode/unorm.h>
21 #include <unicode/ucol.h>
22 #include <unicode/uset.h>
24 #include "ctsvc_internal.h"
25 #include "ctsvc_normalize.h"
26 #include "ctsvc_localize.h"
27 #include "ctsvc_localize_utils.h"
29 #include "ctsvc_localize_kor.h"
30 #include "ctsvc_localize_jp.h"
/**
 * Map a CTSVC_LANG_* language identifier to the CTSVC_SORT_* group used
 * for that language.
 *
 * Several language ids share one sort group: Russian, Bulgarian, Macedonia,
 * Kazakhstan, Serbian and Ukraine all yield CTSVC_SORT_CYRILLIC, and
 * Arabic and Persian both yield CTSVC_SORT_ARABIC.
 *
 * @param language  A CTSVC_LANG_* value.
 * @return The CTSVC_SORT_* value paired with @a language.
 *         NOTE(review): the final `return CTSVC_SORT_WESTERN` appears to be
 *         the fallback for ids that have no case of their own - confirm.
 */
32 int ctsvc_get_sort_type_from_language(int language)
35 case CTSVC_LANG_CHINESE:
36 return CTSVC_SORT_CJK;
37 case CTSVC_LANG_JAPANESE:
38 return CTSVC_SORT_JAPANESE;
39 case CTSVC_LANG_KOREAN:
40 return CTSVC_SORT_KOREAN;
41 case CTSVC_LANG_ENGLISH:
42 return CTSVC_SORT_WESTERN;
43 case CTSVC_LANG_NUMBER:
44 return CTSVC_SORT_NUMBER;
/* The six language ids below share the Cyrillic sort group. */
45 case CTSVC_LANG_RUSSIAN:
46 case CTSVC_LANG_BULGARIAN:
47 case CTSVC_LANG_MACEDONIA:
48 case CTSVC_LANG_KAZAKHSTAN:
49 case CTSVC_LANG_SERBIAN:
50 case CTSVC_LANG_UKRAINE:
51 return CTSVC_SORT_CYRILLIC;
52 case CTSVC_LANG_ARMENIAN:
53 return CTSVC_SORT_ARMENIAN;
54 case CTSVC_LANG_GREEK:
55 return CTSVC_SORT_GREEK;
/* Arabic and Persian share the Arabic sort group. */
56 case CTSVC_LANG_ARABIC:
57 case CTSVC_LANG_PERSIAN:
59 return CTSVC_SORT_ARABIC;
60 case CTSVC_LANG_HINDI:
61 return CTSVC_SORT_DEVANAGARI;
62 case CTSVC_LANG_GEORGIAN:
63 return CTSVC_SORT_GEORGIAN;
64 case CTSVC_LANG_TURKISH:
65 return CTSVC_SORT_TURKISH;
/* NOTE(review): the case label for this return is not visible here;
 * presumably the Thai language id - confirm. */
67 return CTSVC_SORT_THAI;
68 case CTSVC_LANG_BENGALI:
69 return CTSVC_SORT_BENGALI;
70 case CTSVC_LANG_PUNJABI:
71 return CTSVC_SORT_PUNJABI;
72 case CTSVC_LANG_MALAYALAM:
73 return CTSVC_SORT_MALAYALAM;
74 case CTSVC_LANG_TELUGU:
75 return CTSVC_SORT_TELUGU;
76 case CTSVC_LANG_TAMIL:
77 return CTSVC_SORT_TAMIL;
78 case CTSVC_LANG_ORIYA:
79 return CTSVC_SORT_ORIYA;
80 case CTSVC_LANG_SINHALA:
81 return CTSVC_SORT_SINHALA;
82 case CTSVC_LANG_GUJARATI:
83 return CTSVC_SORT_GUJARATI;
84 case CTSVC_LANG_KANNADA:
85 return CTSVC_SORT_KANNADA;
/* NOTE(review): the case label for this return is not visible here;
 * presumably the Lao language id - confirm. */
87 return CTSVC_SORT_LAO;
88 case CTSVC_LANG_HEBREW:
89 return CTSVC_SORT_HEBREW;
90 case CTSVC_LANG_BURMESE:
91 return CTSVC_SORT_BURMESE;
92 case CTSVC_LANG_KHMER:
93 return CTSVC_SORT_KHMER;
/* The explicit "others" id has its own sort group, distinct from the
 * CTSVC_SORT_WESTERN return that follows. */
94 case CTSVC_LANG_OTHERS:
95 return CTSVC_SORT_OTHERS;
/* NOTE(review): presumably the default branch - confirm. */
97 return CTSVC_SORT_WESTERN;
/**
 * Determine the sort group of a name from its first UTF-8 character.
 *
 * Only the leading character of @a src is examined: ctsvc_check_utf8() is
 * given the first byte and its result is used as the character's byte
 * length, that many bytes are copied into a NUL-terminated scratch buffer,
 * converted to UTF-16 with u_strFromUTF8(), classified by
 * ctsvc_check_language() and finally mapped through
 * ctsvc_get_sort_type_from_language().
 *
 * @param src  UTF-8 encoded name; src[0] is read without a NULL check in
 *             the code shown here.
 * @return NOTE(review): the return statement is not visible here;
 *         presumably `ret` (a CTSVC_SORT_* value) on success - confirm.
 *         RETVM_IF is invoked with CONTACTS_ERROR_INVALID_PARAMETER when
 *         ctsvc_check_utf8() yields a value <= 0, and with
 *         CONTACTS_ERROR_SYSTEM when u_strFromUTF8() reports failure;
 *         presumably the macro returns that value to the caller.
 */
101 int ctsvc_get_name_sort_type(const char *src)
/* 0 is U_ZERO_ERROR; ICU functions expect a non-failure status on entry. */
103 UErrorCode status = 0;
104 UChar tmp_result[10];
105 int ret = CTSVC_SORT_OTHERS;
/* Byte length of the first character, derived from its lead byte only. */
110 char_len = ctsvc_check_utf8(src[0]);
111 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
/* NOTE(review): char_src is declared outside the lines shown here; it must
 * hold at least char_len + 1 bytes - confirm its size. */
113 memcpy(char_src, &src[0], char_len);
114 char_src[char_len] = '\0';
/* The output length is not needed (NULL); a source length of -1 tells ICU
 * that char_src is NUL-terminated. */
116 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
117 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
118 "u_strFromUTF8() Fail(%s)", u_errorName(status));
/* Classify the single converted character, then map language -> sort group. */
120 language_type = ctsvc_check_language(tmp_result);
121 ret = ctsvc_get_sort_type_from_language(language_type);
/**
 * Apply extra, in-place normalization to a UTF-16 buffer.
 *
 * Each of the first @a word_size code units is inspected:
 *  - if CTSVC_COMPARE_BETWEEN(0xFF00, unit, 0xFF60) holds, the unit is
 *    replaced by its low byte plus 0x20 (for example U+FF21 becomes
 *    U+0041);
 *  - otherwise, if ctsvc_is_hangul() accepts the unit, a pointer to it is
 *    handed to ctsvc_hangul_compatibility2jamo(), which can rewrite it in
 *    place.
 *
 * @param word       Buffer of UChar code units, modified in place.
 * @param word_size  Number of code units of @a word to process.
 */
126 void ctsvc_extra_normalize(UChar *word, int32_t word_size)
129 for (i=0;i<word_size;i++) {
130 /* FF00 ~ FF60 : fullwidth -> halfwidth. FFE0~FFE6 is also named in the original note, but no branch shown here handles that range. */
131 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
132 int unicode_value1 = 0;
133 int unicode_value2 = 0;
/* unicode_value1 is the new high byte (always 0); unicode_value2 is the
 * new low byte: the old low byte moved up by 0x20. */
134 unicode_value1 = 0x0;
135 unicode_value2 = (0xFF & word[i]) + 0x20;
136 word[i] = unicode_value1 << 8 | unicode_value2;
/* NOTE(review): judging by its name, the helper below converts Hangul
 * compatibility jamo to regular jamo - confirm in ctsvc_localize_kor. */
138 else if (ctsvc_is_hangul(word[i])) {
139 ctsvc_hangul_compatibility2jamo(&word[i]);
/**
 * Select a locale name for a CTSVC_LANG_* identifier.
 *
 * Language ids that cover more than one language code (Hindi: hi/mr/ne,
 * Bengali: as/bn) consult the current language setting obtained from
 * ctsvc_get_langset() and compare its leading two characters to decide
 * which code applies.
 *
 * The trailing comment on each case names the language code(s) tied to
 * that id; for the cases from Malayalam onward it is the two-letter code
 * that ctsvc_get_language_type() maps to the same id.
 *
 * @param lang  A CTSVC_LANG_* value.
 * @return A constant string.
 *         NOTE(review): the returned literals are not visible in the lines
 *         shown here - verify them against the per-case comments.
 *
 * NOTE(review): langset is passed to strncmp() without a NULL check, while
 * the file-scope langset starts out as NULL; this relies on
 * ctsvc_set_langset() having been called first - confirm.
 */
144 const char *ctsvc_get_language_locale(int lang)
146 char *langset = ctsvc_get_langset();
149 case CTSVC_LANG_AZERBAIJAN: /* az, Azerbaijan */
151 case CTSVC_LANG_ARABIC: /* ar, Bahrain - Arabic */
153 case CTSVC_LANG_BULGARIAN: /* bg, Bulgaria - Bulgarian */
155 case CTSVC_LANG_CATALAN: /* ca, Spain - Catalan */
157 case CTSVC_LANG_CZECH: /* cs, Czech Republic - Czech */
159 case CTSVC_LANG_DANISH: /* da, Denmark - Danish */
161 case CTSVC_LANG_GERMAN: /* de, Germany - German */
163 case CTSVC_LANG_GREEK: /* el, Greece - Greek */
165 case CTSVC_LANG_ENGLISH: /* en, en_PH, en_US */
167 case CTSVC_LANG_SPANISH: /* es_ES, es_US, El Salvador - Spanish */
169 case CTSVC_LANG_ESTONIAN: /* et, Estonia - Estonian */
171 case CTSVC_LANG_BASQUE: /* eu, Spain - Basque */
173 case CTSVC_LANG_FINNISH: /* fi, Finland - Finnish */
175 case CTSVC_LANG_FRENCH: /* fr_CA, fr_FR */
177 case CTSVC_LANG_IRISH: /* ga, Ireland - Irish */
179 case CTSVC_LANG_GALICIAN: /* gl, Spain - Galician */
/* One id, three language codes: pick by the current language setting. */
181 case CTSVC_LANG_HINDI: /* hi, India - Hindi, Marathi, Nepali */
182 if (STRING_EQUAL == strncmp(langset, "hi", strlen("hi"))) {
185 else if (STRING_EQUAL == strncmp(langset, "mr", strlen("mr"))) {
188 else if (STRING_EQUAL == strncmp(langset, "ne", strlen("ne"))) {
192 case CTSVC_LANG_CROATIAN: /* hr, Bosnia and Herzegovina - Croatian */
194 case CTSVC_LANG_HUNGARIAN: /* hu, Hungary - Hungarian */
196 case CTSVC_LANG_ARMENIAN: /* hy, Armenia - Armenian */
198 case CTSVC_LANG_ICELANDIC: /* is, Iceland - Icelandic */
200 case CTSVC_LANG_ITALIAN: /* it_IT, Italy - Italian */
202 case CTSVC_LANG_JAPANESE: /* ja_JP, Japan */
204 case CTSVC_LANG_GEORGIAN: /* ka, Georgia - Georgian */
206 case CTSVC_LANG_KAZAKHSTAN: /* kk, Kazakhstan */
208 case CTSVC_LANG_KOREAN: /* ko, ko_KR */
210 case CTSVC_LANG_LITHUANIAN: /* lt, Lithuania - Lithuanian */
212 case CTSVC_LANG_LATVIAN: /* lv, Latvia - Latvian */
214 case CTSVC_LANG_MACEDONIA: /* mk, Macedonia */
216 case CTSVC_LANG_NORWAY: /* nb, Norway */
218 case CTSVC_LANG_DUTCH: /* nl_NL, Netherlands Dutch */
220 case CTSVC_LANG_POLISH: /* pl, Polish */
222 case CTSVC_LANG_PORTUGUESE: /* pt_BR, pt_PT, Portugal */
224 case CTSVC_LANG_ROMANIA: /* ro, Romania */
226 case CTSVC_LANG_RUSSIAN: /* ru_RU, Russia */
228 case CTSVC_LANG_SLOVAK: /* sk, Slovakia - Slovak */
230 case CTSVC_LANG_SLOVENIAN: /* sl, Slovenia - Slovenian */
232 case CTSVC_LANG_SERBIAN: /* sr, Serbia - Serbian */
234 case CTSVC_LANG_SWEDISH: /* sv, Finland - Swedish */
236 case CTSVC_LANG_TURKISH: /* tr_TR, Turkey - Turkish */
238 case CTSVC_LANG_UKRAINE: /* uk, Ukraine */
240 case CTSVC_LANG_CHINESE: /* zh_CN, zh_HK, zh_SG, zh_TW */
242 case CTSVC_LANG_THAI: /* th_TH, Thai */
/* One id, two language codes: "as" is tested explicitly. */
244 case CTSVC_LANG_BENGALI: /* as, bn */
245 if (STRING_EQUAL == strncmp(langset, "as", strlen("as"))) {
249 case CTSVC_LANG_PUNJABI: /* pa, India */
251 case CTSVC_LANG_MALAYALAM: /* ml */
253 case CTSVC_LANG_TELUGU: /* te */
255 case CTSVC_LANG_TAMIL: /* ta */
257 case CTSVC_LANG_ORIYA: /* or */
259 case CTSVC_LANG_SINHALA: /* si */
261 case CTSVC_LANG_GUJARATI: /* gu */
263 case CTSVC_LANG_KANNADA: /* kn */
267 case CTSVC_LANG_HEBREW: /* he */
269 case CTSVC_LANG_VIETNAMESE: /* vi */
271 case CTSVC_LANG_PERSIAN: /* fa */
273 case CTSVC_LANG_UZBEK: /* uz */
275 case CTSVC_LANG_URDU: /* ur */
277 case CTSVC_LANG_ALBANIAN: /* sq */
279 case CTSVC_LANG_BURMESE: /* my */
281 case CTSVC_LANG_MALAY: /* ms */
283 case CTSVC_LANG_KHMER: /* km */
285 case CTSVC_LANG_INDONESIAN: /* id */
287 case CTSVC_LANG_TAGALOG: /* tl */
/**
 * Translate a system language setting string into a CTSVC_LANG_* id.
 *
 * Every comparison is strncmp() limited to strlen() of a two-letter code,
 * so only the first two characters of @a system_lang take part in the
 * match (e.g. "en_US" matches "en").
 *
 * Some codes are grouped under one id: "hi", "mr" and "ne" all give
 * CTSVC_LANG_HINDI, and "as" and "bn" both give CTSVC_LANG_BENGALI.
 *
 * @param system_lang  Language setting string (see VCONFKEY_LANGSET).
 *                     A NULL pointer is handled by the RETV_IF guard with
 *                     CTSVC_LANG_OTHERS.
 * @return NOTE(review): the return statement is not visible here;
 *         presumably the CTSVC_LANG_* value stored in `type` - confirm.
 */
294 int ctsvc_get_language_type(const char *system_lang)
296 /* refer to the VCONFKEY_LANGSET */
299 RETV_IF(NULL == system_lang, CTSVC_LANG_OTHERS);
/* az, Azerbaijan */
302 if (STRING_EQUAL == strncmp(system_lang, "az", strlen("az")))
303 type = CTSVC_LANG_AZERBAIJAN;
304 /* ar, Bahrain - Arabic */
305 else if (STRING_EQUAL == strncmp(system_lang, "ar", strlen("ar")))
306 type = CTSVC_LANG_ARABIC;
307 /* bg, Bulgaria - Bulgarian */
308 else if (STRING_EQUAL == strncmp(system_lang, "bg", strlen("bg")))
309 type = CTSVC_LANG_BULGARIAN;
310 /* ca, Spain - Catalan */
311 else if (STRING_EQUAL == strncmp(system_lang, "ca", strlen("ca")))
312 type = CTSVC_LANG_CATALAN;
313 /* cs, Czech Republic - Czech */
314 else if (STRING_EQUAL == strncmp(system_lang, "cs", strlen("cs")))
315 type = CTSVC_LANG_CZECH;
316 /* da, Denmark - Danish */
317 else if (STRING_EQUAL == strncmp(system_lang, "da", strlen("da")))
318 type = CTSVC_LANG_DANISH;
319 /* de, Germany - German */
320 else if (STRING_EQUAL == strncmp(system_lang, "de", strlen("de")))
321 type = CTSVC_LANG_GERMAN;
322 /* el, Greece - Greek */
323 else if (STRING_EQUAL == strncmp(system_lang, "el", strlen("el")))
324 type = CTSVC_LANG_GREEK;
325 /* en, en_PH, en_US */
326 else if (STRING_EQUAL == strncmp(system_lang, "en", strlen("en")))
327 type = CTSVC_LANG_ENGLISH;
328 /* es_ES, es_US, El Salvador - Spanish */
329 else if (STRING_EQUAL == strncmp(system_lang, "es", strlen("es")))
330 type = CTSVC_LANG_SPANISH;
331 /* et, Estonia - Estonian */
332 else if (STRING_EQUAL == strncmp(system_lang, "et", strlen("et")))
333 type = CTSVC_LANG_ESTONIAN;
334 /* eu, Spain - Basque */
335 else if (STRING_EQUAL == strncmp(system_lang, "eu", strlen("eu")))
336 type = CTSVC_LANG_BASQUE;
337 /* fi, Finland - Finnish */
338 else if (STRING_EQUAL == strncmp(system_lang, "fi", strlen("fi")))
339 type = CTSVC_LANG_FINNISH;
/* fr_CA, fr_FR */
341 else if (STRING_EQUAL == strncmp(system_lang, "fr", strlen("fr")))
342 type = CTSVC_LANG_FRENCH;
343 /* ga, Ireland - Irish */
344 else if (STRING_EQUAL == strncmp(system_lang, "ga", strlen("ga")))
345 type = CTSVC_LANG_IRISH;
346 /* gl, Spain - Galician */
347 else if (STRING_EQUAL == strncmp(system_lang, "gl", strlen("gl")))
348 type = CTSVC_LANG_GALICIAN;
349 /* hi, India - Hindi */
350 else if (STRING_EQUAL == strncmp(system_lang, "hi", strlen("hi")))
351 type = CTSVC_LANG_HINDI;
352 /* mr, India - Marathi (grouped under the Hindi id) */
353 else if (STRING_EQUAL == strncmp(system_lang, "mr", strlen("mr")))
354 type = CTSVC_LANG_HINDI;
355 /* ne, Nepali (grouped under the Hindi id) */
356 else if (STRING_EQUAL == strncmp(system_lang, "ne", strlen("ne")))
357 type = CTSVC_LANG_HINDI;
358 /* hr, Bosnia and Herzegovina - Croatian */
359 else if (STRING_EQUAL == strncmp(system_lang, "hr", strlen("hr")))
360 type = CTSVC_LANG_CROATIAN;
361 /* hu, Hungary - Hungarian */
362 else if (STRING_EQUAL == strncmp(system_lang, "hu", strlen("hu")))
363 type = CTSVC_LANG_HUNGARIAN;
364 /* hy, Armenia - Armenian */
365 else if (STRING_EQUAL == strncmp(system_lang, "hy", strlen("hy")))
366 type = CTSVC_LANG_ARMENIAN;
367 /* is, Iceland - Icelandic */
368 else if (STRING_EQUAL == strncmp(system_lang, "is", strlen("is")))
369 type = CTSVC_LANG_ICELANDIC;
370 /* it_IT, Italy - Italian */
371 else if (STRING_EQUAL == strncmp(system_lang, "it", strlen("it")))
372 type = CTSVC_LANG_ITALIAN;
/* ja_JP, Japan */
374 else if (STRING_EQUAL == strncmp(system_lang, "ja", strlen("ja")))
375 type = CTSVC_LANG_JAPANESE;
376 /* ka, Georgia - Georgian */
377 else if (STRING_EQUAL == strncmp(system_lang, "ka", strlen("ka")))
378 type = CTSVC_LANG_GEORGIAN;
/* kk, Kazakhstan */
380 else if (STRING_EQUAL == strncmp(system_lang, "kk", strlen("kk")))
381 type = CTSVC_LANG_KAZAKHSTAN;
/* ko, ko_KR */
383 else if (STRING_EQUAL == strncmp(system_lang, "ko", strlen("ko")))
384 type = CTSVC_LANG_KOREAN;
385 /* lt, Lithuania - Lithuanian */
386 else if (STRING_EQUAL == strncmp(system_lang, "lt", strlen("lt")))
387 type = CTSVC_LANG_LITHUANIAN;
388 /* lv, Latvia - Latvian */
389 else if (STRING_EQUAL == strncmp(system_lang, "lv", strlen("lv")))
390 type = CTSVC_LANG_LATVIAN;
/* mk, Macedonia */
392 else if (STRING_EQUAL == strncmp(system_lang, "mk", strlen("mk")))
393 type = CTSVC_LANG_MACEDONIA;
/* nb, Norway */
395 else if (STRING_EQUAL == strncmp(system_lang, "nb", strlen("nb")))
396 type = CTSVC_LANG_NORWAY;
397 /* nl_NL, Netherlands Dutch */
398 else if (STRING_EQUAL == strncmp(system_lang, "nl", strlen("nl")))
399 type = CTSVC_LANG_DUTCH;
/* pl, Polish */
401 else if (STRING_EQUAL == strncmp(system_lang, "pl", strlen("pl")))
402 type = CTSVC_LANG_POLISH;
403 /* pt_BR, pt_PT, Portugal */
404 else if (STRING_EQUAL == strncmp(system_lang, "pt", strlen("pt")))
405 type = CTSVC_LANG_PORTUGUESE;
/* ro, Romania */
407 else if (STRING_EQUAL == strncmp(system_lang, "ro", strlen("ro")))
408 type = CTSVC_LANG_ROMANIA;
/* ru_RU, Russia */
410 else if (STRING_EQUAL == strncmp(system_lang, "ru", strlen("ru")))
411 type = CTSVC_LANG_RUSSIAN;
412 /* sk, Slovakia - Slovak */
413 else if (STRING_EQUAL == strncmp(system_lang, "sk", strlen("sk")))
414 type = CTSVC_LANG_SLOVAK;
415 /* sl, Slovenia - Slovenian */
416 else if (STRING_EQUAL == strncmp(system_lang, "sl", strlen("sl")))
417 type = CTSVC_LANG_SLOVENIAN;
418 /* sr, Serbia - Serbian */
419 else if (STRING_EQUAL == strncmp(system_lang, "sr", strlen("sr")))
420 type = CTSVC_LANG_SERBIAN;
421 /* sv, Finland - Swedish */
422 else if (STRING_EQUAL == strncmp(system_lang, "sv", strlen("sv")))
423 type = CTSVC_LANG_SWEDISH;
424 /* tr_TR, Turkey - Turkish */
425 else if (STRING_EQUAL == strncmp(system_lang, "tr", strlen("tr")))
426 type = CTSVC_LANG_TURKISH;
/* uk, Ukraine */
428 else if (STRING_EQUAL == strncmp(system_lang, "uk", strlen("uk")))
429 type = CTSVC_LANG_UKRAINE;
430 /* zh_CN, zh_HK, zh_SG, zh_TW */
431 else if (STRING_EQUAL == strncmp(system_lang, "zh", strlen("zh")))
432 type = CTSVC_LANG_CHINESE;
/* th_TH, Thai */
434 else if (STRING_EQUAL == strncmp(system_lang, "th", strlen("th")))
435 type = CTSVC_LANG_THAI;
/* "as" and "bn" are both grouped under the Bengali id. */
436 else if (STRING_EQUAL == strncmp(system_lang, "as", strlen("as")))
437 type = CTSVC_LANG_BENGALI;
438 else if (STRING_EQUAL == strncmp(system_lang, "bn", strlen("bn")))
439 type = CTSVC_LANG_BENGALI;
/* The remaining codes each map to exactly one id. */
440 else if (STRING_EQUAL == strncmp(system_lang, "pa", strlen("pa")))
441 type = CTSVC_LANG_PUNJABI;
442 else if (STRING_EQUAL == strncmp(system_lang, "ml", strlen("ml")))
443 type = CTSVC_LANG_MALAYALAM;
444 else if (STRING_EQUAL == strncmp(system_lang, "te", strlen("te")))
445 type = CTSVC_LANG_TELUGU;
446 else if (STRING_EQUAL == strncmp(system_lang, "ta", strlen("ta")))
447 type = CTSVC_LANG_TAMIL;
448 else if (STRING_EQUAL == strncmp(system_lang, "or", strlen("or")))
449 type = CTSVC_LANG_ORIYA;
450 else if (STRING_EQUAL == strncmp(system_lang, "si", strlen("si")))
451 type = CTSVC_LANG_SINHALA;
452 else if (STRING_EQUAL == strncmp(system_lang, "gu", strlen("gu")))
453 type = CTSVC_LANG_GUJARATI;
454 else if (STRING_EQUAL == strncmp(system_lang, "kn", strlen("kn")))
455 type = CTSVC_LANG_KANNADA;
456 else if (STRING_EQUAL == strncmp(system_lang, "lo", strlen("lo")))
457 type = CTSVC_LANG_LAO;
458 else if (STRING_EQUAL == strncmp(system_lang, "he", strlen("he")))
459 type = CTSVC_LANG_HEBREW;
460 else if (STRING_EQUAL == strncmp(system_lang, "vi", strlen("vi")))
461 type = CTSVC_LANG_VIETNAMESE;
462 else if (STRING_EQUAL == strncmp(system_lang, "fa", strlen("fa")))
463 type = CTSVC_LANG_PERSIAN;
464 else if (STRING_EQUAL == strncmp(system_lang, "uz", strlen("uz")))
465 type = CTSVC_LANG_UZBEK;
466 else if (STRING_EQUAL == strncmp(system_lang, "ur", strlen("ur")))
467 type = CTSVC_LANG_URDU;
468 else if (STRING_EQUAL == strncmp(system_lang, "sq", strlen("sq")))
469 type = CTSVC_LANG_ALBANIAN;
470 else if (STRING_EQUAL == strncmp(system_lang, "my", strlen("my")))
471 type = CTSVC_LANG_BURMESE;
472 else if (STRING_EQUAL == strncmp(system_lang, "ms", strlen("ms")))
473 type = CTSVC_LANG_MALAY;
474 else if (STRING_EQUAL == strncmp(system_lang, "km", strlen("km")))
475 type = CTSVC_LANG_KHMER;
476 else if (STRING_EQUAL == strncmp(system_lang, "id", strlen("id")))
477 type = CTSVC_LANG_INDONESIAN;
478 else if (STRING_EQUAL == strncmp(system_lang, "tl", strlen("tl")))
479 type = CTSVC_LANG_TAGALOG;
/* NOTE(review): presumably the final else branch - any prefix not matched
 * above becomes CTSVC_LANG_OTHERS; confirm. */
481 type = CTSVC_LANG_OTHERS;
/* File-scope language setting string. Starts as NULL and is assigned by
 * ctsvc_set_langset(), which stores the caller's pointer rather than a copy. */
486 static char *langset = NULL;
/**
 * Accessor for the current language setting string.
 *
 * NOTE(review): the body is not visible here; presumably it returns the
 * file-scope `langset` pointer, which is NULL until ctsvc_set_langset() has
 * been called - confirm. ctsvc_get_language_locale() passes the result
 * straight to strncmp().
 */
488 char* ctsvc_get_langset()
/**
 * Replace the stored language setting string.
 *
 * The pointer is stored as given; the assignment shown here makes no copy,
 * so the string must stay valid for as long as it remains the current
 * setting.
 *
 * @param new_langset  New language setting string.
 *
 * NOTE(review): whether the previous value is released before being
 * overwritten is not visible here - confirm ownership with the callers.
 */
493 void ctsvc_set_langset(char *new_langset)
496 langset = new_langset;