4 * Copyright (c) 2010 - 2012 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 #include <unicode/ustring.h>
20 #include <unicode/unorm.h>
21 #include <unicode/ucol.h>
22 #include <unicode/uset.h>
24 #include "ctsvc_internal.h"
25 #include "ctsvc_normalize.h"
26 #include "ctsvc_localize.h"
27 #include "ctsvc_localize_utils.h"
29 #include "ctsvc_localize_kor.h"
30 #include "ctsvc_localize_jp.h"
/*
 * Map a CTSVC_LANG_* language code to the CTSVC_SORT_* group used for it.
 *
 * Most languages have a sort group of their own; a few share one (see the
 * comments on the Cyrillic and Arabic groups below).
 *
 * @param language  one of the CTSVC_LANG_* codes
 * @return the matching CTSVC_SORT_* value
 *
 * NOTE(review): each line of this listing starts with a stray number and the
 * numbering has gaps (33-34, 58, 66, 86, 96, 98-100). The function braces, the
 * switch header, a few case labels and the fallback label are presumably on
 * the missing lines - confirm against the original file before editing.
 */
32 int ctsvc_get_sort_type_from_language(int language)
35 case CTSVC_LANG_CHINESE:
36 return CTSVC_SORT_CJK;
37 case CTSVC_LANG_JAPANESE:
38 return CTSVC_SORT_JAPANESE;
39 case CTSVC_LANG_KOREAN:
40 return CTSVC_SORT_KOREAN;
41 case CTSVC_LANG_ENGLISH:
42 return CTSVC_SORT_WESTERN;
43 case CTSVC_LANG_NUMBER:
44 return CTSVC_SORT_NUMBER;
/* The six languages below all fall through to the Cyrillic sort group. */
45 case CTSVC_LANG_RUSSIAN:
46 case CTSVC_LANG_BULGARIAN:
47 case CTSVC_LANG_MACEDONIA:
48 case CTSVC_LANG_KAZAKHSTAN:
49 case CTSVC_LANG_SERBIAN:
50 case CTSVC_LANG_UKRAINE:
51 return CTSVC_SORT_CYRILLIC;
52 case CTSVC_LANG_ARMENIAN:
53 return CTSVC_SORT_ARMENIAN;
54 case CTSVC_LANG_GREEK:
55 return CTSVC_SORT_GREEK;
/*
 * Arabic and Persian share the Arabic sort group.
 * NOTE(review): the numbering skips 58 here; a further case label may belong
 * to this group - confirm.
 */
56 case CTSVC_LANG_ARABIC:
57 case CTSVC_LANG_PERSIAN:
59 return CTSVC_SORT_ARABIC;
60 case CTSVC_LANG_HINDI:
61 return CTSVC_SORT_DEVANAGARI;
62 case CTSVC_LANG_GEORGIAN:
63 return CTSVC_SORT_GEORGIAN;
64 case CTSVC_LANG_TURKISH:
65 return CTSVC_SORT_TURKISH;
/* NOTE(review): the case label for this return is not visible (number 66 is
 * missing); presumably CTSVC_LANG_THAI - confirm. */
67 return CTSVC_SORT_THAI;
68 case CTSVC_LANG_BENGALI:
69 return CTSVC_SORT_BENGALI;
70 case CTSVC_LANG_PUNJABI:
71 return CTSVC_SORT_PUNJABI;
72 case CTSVC_LANG_MALAYALAM:
73 return CTSVC_SORT_MALAYALAM;
74 case CTSVC_LANG_TELUGU:
75 return CTSVC_SORT_TELUGU;
76 case CTSVC_LANG_TAMIL:
77 return CTSVC_SORT_TAMIL;
78 case CTSVC_LANG_ORIYA:
79 return CTSVC_SORT_ORIYA;
80 case CTSVC_LANG_SINHALA:
81 return CTSVC_SORT_SINHALA;
82 case CTSVC_LANG_GUJARATI:
83 return CTSVC_SORT_GUJARATI;
84 case CTSVC_LANG_KANNADA:
85 return CTSVC_SORT_KANNADA;
/* NOTE(review): the case label for this return is not visible (number 86 is
 * missing); presumably CTSVC_LANG_LAO - confirm. */
87 return CTSVC_SORT_LAO;
88 case CTSVC_LANG_HEBREW:
89 return CTSVC_SORT_HEBREW;
90 case CTSVC_LANG_BURMESE:
91 return CTSVC_SORT_BURMESE;
92 case CTSVC_LANG_KHMER:
93 return CTSVC_SORT_KHMER;
94 case CTSVC_LANG_OTHERS:
95 return CTSVC_SORT_OTHERS;
/* NOTE(review): presumably the fallback (default) branch - languages without
 * an explicit case above end up in the western sort group. Confirm. */
97 return CTSVC_SORT_WESTERN;
/*
 * Determine the sort group of a name from its first UTF-8 character.
 *
 * The byte length of the leading character is taken from
 * ctsvc_check_utf8(src[0]); that many bytes are copied into a scratch buffer,
 * converted to UTF-16 with u_strFromUTF8(), classified by
 * ctsvc_check_language() and finally mapped through
 * ctsvc_get_sort_type_from_language().
 *
 * @param src  UTF-8 string; src[0] is read without a visible NULL check, so
 *             callers presumably must pass a non-NULL pointer - confirm.
 * @return presumably the resulting CTSVC_SORT_* value (the return statement is
 *         not visible in this listing). The RETVM_IF checks name
 *         CONTACTS_ERROR_INVALID_PARAMETER when the length check fails and
 *         CONTACTS_ERROR_SYSTEM when the ICU conversion fails.
 *
 * NOTE(review): the declarations of char_len, char_src and language_type fall
 * on lines missing from this listing (numbers 106-109 are skipped).
 */
101 int ctsvc_get_name_sort_type(const char *src)
103 UErrorCode status = 0;
104 UChar tmp_result[10];
105 int ret = CTSVC_SORT_OTHERS;
/* Only the lead byte is inspected to size the first character. */
110 char_len = ctsvc_check_utf8(src[0]);
111 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
/* Isolate the first character as a NUL-terminated string.
 * NOTE(review): assumes char_src can hold char_len + 1 bytes; its declaration
 * is not visible here - confirm. */
113 memcpy(char_src, &src[0], char_len);
114 char_src[char_len] = '\0';
/* A source length of -1 tells ICU the input is NUL-terminated. */
116 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
117 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
118 "u_strFromUTF8() Fail(%s)", u_errorName(status));
120 language_type = ctsvc_check_language(tmp_result);
121 ret = ctsvc_get_sort_type_from_language(language_type);
/*
 * Post-process a UTF-16 buffer in place, one code unit at a time.
 *
 *  - Code units selected by CTSVC_COMPARE_BETWEEN(0xFF00, word[i], 0xFF60)
 *    are replaced by (low byte + 0x20) with a zero high byte, e.g. U+FF21
 *    becomes 0x0041.
 *  - Otherwise, code units for which ctsvc_is_hangul() is true are passed to
 *    ctsvc_hangul_compatibility2jamo(), which receives a pointer to the code
 *    unit.
 *
 * @param word       buffer to rewrite in place
 * @param word_size  number of UChar code units to visit
 *
 * NOTE(review): the declaration of the loop index and the braces fall on lines
 * missing from this listing.
 */
126 void ctsvc_extra_normalize(UChar *word, int32_t word_size)
129 for (i=0;i<word_size;i++) {
/* NOTE(review): the comment below also mentions FFE0~FFE6, but no visible
 * code handles that range - confirm whether that is intended. */
130 /* FF00 ~ FF60, FFE0~FFE6 : fullwidth -> halfwidth */
131 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
132 int unicode_value1 = 0;
133 int unicode_value2 = 0;
/* High byte of the result: always zero. */
134 unicode_value1 = 0x0;
/* Low byte of the result: low byte of the input shifted up by 0x20. */
135 unicode_value2 = (0xFF & word[i]) + 0x20;
136 word[i] = unicode_value1 << 8 | unicode_value2;
138 else if (ctsvc_is_hangul(word[i])) {
139 ctsvc_hangul_compatibility2jamo(&word[i]);
/*
 * Index-oriented counterpart of ctsvc_extra_normalize(): rewrites a UTF-16
 * buffer in place, one code unit at a time.
 *
 *  - Code units selected by CTSVC_COMPARE_BETWEEN(0xFF00, word[i], 0xFF60)
 *    are replaced by (low byte + 0x20) with a zero high byte.
 *  - Otherwise, code units for which ctsvc_is_hangul() is true are passed to
 *    ctsvc_hangul_jamo2compatibility() - the opposite direction, judging by
 *    the helper names, of the conversion used in ctsvc_extra_normalize().
 *
 * @param word       buffer to rewrite in place
 * @param word_size  number of UChar code units to visit
 *
 * NOTE(review): the declaration of the loop index and the braces fall on lines
 * missing from this listing.
 */
144 void ctsvc_extra_index_normalize(UChar *word, int32_t word_size)
147 for (i=0;i<word_size;i++) {
/* NOTE(review): the comment below also mentions FFE0~FFE6, but no visible
 * code handles that range - confirm whether that is intended. */
148 // FF00 ~ FF60, FFE0~FFE6 : fullwidth -> halfwidth
149 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
150 int unicode_value1 = 0;
151 int unicode_value2 = 0;
/* High byte of the result: always zero. */
152 unicode_value1 = 0x0;
/* Low byte of the result: low byte of the input shifted up by 0x20. */
153 unicode_value2 = (0xFF & word[i]) + 0x20;
154 word[i] = unicode_value1 << 8 | unicode_value2;
156 else if (ctsvc_is_hangul(word[i])) {
157 ctsvc_hangul_jamo2compatibility(&word[i]);
/*
 * Return a locale string for a CTSVC_LANG_* language code.
 *
 * For language codes that cover more than one system language (Hindi and
 * Bengali in the visible code) the current language setting obtained from
 * ctsvc_get_langset() is consulted, by two-character prefix, to pick among
 * them.
 *
 * @param lang  one of the CTSVC_LANG_* codes
 * @return a const char * locale string
 *
 * NOTE(review): in this listing every line following a case label is missing
 * (the numbering skips one or more lines after each label), so the returned
 * strings, the switch header and the fallback branch cannot be seen here.
 * Consult the original file before changing anything in this function.
 */
162 const char *ctsvc_get_language_locale(int lang)
164 char *langset = ctsvc_get_langset();
167 case CTSVC_LANG_AZERBAIJAN: /* az, Azerbaijan */
169 case CTSVC_LANG_ARABIC: /* ar, Bahrain - Arabic */
171 case CTSVC_LANG_BULGARIAN: /* bg, Bulgaria - Bulgarian */
173 case CTSVC_LANG_CATALAN: /* ca, Spain - Catalan */
175 case CTSVC_LANG_CZECH: /* cs, Czech Republic - Czech */
177 case CTSVC_LANG_DANISH: /* da, Denmark - Danish */
179 case CTSVC_LANG_GERMAN: /* de, Germany - German */
181 case CTSVC_LANG_GREEK: /* el, Greece - Greek */
183 case CTSVC_LANG_ENGLISH: /* en, en_PH, en_US */
185 case CTSVC_LANG_SPANISH: /* es_ES, es_US, El Salvador - Spanish */
187 case CTSVC_LANG_ESTONIAN: /* et, Estonia - Estonian */
189 case CTSVC_LANG_BASQUE: /* eu, Spain - Basque */
191 case CTSVC_LANG_FINNISH: /* fi, Finland - Finnish */
193 case CTSVC_LANG_FRENCH: /* fr_CA, fr_FR */
195 case CTSVC_LANG_IRISH: /* ga, Ireland - Irish */
197 case CTSVC_LANG_GALICIAN: /* gl, Spain - Galician */
/* One language code covers three system languages; the current langset prefix
 * selects which branch is taken. */
199 case CTSVC_LANG_HINDI: /* hi, India - Hindi, Marathi, Nepali */
200 if (STRING_EQUAL == strncmp(langset, "hi", strlen("hi"))) {
203 else if (STRING_EQUAL == strncmp(langset, "mr", strlen("mr"))) {
206 else if (STRING_EQUAL == strncmp(langset, "ne", strlen("ne"))) {
210 case CTSVC_LANG_CROATIAN: /* hr, Bosnia and Herzegovina - Croatian */
212 case CTSVC_LANG_HUNGARIAN: /* hu, Hungary - Hungarian */
214 case CTSVC_LANG_ARMENIAN: /* hy, Armenia - Armenian */
216 case CTSVC_LANG_ICELANDIC: /* is, Iceland - Icelandic */
218 case CTSVC_LANG_ITALIAN: /* it_IT, Italy - Italian */
220 case CTSVC_LANG_JAPANESE: /* ja_JP, japan */
222 case CTSVC_LANG_GEORGIAN: /* ka, Georgia - Georgian */
224 case CTSVC_LANG_KAZAKHSTAN: /* kk, Kazakhstan */
226 case CTSVC_LANG_KOREAN: /* ko, ko_KR */
228 case CTSVC_LANG_LITHUANIAN: /* lt, Lithuania - Lithuanian */
230 case CTSVC_LANG_LATVIAN: /* lv, Latvia - Latvian */
232 case CTSVC_LANG_MACEDONIA: /* mk, Macedonia */
234 case CTSVC_LANG_NORWAY: /* nb, Norway */
236 case CTSVC_LANG_DUTCH: /* nl_Nl, Netherlands Dutch */
238 case CTSVC_LANG_POLISH: /* pl, Polish */
240 case CTSVC_LANG_PORTUGUESE: /* pt_BR, pt_PT, Portugal */
242 case CTSVC_LANG_ROMANIA: /* ro, Romania */
244 case CTSVC_LANG_RUSSIAN: /* ru_RU, Russia */
246 case CTSVC_LANG_SLOVAK: /* sk, Slovakia - Slovak */
248 case CTSVC_LANG_SLOVENIAN: /* sl, Slovenia - Slovenian */
250 case CTSVC_LANG_SERBIAN: /* sr, Serbia - Serbian */
252 case CTSVC_LANG_SWEDISH: /* sv, Finland - Swedish */
254 case CTSVC_LANG_TURKISH: /* tr_TR, Turkey - Turkish */
256 case CTSVC_LANG_UKRAINE: /* uk, Ukraine */
258 case CTSVC_LANG_CHINESE: /* zh_CN, zh_HK, zh_SG, zh_TW */
260 case CTSVC_LANG_THAI: /* th_TH, Thai */
/* The "as" langset prefix is singled out; the other branch is not visible in
 * this listing. */
262 case CTSVC_LANG_BENGALI: /* as, bn */
263 if (STRING_EQUAL == strncmp(langset, "as", strlen("as"))) {
267 case CTSVC_LANG_PUNJABI: /* pa, India */
269 case CTSVC_LANG_MALAYALAM:
271 case CTSVC_LANG_TELUGU:
273 case CTSVC_LANG_TAMIL:
275 case CTSVC_LANG_ORIYA:
277 case CTSVC_LANG_SINHALA:
279 case CTSVC_LANG_GUJARATI:
281 case CTSVC_LANG_KANNADA:
/* NOTE(review): numbers 283-284 are missing here; presumably a further case
 * label with its return - confirm. */
285 case CTSVC_LANG_HEBREW:
287 case CTSVC_LANG_VIETNAMESE:
289 case CTSVC_LANG_PERSIAN:
291 case CTSVC_LANG_UZBEK:
293 case CTSVC_LANG_URDU:
295 case CTSVC_LANG_ALBANIAN:
297 case CTSVC_LANG_BURMESE:
299 case CTSVC_LANG_MALAY:
301 case CTSVC_LANG_KHMER:
303 case CTSVC_LANG_INDONESIAN:
305 case CTSVC_LANG_TAGALOG:
/*
 * Map a system language setting string to a CTSVC_LANG_* code.
 *
 * Each test compares only the first two characters of system_lang
 * (strncmp with a length of strlen("xx")), so "en", "en_US" and any other
 * string starting with "en" all select CTSVC_LANG_ENGLISH. Tests are tried in
 * the order written and the first match wins.
 *
 * Several prefixes share one code: "hi", "mr" and "ne" all select
 * CTSVC_LANG_HINDI; "as" and "bn" both select CTSVC_LANG_BENGALI.
 *
 * @param system_lang  language setting string (see VCONFKEY_LANGSET); a NULL
 *                     pointer is handled by the RETV_IF check, which names
 *                     CTSVC_LANG_OTHERS as the result.
 * @return presumably the selected CTSVC_LANG_* code (the return statement and
 *         the declaration of type are on lines missing from this listing).
 */
312 int ctsvc_get_language_type(const char *system_lang)
314 /* refer to the VCONFKEY_LANGSET */
317 RETV_IF(NULL == system_lang, CTSVC_LANG_OTHERS);
320 if (STRING_EQUAL == strncmp(system_lang, "az", strlen("az")))
321 type = CTSVC_LANG_AZERBAIJAN;
322 /* ar, Bahrain - Arabic */
323 else if (STRING_EQUAL == strncmp(system_lang, "ar", strlen("ar")))
324 type = CTSVC_LANG_ARABIC;
325 /* bg, Bulgaria - Bulgarian */
326 else if (STRING_EQUAL == strncmp(system_lang, "bg", strlen("bg")))
327 type = CTSVC_LANG_BULGARIAN;
328 /* ca, Spain - Catalan */
329 else if (STRING_EQUAL == strncmp(system_lang, "ca", strlen("ca")))
330 type = CTSVC_LANG_CATALAN;
331 /* cs, Czech Republic - Czech */
332 else if (STRING_EQUAL == strncmp(system_lang, "cs", strlen("cs")))
333 type = CTSVC_LANG_CZECH;
334 /* da, Denmark - Danish */
335 else if (STRING_EQUAL == strncmp(system_lang, "da", strlen("da")))
336 type = CTSVC_LANG_DANISH;
337 /* de, Germany - German */
338 else if (STRING_EQUAL == strncmp(system_lang, "de", strlen("de")))
339 type = CTSVC_LANG_GERMAN;
340 /* el, Greece - Greek */
341 else if (STRING_EQUAL == strncmp(system_lang, "el", strlen("el")))
342 type = CTSVC_LANG_GREEK;
343 /* en, en_PH, en_US */
344 else if (STRING_EQUAL == strncmp(system_lang, "en", strlen("en")))
345 type = CTSVC_LANG_ENGLISH;
346 /* es_ES, es_US, El Salvador - Spanish */
347 else if (STRING_EQUAL == strncmp(system_lang, "es", strlen("es")))
348 type = CTSVC_LANG_SPANISH;
349 /* et, Estonia - Estonian */
350 else if (STRING_EQUAL == strncmp(system_lang, "et", strlen("et")))
351 type = CTSVC_LANG_ESTONIAN;
352 /* eu, Spain - Basque */
353 else if (STRING_EQUAL == strncmp(system_lang, "eu", strlen("eu")))
354 type = CTSVC_LANG_BASQUE;
355 /* fi, Finland - Finnish */
356 else if (STRING_EQUAL == strncmp(system_lang, "fi", strlen("fi")))
357 type = CTSVC_LANG_FINNISH;
359 else if (STRING_EQUAL == strncmp(system_lang, "fr", strlen("fr")))
360 type = CTSVC_LANG_FRENCH;
361 /* ga, Ireland - Irish */
362 else if (STRING_EQUAL == strncmp(system_lang, "ga", strlen("ga")))
363 type = CTSVC_LANG_IRISH;
364 /* gl, Spain - Galician */
365 else if (STRING_EQUAL == strncmp(system_lang, "gl", strlen("gl")))
366 type = CTSVC_LANG_GALICIAN;
367 /* hi, India - Hindi */
368 else if (STRING_EQUAL == strncmp(system_lang, "hi", strlen("hi")))
369 type = CTSVC_LANG_HINDI;
370 /* mr, India - marathi */
371 else if (STRING_EQUAL == strncmp(system_lang, "mr", strlen("mr")))
372 type = CTSVC_LANG_HINDI;
373 /* ne, India - nepal */
374 else if (STRING_EQUAL == strncmp(system_lang, "ne", strlen("ne")))
375 type = CTSVC_LANG_HINDI;
376 /* hr, Bosnia and Herzegovina - Croatian */
377 else if (STRING_EQUAL == strncmp(system_lang, "hr", strlen("hr")))
378 type = CTSVC_LANG_CROATIAN;
379 /* hu, Hungary - Hungarian */
380 else if (STRING_EQUAL == strncmp(system_lang, "hu", strlen("hu")))
381 type = CTSVC_LANG_HUNGARIAN;
382 /* hy, Armenia - Armenian */
383 else if (STRING_EQUAL == strncmp(system_lang, "hy", strlen("hy")))
384 type = CTSVC_LANG_ARMENIAN;
385 /* is, Iceland - Icelandic */
386 else if (STRING_EQUAL == strncmp(system_lang, "is", strlen("is")))
387 type = CTSVC_LANG_ICELANDIC;
388 /* it_IT, Italy - Italian */
389 else if (STRING_EQUAL == strncmp(system_lang, "it", strlen("it")))
390 type = CTSVC_LANG_ITALIAN;
392 else if (STRING_EQUAL == strncmp(system_lang, "ja", strlen("ja")))
393 type = CTSVC_LANG_JAPANESE;
394 /* ka, Georgia - Georgian */
395 else if (STRING_EQUAL == strncmp(system_lang, "ka", strlen("ka")))
396 type = CTSVC_LANG_GEORGIAN;
398 else if (STRING_EQUAL == strncmp(system_lang, "kk", strlen("kk")))
399 type = CTSVC_LANG_KAZAKHSTAN;
401 else if (STRING_EQUAL == strncmp(system_lang, "ko", strlen("ko")))
402 type = CTSVC_LANG_KOREAN;
403 /* lt, Lithuania - Lithuanian */
404 else if (STRING_EQUAL == strncmp(system_lang, "lt", strlen("lt")))
405 type = CTSVC_LANG_LITHUANIAN;
406 /* lv, Latvia - Latvian */
407 else if (STRING_EQUAL == strncmp(system_lang, "lv", strlen("lv")))
408 type = CTSVC_LANG_LATVIAN;
410 else if (STRING_EQUAL == strncmp(system_lang, "mk", strlen("mk")))
411 type = CTSVC_LANG_MACEDONIA;
413 else if (STRING_EQUAL == strncmp(system_lang, "nb", strlen("nb")))
414 type = CTSVC_LANG_NORWAY;
415 /* nl_Nl, Netherlands Dutch */
416 else if (STRING_EQUAL == strncmp(system_lang, "nl", strlen("nl")))
417 type = CTSVC_LANG_DUTCH;
419 else if (STRING_EQUAL == strncmp(system_lang, "pl", strlen("pl")))
420 type = CTSVC_LANG_POLISH;
421 /* pt_BR, pt_PT, Portugal */
422 else if (STRING_EQUAL == strncmp(system_lang, "pt", strlen("pt")))
423 type = CTSVC_LANG_PORTUGUESE;
425 else if (STRING_EQUAL == strncmp(system_lang, "ro", strlen("ro")))
426 type = CTSVC_LANG_ROMANIA;
428 else if (STRING_EQUAL == strncmp(system_lang, "ru", strlen("ru")))
429 type = CTSVC_LANG_RUSSIAN;
430 /* sk, Slovakia - Slovak */
431 else if (STRING_EQUAL == strncmp(system_lang, "sk", strlen("sk")))
432 type = CTSVC_LANG_SLOVAK;
433 /* sl, Slovenia - Slovenian */
434 else if (STRING_EQUAL == strncmp(system_lang, "sl", strlen("sl")))
435 type = CTSVC_LANG_SLOVENIAN;
436 /* sr, Serbia - Serbian */
437 else if (STRING_EQUAL == strncmp(system_lang, "sr", strlen("sr")))
438 type = CTSVC_LANG_SERBIAN;
439 /* sv, Finland - Swedish */
440 else if (STRING_EQUAL == strncmp(system_lang, "sv", strlen("sv")))
441 type = CTSVC_LANG_SWEDISH;
442 /* tr_TR, Turkey - Turkish */
443 else if (STRING_EQUAL == strncmp(system_lang, "tr", strlen("tr")))
444 type = CTSVC_LANG_TURKISH;
446 else if (STRING_EQUAL == strncmp(system_lang, "uk", strlen("uk")))
447 type = CTSVC_LANG_UKRAINE;
448 /* zh_CN, zh_HK, zh_SG, zh_TW */
449 else if (STRING_EQUAL == strncmp(system_lang, "zh", strlen("zh")))
450 type = CTSVC_LANG_CHINESE;
452 else if (STRING_EQUAL == strncmp(system_lang, "th", strlen("th")))
453 type = CTSVC_LANG_THAI;
/* "as" and "bn" both select CTSVC_LANG_BENGALI. */
454 else if (STRING_EQUAL == strncmp(system_lang, "as", strlen("as")))
455 type = CTSVC_LANG_BENGALI;
456 else if (STRING_EQUAL == strncmp(system_lang, "bn", strlen("bn")))
457 type = CTSVC_LANG_BENGALI;
458 else if (STRING_EQUAL == strncmp(system_lang, "pa", strlen("pa")))
459 type = CTSVC_LANG_PUNJABI;
460 else if (STRING_EQUAL == strncmp(system_lang, "ml", strlen("ml")))
461 type = CTSVC_LANG_MALAYALAM;
462 else if (STRING_EQUAL == strncmp(system_lang, "te", strlen("te")))
463 type = CTSVC_LANG_TELUGU;
464 else if (STRING_EQUAL == strncmp(system_lang, "ta", strlen("ta")))
465 type = CTSVC_LANG_TAMIL;
466 else if (STRING_EQUAL == strncmp(system_lang, "or", strlen("or")))
467 type = CTSVC_LANG_ORIYA;
468 else if (STRING_EQUAL == strncmp(system_lang, "si", strlen("si")))
469 type = CTSVC_LANG_SINHALA;
470 else if (STRING_EQUAL == strncmp(system_lang, "gu", strlen("gu")))
471 type = CTSVC_LANG_GUJARATI;
472 else if (STRING_EQUAL == strncmp(system_lang, "kn", strlen("kn")))
473 type = CTSVC_LANG_KANNADA;
474 else if (STRING_EQUAL == strncmp(system_lang, "lo", strlen("lo")))
475 type = CTSVC_LANG_LAO;
476 else if (STRING_EQUAL == strncmp(system_lang, "he", strlen("he")))
477 type = CTSVC_LANG_HEBREW;
478 else if (STRING_EQUAL == strncmp(system_lang, "vi", strlen("vi")))
479 type = CTSVC_LANG_VIETNAMESE;
480 else if (STRING_EQUAL == strncmp(system_lang, "fa", strlen("fa")))
481 type = CTSVC_LANG_PERSIAN;
482 else if (STRING_EQUAL == strncmp(system_lang, "uz", strlen("uz")))
483 type = CTSVC_LANG_UZBEK;
484 else if (STRING_EQUAL == strncmp(system_lang, "ur", strlen("ur")))
485 type = CTSVC_LANG_URDU;
486 else if (STRING_EQUAL == strncmp(system_lang, "sq", strlen("sq")))
487 type = CTSVC_LANG_ALBANIAN;
488 else if (STRING_EQUAL == strncmp(system_lang, "my", strlen("my")))
489 type = CTSVC_LANG_BURMESE;
490 else if (STRING_EQUAL == strncmp(system_lang, "ms", strlen("ms")))
491 type = CTSVC_LANG_MALAY;
492 else if (STRING_EQUAL == strncmp(system_lang, "km", strlen("km")))
493 type = CTSVC_LANG_KHMER;
494 else if (STRING_EQUAL == strncmp(system_lang, "id", strlen("id")))
495 type = CTSVC_LANG_INDONESIAN;
496 else if (STRING_EQUAL == strncmp(system_lang, "tl", strlen("tl")))
497 type = CTSVC_LANG_TAGALOG;
/* NOTE(review): presumably the body of a final "else" (number 498 is missing
 * from this listing) - unmatched prefixes end up as CTSVC_LANG_OTHERS. */
499 type = CTSVC_LANG_OTHERS;
/* Current language setting string for this file; read through
 * ctsvc_get_langset() and replaced by ctsvc_set_langset(). Starts as NULL. */
504 static char *langset = NULL;
/*
 * Return the stored language setting string.
 *
 * The value is passed through SAFE_STR(); presumably that macro substitutes
 * an empty string when langset is still NULL - confirm against its
 * definition. The returned pointer refers to the stored string, not a copy.
 */
506 char* ctsvc_get_langset()
508 return SAFE_STR(langset);
/*
 * Replace the stored language setting string.
 *
 * The pointer itself is stored (no copy is made in the visible code), so the
 * string must stay valid for as long as it is the current setting.
 *
 * NOTE(review): numbers 512-513 are missing from this listing; whether the
 * previous value is released before being overwritten cannot be seen here -
 * confirm against the original file before relying on either behaviour.
 */
511 void ctsvc_set_langset(char *new_langset)
514 langset = new_langset;