4 * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 #include <unicode/ustring.h>
20 #include <unicode/unorm.h>
21 #include <unicode/ucol.h>
22 #include <unicode/uset.h>
24 #include "ctsvc_internal.h"
25 #include "ctsvc_normalize.h"
26 #include "ctsvc_localize.h"
27 #include "ctsvc_localize_utils.h"
29 #include "ctsvc_localize_kor.h"
30 #include "ctsvc_localize_jp.h"
/*
 * Map a CTSVC_LANG_* language value to the CTSVC_SORT_* sort type that is
 * returned for it.
 *
 * Several language values share one sort type: the Cyrillic-script languages
 * (Russian, Bulgarian, Macedonia, Kazakhstan, Serbian, Ukraine) all yield
 * CTSVC_SORT_CYRILLIC, and Arabic/Persian yield CTSVC_SORT_ARABIC.
 *
 * @param language  one of the CTSVC_LANG_* values
 * @return the matching CTSVC_SORT_* value
 */
32 int ctsvc_get_sort_type_from_language(int language)
35 case CTSVC_LANG_CHINESE:
36 return CTSVC_SORT_CJK;
37 case CTSVC_LANG_JAPANESE:
38 return CTSVC_SORT_JAPANESE;
39 case CTSVC_LANG_KOREAN:
40 return CTSVC_SORT_KOREAN;
41 case CTSVC_LANG_ENGLISH:
42 return CTSVC_SORT_WESTERN;
43 case CTSVC_LANG_NUMBER:
44 return CTSVC_SORT_NUMBER;
/* Languages written in the Cyrillic script share one sort type. */
45 case CTSVC_LANG_RUSSIAN:
46 case CTSVC_LANG_BULGARIAN:
47 case CTSVC_LANG_MACEDONIA:
48 case CTSVC_LANG_KAZAKHSTAN:
49 case CTSVC_LANG_SERBIAN:
50 case CTSVC_LANG_UKRAINE:
51 return CTSVC_SORT_CYRILLIC;
52 case CTSVC_LANG_ARMENIAN:
53 return CTSVC_SORT_ARMENIAN;
54 case CTSVC_LANG_GREEK:
55 return CTSVC_SORT_GREEK;
/* Arabic and Persian are sorted with the same (Arabic) sort type. */
56 case CTSVC_LANG_ARABIC:
57 case CTSVC_LANG_PERSIAN:
59 return CTSVC_SORT_ARABIC;
60 case CTSVC_LANG_HINDI:
61 return CTSVC_SORT_DEVANAGARI;
62 case CTSVC_LANG_GEORGIAN:
63 return CTSVC_SORT_GEORGIAN;
64 case CTSVC_LANG_TURKISH:
65 return CTSVC_SORT_TURKISH;
67 return CTSVC_SORT_THAI;
68 case CTSVC_LANG_BENGALI:
69 return CTSVC_SORT_BENGALI;
70 case CTSVC_LANG_PUNJABI:
71 return CTSVC_SORT_PUNJABI;
72 case CTSVC_LANG_MALAYALAM:
73 return CTSVC_SORT_MALAYALAM;
74 case CTSVC_LANG_TELUGU:
75 return CTSVC_SORT_TELUGU;
76 case CTSVC_LANG_TAMIL:
77 return CTSVC_SORT_TAMIL;
78 case CTSVC_LANG_ORIYA:
79 return CTSVC_SORT_ORIYA;
80 case CTSVC_LANG_SINHALA:
81 return CTSVC_SORT_SINHALA;
82 case CTSVC_LANG_GUJARATI:
83 return CTSVC_SORT_GUJARATI;
84 case CTSVC_LANG_KANNADA:
85 return CTSVC_SORT_KANNADA;
87 return CTSVC_SORT_LAO;
88 case CTSVC_LANG_HEBREW:
89 return CTSVC_SORT_HEBREW;
90 case CTSVC_LANG_BURMESE:
91 return CTSVC_SORT_BURMESE;
92 case CTSVC_LANG_KHMER:
93 return CTSVC_SORT_KHMER;
94 case CTSVC_LANG_OTHERS:
95 return CTSVC_SORT_OTHERS;
/* NOTE(review): presumably the fallback for language values not listed above (e.g. the Latin-script European languages) - confirm. */
97 return CTSVC_SORT_WESTERN;
/*
 * Determine the sort type of a name from its first character.
 *
 * The leading UTF-8 character of src is copied into a scratch buffer,
 * converted to UChar with u_strFromUTF8(), classified with
 * ctsvc_check_language() and the resulting language is mapped with
 * ctsvc_get_sort_type_from_language().
 *
 * @param src  UTF-8 encoded name; src[0] is read unconditionally.
 *             NOTE(review): no NULL check on src is visible here - confirm
 *             that callers never pass NULL.
 * @return CONTACTS_ERROR_INVALID_PARAMETER when ctsvc_check_utf8() reports a
 *         length <= 0, CONTACTS_ERROR_SYSTEM when the ICU conversion fails;
 *         otherwise the computed sort type is stored in ret
 *         (presumably returned - confirm).
 */
101 int ctsvc_get_name_sort_type(const char *src)
/* NOTE(review): 0 is presumably equivalent to ICU's U_ZERO_ERROR - confirm. */
103 UErrorCode status = 0;
104 UChar tmp_result[10];
105 int ret = CTSVC_SORT_OTHERS;
/* presumably the byte length of the UTF-8 sequence introduced by src[0]; <= 0 is treated as invalid */
110 char_len = ctsvc_check_utf8(src[0]);
111 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
/* Isolate the first character as its own NUL-terminated UTF-8 string. */
113 memcpy(char_src, &src[0], char_len);
114 char_src[char_len] = '\0';
/* Convert that single character to UChar for the language check. */
116 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
117 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
118 "u_strFromUTF8() Fail(%s)", u_errorName(status));
120 language_type = ctsvc_check_language(tmp_result);
121 ret = ctsvc_get_sort_type_from_language(language_type);
126 void ctsvc_extra_normalize(UChar *word, int32_t word_size)
129 for (i = 0; i < word_size; i++) {
130 /* FF00 ~ FF60, FFE0~FFE6 : fullwidth -> halfwidth */
131 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
132 int unicode_value1 = 0;
133 int unicode_value2 = 0;
134 unicode_value1 = 0x0;
135 unicode_value2 = (0xFF & word[i]) + 0x20;
136 word[i] = unicode_value1 << 8 | unicode_value2;
137 } else if (ctsvc_is_hangul(word[i])) {
138 ctsvc_hangul_compatibility2jamo(&word[i]);
143 void ctsvc_extra_index_normalize(UChar *word, int32_t word_size)
146 for (i = 0; i < word_size; i++) {
147 /* FF00 ~ FF60, FFE0~FFE6 : fullwidth -> halfwidth */
148 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
149 int unicode_value1 = 0;
150 int unicode_value2 = 0;
151 unicode_value1 = 0x0;
152 unicode_value2 = (0xFF & word[i]) + 0x20;
153 word[i] = unicode_value1 << 8 | unicode_value2;
154 } else if (ctsvc_is_hangul(word[i])) {
155 ctsvc_hangul_jamo2compatibility(&word[i]);
/*
 * Select the locale string associated with a CTSVC_LANG_* value.
 *
 * Most language values select their locale directly. Two of them cover
 * several languages and therefore also inspect the current language setting
 * obtained from ctsvc_get_langset():
 *  - CTSVC_LANG_HINDI checks whether the setting starts with "hi", "mr" or "ne";
 *  - CTSVC_LANG_BENGALI checks whether the setting starts with "as".
 *
 * @param lang  one of the CTSVC_LANG_* values
 * @return a locale string for lang (const char *)
 */
160 const char *ctsvc_get_language_locale(int lang)
/* Current language setting; only consulted by the Hindi and Bengali cases. */
162 char *langset = ctsvc_get_langset();
165 case CTSVC_LANG_AZERBAIJAN: /* az, Azerbaijan */
167 case CTSVC_LANG_ARABIC: /* ar, Bahrain - Arabic */
169 case CTSVC_LANG_BULGARIAN: /* bg, Bulgaria - Bulgarian */
171 case CTSVC_LANG_CATALAN: /* ca, Spain - Catalan */
173 case CTSVC_LANG_CZECH: /* cs, Czech Republic - Czech */
175 case CTSVC_LANG_DANISH: /* da, Denmark - Danish */
177 case CTSVC_LANG_GERMAN: /* de, Germany - German */
179 case CTSVC_LANG_GREEK: /* el, Greece - Greek */
181 case CTSVC_LANG_ENGLISH: /* en, en_PH, en_US */
183 case CTSVC_LANG_SPANISH: /* es_ES, es_US, El Salvador - Spanish */
185 case CTSVC_LANG_ESTONIAN: /* et, Estonia - Estonian */
187 case CTSVC_LANG_BASQUE: /* eu, Spain - Basque */
189 case CTSVC_LANG_FINNISH: /* fi, Finland - Finnish */
191 case CTSVC_LANG_FRENCH: /* fr_CA, fr_FR */
193 case CTSVC_LANG_IRISH: /* ga, Ireland - Irish */
195 case CTSVC_LANG_GALICIAN: /* gl, Spain - Galician */
/* One language value covers three languages; the langset prefix tells them apart. */
197 case CTSVC_LANG_HINDI: /* hi, India - Hindi, Marathi, Nepali */
198 if (STRING_EQUAL == strncmp(langset, "hi", strlen("hi")))
200 else if (STRING_EQUAL == strncmp(langset, "mr", strlen("mr")))
202 else if (STRING_EQUAL == strncmp(langset, "ne", strlen("ne")))
206 case CTSVC_LANG_CROATIAN: /* hr, Bosnia and Herzegovina - Croatian */
208 case CTSVC_LANG_HUNGARIAN: /* hu, Hungary - Hungarian */
210 case CTSVC_LANG_ARMENIAN: /* hy, Armenia - Armenian */
212 case CTSVC_LANG_ICELANDIC: /* is, Iceland - Icelandic */
214 case CTSVC_LANG_ITALIAN: /* it_IT, Italy - Italian */
216 case CTSVC_LANG_JAPANESE: /* ja_JP, Japan */
218 case CTSVC_LANG_GEORGIAN: /* ka, Georgia - Georgian */
220 case CTSVC_LANG_KAZAKHSTAN: /* kk, Kazakhstan */
222 case CTSVC_LANG_KOREAN: /* ko, ko_KR */
224 case CTSVC_LANG_LITHUANIAN: /* lt, Lithuania - Lithuanian */
226 case CTSVC_LANG_LATVIAN: /* lv, Latvia - Latvian */
228 case CTSVC_LANG_MACEDONIA: /* mk, Macedonia */
230 case CTSVC_LANG_NORWAY: /* nb, Norway */
232 case CTSVC_LANG_DUTCH: /* nl_NL, Netherlands - Dutch */
234 case CTSVC_LANG_POLISH: /* pl, Polish */
236 case CTSVC_LANG_PORTUGUESE: /* pt_BR, pt_PT, Portugal */
238 case CTSVC_LANG_ROMANIA: /* ro, Romania */
240 case CTSVC_LANG_RUSSIAN: /* ru_RU, Russia */
242 case CTSVC_LANG_SLOVAK: /* sk, Slovakia - Slovak */
244 case CTSVC_LANG_SLOVENIAN: /* sl, Slovenia - Slovenian */
246 case CTSVC_LANG_SERBIAN: /* sr, Serbia - Serbian */
248 case CTSVC_LANG_SWEDISH: /* sv, Finland - Swedish */
250 case CTSVC_LANG_TURKISH: /* tr_TR, Turkey - Turkish */
252 case CTSVC_LANG_UKRAINE: /* uk, Ukraine */
254 case CTSVC_LANG_CHINESE: /* zh_CN, zh_HK, zh_SG, zh_TW */
256 case CTSVC_LANG_THAI: /* th_TH, Thai */
/* Assamese ("as") and Bengali ("bn") share this language value; the langset prefix is checked for "as". */
258 case CTSVC_LANG_BENGALI: /* as, bn */
259 if (STRING_EQUAL == strncmp(langset, "as", strlen("as")))
262 case CTSVC_LANG_PUNJABI: /* pa, India */
264 case CTSVC_LANG_MALAYALAM:
266 case CTSVC_LANG_TELUGU:
268 case CTSVC_LANG_TAMIL:
270 case CTSVC_LANG_ORIYA:
272 case CTSVC_LANG_SINHALA:
274 case CTSVC_LANG_GUJARATI:
276 case CTSVC_LANG_KANNADA:
280 case CTSVC_LANG_HEBREW:
282 case CTSVC_LANG_VIETNAMESE:
284 case CTSVC_LANG_PERSIAN:
286 case CTSVC_LANG_UZBEK:
288 case CTSVC_LANG_URDU:
290 case CTSVC_LANG_ALBANIAN:
292 case CTSVC_LANG_BURMESE:
294 case CTSVC_LANG_MALAY:
296 case CTSVC_LANG_KHMER:
298 case CTSVC_LANG_INDONESIAN:
300 case CTSVC_LANG_TAGALOG:
307 int ctsvc_get_language_type(const char *system_lang)
309 /* refer to the VCONFKEY_LANGSET */
312 RETV_IF(NULL == system_lang, CTSVC_LANG_OTHERS);
315 if (STRING_EQUAL == strncmp(system_lang, "az", strlen("az")))
316 type = CTSVC_LANG_AZERBAIJAN;
317 /* ar, Bahrain - Arabic */
318 else if (STRING_EQUAL == strncmp(system_lang, "ar", strlen("ar")))
319 type = CTSVC_LANG_ARABIC;
320 /* bg, Bulgaria - Bulgarian */
321 else if (STRING_EQUAL == strncmp(system_lang, "bg", strlen("bg")))
322 type = CTSVC_LANG_BULGARIAN;
323 /* ca, Spain - Catalan */
324 else if (STRING_EQUAL == strncmp(system_lang, "ca", strlen("ca")))
325 type = CTSVC_LANG_CATALAN;
326 /* cs, Czech Republic - Czech */
327 else if (STRING_EQUAL == strncmp(system_lang, "cs", strlen("cs")))
328 type = CTSVC_LANG_CZECH;
329 /* da, Denmark - Danish */
330 else if (STRING_EQUAL == strncmp(system_lang, "da", strlen("da")))
331 type = CTSVC_LANG_DANISH;
332 /* de, Germany - German */
333 else if (STRING_EQUAL == strncmp(system_lang, "de", strlen("de")))
334 type = CTSVC_LANG_GERMAN;
335 /* el, Greece - Greek */
336 else if (STRING_EQUAL == strncmp(system_lang, "el", strlen("el")))
337 type = CTSVC_LANG_GREEK;
338 /* en, en_PH, en_US */
339 else if (STRING_EQUAL == strncmp(system_lang, "en", strlen("en")))
340 type = CTSVC_LANG_ENGLISH;
341 /* es_ES, es_US, El Salvador - Spanish */
342 else if (STRING_EQUAL == strncmp(system_lang, "es", strlen("es")))
343 type = CTSVC_LANG_SPANISH;
344 /* et, Estonia - Estonian */
345 else if (STRING_EQUAL == strncmp(system_lang, "et", strlen("et")))
346 type = CTSVC_LANG_ESTONIAN;
347 /* eu, Spain - Basque */
348 else if (STRING_EQUAL == strncmp(system_lang, "eu", strlen("eu")))
349 type = CTSVC_LANG_BASQUE;
350 /* fi, Finland - Finnish */
351 else if (STRING_EQUAL == strncmp(system_lang, "fi", strlen("fi")))
352 type = CTSVC_LANG_FINNISH;
354 else if (STRING_EQUAL == strncmp(system_lang, "fr", strlen("fr")))
355 type = CTSVC_LANG_FRENCH;
356 /* ga, Ireland - Irish */
357 else if (STRING_EQUAL == strncmp(system_lang, "ga", strlen("ga")))
358 type = CTSVC_LANG_IRISH;
359 /* gl, Spain - Galician */
360 else if (STRING_EQUAL == strncmp(system_lang, "gl", strlen("gl")))
361 type = CTSVC_LANG_GALICIAN;
362 /* hi, India - Hindi */
363 else if (STRING_EQUAL == strncmp(system_lang, "hi", strlen("hi")))
364 type = CTSVC_LANG_HINDI;
365 /* mr, India - marathi */
366 else if (STRING_EQUAL == strncmp(system_lang, "mr", strlen("mr")))
367 type = CTSVC_LANG_HINDI;
368 /* ne, India - nepal */
369 else if (STRING_EQUAL == strncmp(system_lang, "ne", strlen("ne")))
370 type = CTSVC_LANG_HINDI;
371 /* hr, Bosnia and Herzegovina - Croatian */
372 else if (STRING_EQUAL == strncmp(system_lang, "hr", strlen("hr")))
373 type = CTSVC_LANG_CROATIAN;
374 /* hu, Hungary - Hungarian */
375 else if (STRING_EQUAL == strncmp(system_lang, "hu", strlen("hu")))
376 type = CTSVC_LANG_HUNGARIAN;
377 /* hy, Armenia - Armenian */
378 else if (STRING_EQUAL == strncmp(system_lang, "hy", strlen("hy")))
379 type = CTSVC_LANG_ARMENIAN;
380 /* is, Iceland - Icelandic */
381 else if (STRING_EQUAL == strncmp(system_lang, "is", strlen("is")))
382 type = CTSVC_LANG_ICELANDIC;
383 /* it_IT, Italy - Italian */
384 else if (STRING_EQUAL == strncmp(system_lang, "it", strlen("it")))
385 type = CTSVC_LANG_ITALIAN;
387 else if (STRING_EQUAL == strncmp(system_lang, "ja", strlen("ja")))
388 type = CTSVC_LANG_JAPANESE;
389 /* ka, Georgia - Georgian */
390 else if (STRING_EQUAL == strncmp(system_lang, "ka", strlen("ka")))
391 type = CTSVC_LANG_GEORGIAN;
393 else if (STRING_EQUAL == strncmp(system_lang, "kk", strlen("kk")))
394 type = CTSVC_LANG_KAZAKHSTAN;
396 else if (STRING_EQUAL == strncmp(system_lang, "ko", strlen("ko")))
397 type = CTSVC_LANG_KOREAN;
398 /* lt, Lithuania - Lithuanian */
399 else if (STRING_EQUAL == strncmp(system_lang, "lt", strlen("lt")))
400 type = CTSVC_LANG_LITHUANIAN;
401 /* lv, Latvia - Latvian */
402 else if (STRING_EQUAL == strncmp(system_lang, "lv", strlen("lv")))
403 type = CTSVC_LANG_LATVIAN;
405 else if (STRING_EQUAL == strncmp(system_lang, "mk", strlen("mk")))
406 type = CTSVC_LANG_MACEDONIA;
408 else if (STRING_EQUAL == strncmp(system_lang, "nb", strlen("nb")))
409 type = CTSVC_LANG_NORWAY;
410 /* nl_Nl, Netherlands Dutch */
411 else if (STRING_EQUAL == strncmp(system_lang, "nl", strlen("nl")))
412 type = CTSVC_LANG_DUTCH;
414 else if (STRING_EQUAL == strncmp(system_lang, "pl", strlen("pl")))
415 type = CTSVC_LANG_POLISH;
416 /* pt_BR, pt_PT, Portugal */
417 else if (STRING_EQUAL == strncmp(system_lang, "pt", strlen("pt")))
418 type = CTSVC_LANG_PORTUGUESE;
420 else if (STRING_EQUAL == strncmp(system_lang, "ro", strlen("ro")))
421 type = CTSVC_LANG_ROMANIA;
423 else if (STRING_EQUAL == strncmp(system_lang, "ru", strlen("ru")))
424 type = CTSVC_LANG_RUSSIAN;
425 /* sk, Slovakia - Slovak */
426 else if (STRING_EQUAL == strncmp(system_lang, "sk", strlen("sk")))
427 type = CTSVC_LANG_SLOVAK;
428 /* sl, Slovenia - Slovenian */
429 else if (STRING_EQUAL == strncmp(system_lang, "sl", strlen("sl")))
430 type = CTSVC_LANG_SLOVENIAN;
431 /* sr, Serbia - Serbian */
432 else if (STRING_EQUAL == strncmp(system_lang, "sr", strlen("sr")))
433 type = CTSVC_LANG_SERBIAN;
434 /* sv, Finland - Swedish */
435 else if (STRING_EQUAL == strncmp(system_lang, "sv", strlen("sv")))
436 type = CTSVC_LANG_SWEDISH;
437 /* tr_TR, Turkey - Turkish */
438 else if (STRING_EQUAL == strncmp(system_lang, "tr", strlen("tr")))
439 type = CTSVC_LANG_TURKISH;
441 else if (STRING_EQUAL == strncmp(system_lang, "uk", strlen("uk")))
442 type = CTSVC_LANG_UKRAINE;
443 /* zh_CN, zh_HK, zh_SG, zh_TW */
444 else if (STRING_EQUAL == strncmp(system_lang, "zh", strlen("zh")))
445 type = CTSVC_LANG_CHINESE;
447 else if (STRING_EQUAL == strncmp(system_lang, "th", strlen("th")))
448 type = CTSVC_LANG_THAI;
449 else if (STRING_EQUAL == strncmp(system_lang, "as", strlen("as")))
450 type = CTSVC_LANG_BENGALI;
451 else if (STRING_EQUAL == strncmp(system_lang, "bn", strlen("bn")))
452 type = CTSVC_LANG_BENGALI;
453 else if (STRING_EQUAL == strncmp(system_lang, "pa", strlen("pa")))
454 type = CTSVC_LANG_PUNJABI;
455 else if (STRING_EQUAL == strncmp(system_lang, "ml", strlen("ml")))
456 type = CTSVC_LANG_MALAYALAM;
457 else if (STRING_EQUAL == strncmp(system_lang, "te", strlen("te")))
458 type = CTSVC_LANG_TELUGU;
459 else if (STRING_EQUAL == strncmp(system_lang, "ta", strlen("ta")))
460 type = CTSVC_LANG_TAMIL;
461 else if (STRING_EQUAL == strncmp(system_lang, "or", strlen("or")))
462 type = CTSVC_LANG_ORIYA;
463 else if (STRING_EQUAL == strncmp(system_lang, "si", strlen("si")))
464 type = CTSVC_LANG_SINHALA;
465 else if (STRING_EQUAL == strncmp(system_lang, "gu", strlen("gu")))
466 type = CTSVC_LANG_GUJARATI;
467 else if (STRING_EQUAL == strncmp(system_lang, "kn", strlen("kn")))
468 type = CTSVC_LANG_KANNADA;
469 else if (STRING_EQUAL == strncmp(system_lang, "lo", strlen("lo")))
470 type = CTSVC_LANG_LAO;
471 else if (STRING_EQUAL == strncmp(system_lang, "he", strlen("he")))
472 type = CTSVC_LANG_HEBREW;
473 else if (STRING_EQUAL == strncmp(system_lang, "vi", strlen("vi")))
474 type = CTSVC_LANG_VIETNAMESE;
475 else if (STRING_EQUAL == strncmp(system_lang, "fa", strlen("fa")))
476 type = CTSVC_LANG_PERSIAN;
477 else if (STRING_EQUAL == strncmp(system_lang, "uz", strlen("uz")))
478 type = CTSVC_LANG_UZBEK;
479 else if (STRING_EQUAL == strncmp(system_lang, "ur", strlen("ur")))
480 type = CTSVC_LANG_URDU;
481 else if (STRING_EQUAL == strncmp(system_lang, "sq", strlen("sq")))
482 type = CTSVC_LANG_ALBANIAN;
483 else if (STRING_EQUAL == strncmp(system_lang, "my", strlen("my")))
484 type = CTSVC_LANG_BURMESE;
485 else if (STRING_EQUAL == strncmp(system_lang, "ms", strlen("ms")))
486 type = CTSVC_LANG_MALAY;
487 else if (STRING_EQUAL == strncmp(system_lang, "km", strlen("km")))
488 type = CTSVC_LANG_KHMER;
489 else if (STRING_EQUAL == strncmp(system_lang, "id", strlen("id")))
490 type = CTSVC_LANG_INDONESIAN;
491 else if (STRING_EQUAL == strncmp(system_lang, "tl", strlen("tl")))
492 type = CTSVC_LANG_TAGALOG;
494 type = CTSVC_LANG_OTHERS;
/* File-scope language setting string; read via ctsvc_get_langset() and replaced via ctsvc_set_langset(). */
499 static char *langset = NULL;
/*
 * Return the stored language setting string.
 *
 * The file-scope langset pointer is passed through SAFE_STR()
 * (presumably substituting a non-NULL string when langset is NULL - confirm
 * against the macro definition).
 */
501 char* ctsvc_get_langset()
503 return SAFE_STR(langset);
/*
 * Replace the stored language setting.
 *
 * @param new_langset  string whose pointer is stored in the file-scope
 *                     langset variable.
 *                     NOTE(review): the pointer itself is stored (no copy is
 *                     made by the assignment below), so the string must
 *                     outlive its use here - confirm ownership with callers.
 */
506 void ctsvc_set_langset(char *new_langset)
509 langset = new_langset;