Lcov statement added for coverage
[platform/core/pim/contacts-service.git] / common / ctsvc_localize_utils.c
1 /*
2  * Contacts Service
3  *
4  * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19 #include <unicode/ustring.h>
20 #include <unicode/unorm.h>
21 #include <unicode/ucol.h>
22 #include <unicode/uset.h>
23
24 #include "ctsvc_internal.h"
25 #include "ctsvc_localize_utils.h"
26
27 int ctsvc_check_utf8(char c)
28 {
29         if ((c & 0xff) < (128 & 0xff))
30                 return 1;
31         else if ((c & (char)0xe0) == (char)0xc0)
32                 return 2;
33         else if ((c & (char)0xf0) == (char)0xe0)
34                 return 3;
35         else if ((c & (char)0xf8) == (char)0xf0)
36                 return 4;
37         else if ((c & (char)0xfc) == (char)0xf8)
38                 return 5;
39         else if ((c & (char)0xfe) == (char)0xfc)
40                 return 6;
41         else
42                 return CONTACTS_ERROR_INVALID_PARAMETER;
43 }
44
45 /**
46  * check language type with first word(UChar) using UBLOCK
47  */
48 int ctsvc_check_language(UChar *word)
49 {
50         int type;
51
52         if (u_isdigit(word[0])) {
53                 type = CTSVC_LANG_NUMBER;
54         } else if (u_isalpha(word[0])) {
55                 /*
56                  * refer to the uchar.h
57                  * #define U_GC_L_MASK  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
58                  * U_GC_LU_MASK : U_UPPERCASE_LETTER
59                  * U_GC_LL_MASK : U_LOWERCASE_LETTER
60                  * U_GC_LT_MASK : U_TITLECASE_LETTER
61                  * U_GC_LM_MASK : U_MODIFIER_LETTER
62                  * U_GC_LO_MASK : U_OTHER_LETTER
63                  */
64
65                 UBlockCode code = ublock_getCode(word[0]);
66                 DBG("Character unicode block is %d", code);
67
68                 switch (code) {
69                         /* english */
70                 case UBLOCK_BASIC_LATIN:          /* =1, [0000] */
71                 case UBLOCK_LATIN_1_SUPPLEMENT:   /* =2, [0080] */
72                 case UBLOCK_LATIN_EXTENDED_A:     /* =3, [0100] */
73                 case UBLOCK_LATIN_EXTENDED_B:     /* =4, [0180] */
74                 case UBLOCK_LATIN_EXTENDED_ADDITIONAL: /* =38, [1E00] */
75                         type = CTSVC_LANG_ENGLISH;
76                         /* type = CTSVC_LANG_CATALAN; */   /* ca, Spain - Catalan */
77                         /* type = CTSVC_LANG_GERMAN; */    /* de, Germany - German */
78                         /* type = CTSVC_LANG_BASQUE; */    /* eu, Spain - Basque */
79                         /* type = CTSVC_LANG_DUTCH; */     /* nl_Nl, Netherlands Dutch */
80                         /* type = CTSVC_LANG_FRENCH; */    /* fr_CA, fr_FR */
81                         /* type = CTSVC_LANG_ITALIAN; */   /* it_IT, Italy - Italian */
82                         /* type = CTSVC_LANG_PORTUGUESE; *//* pt_BR, pt_PT, Portugal */
83                         /* type = CTSVC_LANG_SPANISH; */   /*/ es_ES, es_US, El Salvador - Spanish */
84                         /* type = CTSVC_LANG_NORWAY; */    /* nb, Norway */
85                         /* type = CTSVC_LANG_DANISH; */    /* da, Denmark - Danish */
86                         /* type = CTSVC_LANG_AZERBAIJAN; *//* az, Azerbaijan */
87                         /* type = CTSVC_LANG_ROMANIA; */   /* ro, Romania */
88                         /* type = CTSVC_LANG_CZECH; */     /* cs, Czech Republic - Czech */
89                         /* type = CTSVC_LANG_ESTONIAN; */  /* et, Estonia - Estonian */
90                         /* type = CTSVC_LANG_FINNISH; */   /* fi, Finland - Finnish */
91                         /* type = CTSVC_LANG_IRISH; */     /* ga, Ireland - Irish */
92                         /* type = CTSVC_LANG_GALICIAN; */  /* gl, Spain - Galician */
93                         /* type = CTSVC_LANG_HUNGARIAN; */ /* hu, Hungary - Hungarian */
94                         /* type = CTSVC_LANG_SWEDISH; */   /* sv, Finland - Swedish */
95                         /* type = CTSVC_LANG_SLOVENIAN; */ /* sl, Slovenia - Slovenian */
96                         /* type = CTSVC_LANG_SLOVAK; */    /* sk, Slovakia - Slovak */
97                         /* type = CTSVC_LANG_LITHUANIAN; *//* lt, Lithuania - Lithuanian */
98                         /* type = CTSVC_LANG_POLISH; */    /* pl, Polish */
99                         /* type = CTSVC_LANG_LATVIAN; */   /* lv, Latvia - Latvian */
100                         /* type = CTSVC_LANG_CROATIAN; */  /* hr, Bosnia and Herzegovina - Croatian */
101                         /* type = CTSVC_LANG_ICELANDIC; */ /* is, Iceland - Icelandic */
102                         break;
103
104                         /* korean */
105                 case UBLOCK_HANGUL_JAMO:                /* =30, [1100] */
106                 case UBLOCK_HANGUL_COMPATIBILITY_JAMO:  /* =65, [3130] */
107                 case UBLOCK_HANGUL_SYLLABLES:           /* =74, [AC00] */
108                 case UBLOCK_HANGUL_JAMO_EXTENDED_A:     /* =180, [A960] */
109                 case UBLOCK_HANGUL_JAMO_EXTENDED_B:     /* =185, [D7B0] */
110                         type = CTSVC_LANG_KOREAN;
111                         break;
112
113                         /* chainese */
114                 case UBLOCK_CJK_RADICALS_SUPPLEMENT:                 /* =58, [2E80] */
115                 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:             /* =61, [3000] */
116                 case UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS:         /* =68, [3200] */
117                 case UBLOCK_CJK_STROKES:                             /* =130, [31C0] */
118                 case UBLOCK_CJK_COMPATIBILITY:                       /* =69, [3300] */
119                 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:      /* =70, [3400] */
120                 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:                  /* =71, [4E00] */
121                 case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:            /* =79, [F900] */
122                 case UBLOCK_CJK_COMPATIBILITY_FORMS:                 /* =83, [FE30] */
123                 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B:     /* =94, [20000] */
124                 case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT: /* =95, [2F800] */
125                 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C:      /* =197, [2A700] */
126                 case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D:      /* =209, [2B740] */
127 /* LCOV_EXCL_START */
128                         type = CTSVC_LANG_CHINESE;
129                         break;
130
131                         /* japanese */
132                 case UBLOCK_HIRAGANA:                      /* =62, [3040] */
133                 case UBLOCK_KATAKANA:                      /* =63, [30A0] */
134                 case UBLOCK_KATAKANA_PHONETIC_EXTENSIONS:  /* =107, [31F0] */
135                 case UBLOCK_JAVANESE:                      /* =181, [A980] */
136                         type = CTSVC_LANG_JAPANESE;
137                         break;
138
139                 case UBLOCK_GREEK:              /* =8, [0370] */
140                 case UBLOCK_GREEK_EXTENDED:     /* =39, [1F00] */
141                         type = CTSVC_LANG_GREEK;
142                         break;
143
144                 case UBLOCK_CYRILLIC:                 /* =9, [0400] */
145                 case UBLOCK_CYRILLIC_EXTENDED_A:      /* =158, [2DE0] */
146                 case UBLOCK_CYRILLIC_EXTENDED_B:      /* =160, [A640] */
147                 case UBLOCK_CYRILLIC_SUPPLEMENTARY:   /* =97, UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, [0500] */
148                         type = CTSVC_LANG_RUSSIAN;
149                         /* type = CTSVC_LANG_BULGARIAN: */  /* bg, Bulgaria - Bulgarian */
150                         /* type = CTSVC_LANG_MACEDONIA: */  /* mk, Macedonia */
151                         /* type = CTSVC_LANG_KAZAKHSTAN: */ /* kk, Kazakhstan */
152                         /* type = CTSVC_LANG_SERBIAN: */    /* sr, Serbia - Serbian */
153                         /* type = CTSVC_LANG_UKRAINE: */    /* uk, Ukraine */
154                         break;
155
156                 case UBLOCK_ARMENIAN:             /* =10, [0530] */
157                         type = CTSVC_LANG_ARMENIAN;
158                         break;
159                 case UBLOCK_ARABIC:               /* =12, [0600] */
160                         type = CTSVC_LANG_ARABIC;
161                         break;
162                 case UBLOCK_DEVANAGARI:           /* =15, [0900] */
163                 case UBLOCK_DEVANAGARI_EXTENDED:  /* =179, [A8E0] */
164                         type = CTSVC_LANG_HINDI;
165                         break;
166                 case UBLOCK_GEORGIAN:             /* =29, [10A0] */
167                 case UBLOCK_GEORGIAN_SUPPLEMENT:  /* =135, [2D00] */
168                         type = CTSVC_LANG_GEORGIAN;
169                         break;
170                 case UBLOCK_OLD_TURKIC:           /* =191, [10C00] */
171                         type = CTSVC_LANG_TURKISH;
172                         break;
173                 case UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS: /* =87, [FF00]  hangul : FFA0 ~ FFDC */
174                         if (CTSVC_COMPARE_BETWEEN((UChar)0xFF21, word[0], (UChar)0xFF3A)
175                                         || CTSVC_COMPARE_BETWEEN((UChar)0xFF41, word[0], (UChar)0xFF5A))
176                                 type = CTSVC_LANG_ENGLISH;
177                         else if (CTSVC_COMPARE_BETWEEN((UChar)0xFF10, word[0], (UChar)0xFF19))
178                                 type = CTSVC_LANG_NUMBER;
179                         else if (CTSVC_COMPARE_BETWEEN((UChar)0xFF65, word[0], (UChar)0xFF9F))
180                                 type = CTSVC_LANG_JAPANESE;
181                         else if (CTSVC_COMPARE_BETWEEN((UChar)0xFFA0, word[0], (UChar)0xFFDC))
182                                 type = CTSVC_LANG_KOREAN;
183                         else
184                                 type = CTSVC_LANG_OTHERS;
185                         break;
186                 default:
187                         type = CTSVC_LANG_OTHERS;
188 /* LCOV_EXCL_STOP */
189                 }
190         } else {
191                 type = CTSVC_LANG_OTHERS;
192         }
193
194         DBG("language type = %d", type);
195         return type;
196 }
197
198 /* check language type by first word(char*) using UBLOCK */
199 int ctsvc_check_language_type(const char *src)
200 {
201         int length = 0;
202         char temp[10] = {0};
203         UChar tmp_result[2];
204         UChar result[10];
205         UErrorCode status = 0;
206
207         if (src && src[0]) {
208                 length = ctsvc_check_utf8(src[0]);
209                 RETVM_IF(length <= 0, CONTACTS_ERROR_INTERNAL, "check_utf8() Fail");
210
211                 strncpy(temp, src, length);
212
213                 DBG("temp(%s) src(%s) length(%d)", temp, src, length);
214
215                 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, temp, -1, &status);
216                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
217                                 "u_strFromUTF8() Fail(%s)", u_errorName(status));
218
219                 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
220                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
221                                 "u_strToLower() Fail(%s)", u_errorName(status));
222
223                 unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
224                                 (UChar *)result, array_sizeof(result), &status);
225                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
226                                 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
227
228                 DBG("0x%x%x", (0xFF00 & (tmp_result[0])) >> 8,  (0xFF & (tmp_result[0])));
229
230                 return ctsvc_check_language(result);
231         }
232
233         return CONTACTS_ERROR_INVALID_PARAMETER;
234 }
235