move localize,normalize files from server to common
[platform/core/pim/contacts-service.git] / common / ctsvc_localize_kor.c
1 /*
2  * Contacts Service
3  *
4  * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19
20 #include <unicode/ustring.h>
21 #include <unicode/unorm.h>
22 #include <unicode/ucol.h>
23 #include <unicode/uset.h>
24
25 #include "ctsvc_internal.h"
26 #include "ctsvc_normalize.h"
27 #include "ctsvc_localize.h"
28 #include "ctsvc_localize_utils.h"
29
30 #include "ctsvc_localize_kor.h"
31
32
/*
 * Unicode ranges (UTF-16 code units) of the Hangul blocks handled in this file.
 * Every expansion is fully parenthesized so that the cast cannot bind to
 * neighbouring tokens at the point of use.
 */

/* Korean - Hangul Jamo */
#define CTSVC_HAN_J_START ((UChar)0x1100)
#define CTSVC_HAN_J_END ((UChar)0x11FF)

/* Korean - Hangul Jamo Extended-A */
#define CTSVC_JAMO_A_START ((UChar)0xA960)
#define CTSVC_JAMO_A_END ((UChar)0xA97F)

/* Korean - Hangul Jamo Extended-B */
#define CTSVC_JAMO_B_START ((UChar)0xD7B0)
#define CTSVC_JAMO_B_END ((UChar)0xD7FF)

/* Korean - Hangul Compatibility Jamo */
#define CTSVC_HAN_C_START ((UChar)0x3130)
#define CTSVC_HAN_C_END ((UChar)0x318F)

/* Korean - Halfwidth Hangul */
#define CTSVC_HAN_HALF_START ((UChar)0xFFA0)
#define CTSVC_HAN_HALF_END ((UChar)0xFFDC)

/* Korean - Hangul Syllables */
#define CTSVC_HAN_SYLLABLES_START ((UChar)0xAC00)
#define CTSVC_HAN_SYLLABLES_END ((UChar)0xD7A3)
56
57
/*
 * Conversion tables between Hangul Compatibility Jamo (U+31xx) and
 * Hangul Jamo (U+11xx).
 *
 * Each table stores only the LOW byte of the code points.  A "compatibility"
 * table and the "jamo" table that follows it are paired by index: the
 * converters locate a low byte in one table with strchr() and read the entry
 * at the same index from the other one.
 *
 * Because strchr() is used, every table is a NUL-terminated string.  Entries
 * above 0x7F are kept in plain char, whose signedness is implementation
 * defined; read them back through unsigned char.
 */

/*
 * Initial consonants, compatibility side (U+31xx).
 * Index 43 (0x31) pairs with the first 0x00 of hangul_jamo_choseong, i.e.
 * U+3131 <-> U+1100.
 */
static const char hangul_compatibility_choseong[] = {
	0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41, 0x42,
	0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
	0x4B, 0x4C, 0x4D, 0x4E, 0x65, 0x66, 0x6E, 0x71,
	0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
	0x84, 0x85, 0x86, 0x31, 0x00
};

/*
 * Initial consonants, jamo side (choseong, 0x1100~0x115F).
 * The first 0x00 (index 43) doubles as the entry for U+1100: searching the
 * table for 0 with strchr() yields the position of that terminator.  The
 * second 0x00 keeps the array the same length as its compatibility partner.
 */
static const char hangul_jamo_choseong[] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07,
	0x08, 0x21, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
	0x0F, 0x10, 0x11, 0x12, 0x14, 0x15, 0x1C, 0x1D,
	0x1E, 0x20, 0x22, 0x23, 0x27, 0x29, 0x2B, 0x2C,
	0x2D, 0x2E, 0x2F, 0x32, 0x36, 0x40, 0x47, 0x4C,
	0x57, 0x58, 0x59, 0x00, 0x00
};

/* Vowels, compatibility side (U+31xx). */
static const char hangul_compatibility_jungseong[] = {
	0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
	0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E,
	0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x87, 0x88,
	0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x00
};

/* Vowels, jamo side (jungseong, 0x1160~0x11A7). */
static const char hangul_jamo_jungseong[] = {
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
	0x71, 0x72, 0x73, 0x74, 0x75, 0x60, 0x84, 0x85,
	0x88, 0x91, 0x92, 0x94, 0x9E, 0xA1, 0x00
};

/* Final consonants, compatibility side (U+31xx). */
static const char hangul_compatibility_jongseong[] = {
	0x33, 0x35, 0x36, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
	0x3F, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
	0x6F, 0x70, 0x82, 0x83, 0x00
};

/* Final consonants, jamo side (jongseong, 0x11A8~0x11FF). */
static const char hangul_jamo_jongseong[] = {
	0xAA, 0xAC, 0xAD, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4,
	0xB5, 0xC7, 0xC8, 0xCC, 0xCE, 0xD3, 0xD7, 0xD9,
	0xDD, 0xDF, 0xF1, 0xF2, 0x00
};
93
94 static inline bool is_chosung(UChar src)
95 {
96         int unicode_value1 = 0;
97         int unicode_value2 = 0;
98
99         unicode_value1 = (0xFF00 & (src)) >> 8;
100         unicode_value2 = (0xFF & (src));
101
102         if (unicode_value1 == 0x31
103                         && (0x30 <= unicode_value2 && unicode_value2 <= 0x4e))   /* compatiblility jame */
104                 return true;
105
106         if (unicode_value1 == 0xA9
107                         && (0x60 <= unicode_value2 && unicode_value2 <= 0x7C)) /* jamo Extended-A */
108                 return true;
109
110         if (unicode_value1 == 0x11
111                         && (0x00 <= unicode_value2 && unicode_value2 <= 0x5E))  /* jamo */
112                 return true;
113
114         return false;
115 }
116
117 bool ctsvc_is_hangul(UChar src)
118 {
119         if ((0x1100 == (src & 0xFF00))       /* korean -Hangul Jamo*/
120                         || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_A_START, src, CTSVC_JAMO_A_END)
121                         || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_B_START, src, CTSVC_JAMO_B_END)
122                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, src, CTSVC_HAN_C_END)
123                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, src, CTSVC_HAN_HALF_END)
124                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_SYLLABLES_START, src, CTSVC_HAN_SYLLABLES_END)) {
125                 return true;
126         } else {
127                 return FALSE;
128         }
129 }
130
131
132 void ctsvc_hangul_compatibility2jamo(UChar *src)
133 {
134         int unicode_value1 = 0;
135         int unicode_value2 = 0;
136
137         unicode_value1 = (0xFF00 & (*src)) >> 8;
138         unicode_value2 = (0xFF & (*src));
139
140         /* korean -Hangul Jamo halfwidth*/
141         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
142                 unicode_value1 = 0x31;
143
144                 if (unicode_value2 < 0xBF)
145                         unicode_value2 -= 0x70;
146                 else if (unicode_value2 < 0xC8)
147                         unicode_value2 -= 0x73;
148                 else if (unicode_value2 < 0xD0)
149                         unicode_value2 -= 0x75;
150                 else if (unicode_value2 < 0xD8)
151                         unicode_value2 -= 0x77;
152                 else
153                         unicode_value2 -= 0x79;
154
155                 (*src) = unicode_value1 << 8 | unicode_value2;
156         }
157
158         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, *src, CTSVC_HAN_C_END)) {
159                 char *pos;
160                 if ((pos = strchr(hangul_compatibility_choseong, unicode_value2))) {
161                         unicode_value1 = 0x11;
162                         unicode_value2 = hangul_jamo_choseong[pos - hangul_compatibility_choseong];
163                         (*src) = unicode_value1 << 8 | unicode_value2;
164                 } else if ((pos = strchr(hangul_compatibility_jungseong, unicode_value2))) {
165                         unicode_value1 = 0x11;
166                         unicode_value2 = hangul_jamo_jungseong[pos - hangul_compatibility_jungseong];
167                         (*src) = unicode_value1 << 8 | unicode_value2;
168                 } else if ((pos = strchr(hangul_compatibility_jongseong, unicode_value2))) {
169                         unicode_value1 = 0x11;
170                         unicode_value2 = hangul_jamo_jongseong[pos - hangul_compatibility_jongseong];
171                         (*src) = unicode_value1 << 8 | unicode_value2;
172                 }
173         }
174 }
175
176 void ctsvc_hangul_jamo2compatibility(UChar *src)
177 {
178         int unicode_value1 = 0;
179         int unicode_value2 = 0;
180
181         unicode_value1 = (0xFF00 & (*src)) >> 8;
182         unicode_value2 = (0xFF & (*src));
183
184         /* korean -Hangul Jamo halfwidth*/
185         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
186                 unicode_value1 = 0x31;
187
188                 if (unicode_value2 < 0xBF)
189                         unicode_value2 -= 0x70;
190                 else if (unicode_value2 < 0xC8)
191                         unicode_value2 -= 0x73;
192                 else if (unicode_value2 < 0xD0)
193                         unicode_value2 -= 0x75;
194                 else if (unicode_value2 < 0xD8)
195                         unicode_value2 -= 0x77;
196                 else
197                         unicode_value2 -= 0x79;
198
199                 (*src) = unicode_value1 << 8 | unicode_value2;
200         }
201
202         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_J_START, *src, CTSVC_HAN_J_END)) {
203                 char *pos;
204                 if ((pos = strchr(hangul_jamo_choseong, unicode_value2))) {
205                         unicode_value1 = 0x31;
206                         unicode_value2 = hangul_compatibility_choseong[pos - hangul_jamo_choseong];
207                         (*src) = unicode_value1 << 8 | unicode_value2;
208                 } else if ((pos = strchr(hangul_jamo_jungseong, unicode_value2))) {
209                         unicode_value1 = 0x31;
210                         unicode_value2 = hangul_compatibility_jungseong[pos - hangul_jamo_jungseong];
211                         (*src) = unicode_value1 << 8 | unicode_value2;
212                 } else if ((pos = strchr(hangul_jamo_jongseong, unicode_value2))) {
213                         unicode_value1 = 0x31;
214                         unicode_value2 = hangul_compatibility_jongseong[pos - hangul_jamo_jongseong];
215                         (*src) = unicode_value1 << 8 | unicode_value2;
216                 }
217         }
218 }
219
220 int ctsvc_get_chosung(const char *src, char *dest, int dest_size)
221 {
222         int32_t size;
223         UErrorCode status = 0;
224         UChar tmp_result[10];
225         UChar result[10];
226         int chosung_len = 0, count = 0, i = 0, j = 0;
227         int char_len = 0;
228         int str_len = strlen(src);
229         char temp[dest_size];
230
231         for (i = 0; i < str_len; i += char_len) {
232                 char char_src[10];
233                 char_len = ctsvc_check_utf8(src[i]);
234                 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
235
236                 memcpy(char_src, &src[i], char_len);
237                 char_src[char_len] = '\0';
238
239                 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
240                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
241                                 "u_strFromUTF8() Fail(%s)", u_errorName(status));
242
243                 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
244                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
245                                 "u_strToLower() Fail(%s)", u_errorName(status));
246
247                 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
248                                 (UChar *)result, array_sizeof(result), &status);
249                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
250                                 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
251                 ctsvc_extra_normalize(result, size);
252                 u_strToUTF8(temp, dest_size, &size, result, -1, &status);
253                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
254                                 "u_strToUTF8() Fail(%s)", u_errorName(status));
255                 chosung_len = ctsvc_check_utf8(temp[0]);
256                 RETVM_IF(chosung_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
257                 memcpy(&dest[j], temp, chosung_len);
258                 j += chosung_len;
259                 count++;
260         }
261
262         dest[j] = '\0';
263
264         return count;
265 }
266
267 int ctsvc_get_korean_search_pattern(const char *src, char *dest, int dest_size)
268 {
269         int32_t size;
270         UErrorCode status = 0;
271         UChar tmp_result[10];
272         UChar result[10];
273         int i = 0, j = 0, count = 0;
274         int char_len = 0;
275         int str_len = strlen(src);
276
277         for (i = 0; i < str_len; i += char_len) {
278                 char char_src[10];
279                 char_len = ctsvc_check_utf8(src[i]);
280                 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
281                 if (char_len == 1 && src[i] == ' ')
282                         continue;
283
284                 memcpy(char_src, &src[i], char_len);
285                 char_src[char_len] = '\0';
286
287                 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
288                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
289                                 "u_strFromUTF8() Fail(%s)", u_errorName(status));
290
291                 if (is_chosung(tmp_result[0])) {
292                         ctsvc_hangul_compatibility2jamo(tmp_result);
293
294                         u_strToUTF8(&dest[j], dest_size - j, &size, tmp_result, -1, &status);
295                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
296                                         "u_strToUTF8() Fail(%s)", u_errorName(status));
297                         j += size;
298                         dest[j] = '*';
299                         j++;
300                 } else {
301                         u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
302                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
303                                         "u_strToUpper() Fail(%s)", u_errorName(status));
304                         size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
305                                         (UChar *)result, array_sizeof(result), &status);
306                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
307                                         "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
308                         ctsvc_extra_normalize(result, size);
309                         u_strToUTF8(&dest[j], dest_size - j, &size, result, -1, &status);
310                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
311                                         "u_strToUTF8() Fail(%s)", u_errorName(status));
312                         j += size;
313                 }
314                 count++;
315         }
316
317         dest[j] = '\0';
318         return count;
319 }
320
/*
 * Tell whether the UTF-8 character at the start of src is a Hangul initial
 * consonant (choseong).
 *
 * Only 3-byte sequences can match.  The two continuation bytes are combined
 * into a 16-bit value and range-checked per lead byte:
 *   e1 84 80 ~ e1 85 9f   Hangul Jamo
 *   e3 84 b1 ~ e3 85 8e   Hangul Compatibility Jamo
 *   ea a5 a0 ~ ea a5 bc   Hangul Jamo Extended-A
 *
 * @param src  NUL-terminated UTF-8 text
 * @return true when the first character is in one of the ranges above
 */
bool ctsvc_is_chosung(const char *src)
{
	unsigned char lead = 0;
	unsigned short tail = 0;

	if (ctsvc_check_utf8(src[0]) != 3)
		return false;   /* invalid value, or not a 3-byte character */

	/* truncated sequence: do not read beyond the terminator */
	if ('\0' == src[1])
		return false;

	/*
	 * Go through unsigned char: with a signed plain char the bytes (all of
	 * them >= 0x80) would be negative and the comparisons below could never
	 * succeed.
	 */
	lead = (unsigned char)src[0];
	tail = (unsigned short)(((unsigned char)src[1] << 8) | (unsigned char)src[2]);

	if ((0xE1 == lead && CTSVC_COMPARE_BETWEEN(0x8480, tail, 0x859F)) /* korean -Hangul Jamo*/
			|| (0xE3 == lead && CTSVC_COMPARE_BETWEEN(0x84B1, tail, 0x858E)) /* korean -Hangul Compatibility Jamo */
			|| (0xEA == lead && CTSVC_COMPARE_BETWEEN(0xA5A0, tail, 0xA5BC))) /* korean -Hangul Jamo extended A*/
		return true;

	return false;
}
339
/*
 * Tell whether src contains at least one Hangul initial consonant.
 *
 * The string is walked one UTF-8 character at a time and each character is
 * tested with ctsvc_is_chosung().
 *
 * @param src  NUL-terminated UTF-8 text
 * @return true as soon as one character matches; false when none does or
 *         when an invalid lead byte is met
 */
bool ctsvc_has_chosung(const char *src)
{
	int i = 0;
	int char_len = 0;
	int str_len = strlen(src);

	for (i = 0; i < str_len; i += char_len) {
		char_len = ctsvc_check_utf8(src[i]);
		/* a non-positive length would stall the loop or move i backwards */
		if (char_len <= 0)
			return false;
		if (ctsvc_is_chosung(&(src[i])))
			return true;
	}
	return false;
}
353
/* Inclusive range test on a single byte. */
static inline bool ctsvc_kor_byte_between(unsigned char low, unsigned char value, unsigned char high)
{
	return low <= value && value <= high;
}

/*
 * Tell whether the UTF-8 character at the start of src is Hangul.
 *
 * Only 3-byte sequences can match.  The bytes are compared as unsigned char:
 * with a signed plain char every byte of a 3-byte sequence is negative and
 * could never equal constants such as 0xE1.
 *
 * @param src  NUL-terminated UTF-8 text
 * @return true when the first character falls in one of the byte ranges
 *         listed in the body, false otherwise
 */
static bool __ctsvc_is_hangul(const char *src)
{
	unsigned char lead = 0;
	unsigned char second = 0;
	unsigned char third = 0;

	if (ctsvc_check_utf8(src[0]) != 3)
		return false;   /* invalid value, or not a 3-byte character */

	/* truncated sequence: do not read beyond the terminator */
	if ('\0' == src[1])
		return false;

	lead = (unsigned char)src[0];
	second = (unsigned char)src[1];
	third = (unsigned char)src[2];

	switch (lead) {
		/*
		 * Hangul Jamo : 0x1100 ~ 0x11FF
		 *  e1 84 80 ~ e1 87 bf
		 */
	case 0xE1:
		return ctsvc_kor_byte_between(0x84, second, 0x87)
			&& ctsvc_kor_byte_between(0x80, third, 0xBF);

		/*
		 * Hangul Compatibility Jamo : 0x3130 ~ 0x318F
		 *  e3 84 b0 ~ e3 84 bf
		 *  e3 85 80 ~ e3 85 bf
		 *  e3 86 80 ~ e3 86 8f
		 */
	case 0xE3:
		if (0x84 == second)
			return ctsvc_kor_byte_between(0xB0, third, 0xBF);
		if (0x85 == second)
			return ctsvc_kor_byte_between(0x80, third, 0xBF);
		if (0x86 == second)
			return ctsvc_kor_byte_between(0x80, third, 0x8F);
		return false;

		/*
		 * Hangul Jamo Extended A : 0xA960 ~ 0xA97F
		 *  ea a5 a0 ~ ea a5 bf
		 * Hangul syllables (first part) : 0xAC00 ~ 0xAFFF
		 *  ea b0 80 ~ ea bf bf
		 */
	case 0xEA:
		if (0xA5 == second)
			return ctsvc_kor_byte_between(0xA0, third, 0xBF);
		if (ctsvc_kor_byte_between(0xB0, second, 0xBF))
			return ctsvc_kor_byte_between(0x80, third, 0xBF);
		return false;

		/*
		 * Hangul syllables (middle part) : 0xB000 ~ 0xCFFF
		 *  eb 80 80 ~ eb bf bf
		 *  ec 80 80 ~ ec bf bf
		 */
	case 0xEB:
	case 0xEC:
		return ctsvc_kor_byte_between(0x80, second, 0xBF)
			&& ctsvc_kor_byte_between(0x80, third, 0xBF);

		/*
		 * Hangul syllables (last part) : 0xD000 ~ 0xD7AF
		 *  ed 80 80 ~ ed 9e af
		 * Hangul Jamo Extended B : 0xD7B0 ~ 0xD7FF
		 *  ed 9e b0 ~ ed 9f bf
		 * Both are covered by matching the whole ed 80..9f range.
		 */
	case 0xED:
		return ctsvc_kor_byte_between(0x80, second, 0x9F)
			&& ctsvc_kor_byte_between(0x80, third, 0xBF);

		/*
		 * Hangul halfwidth : 0xFFA0 ~ 0xFFDC
		 *  ef be a0 ~ ef bf 9c
		 */
	case 0xEF:
		if (0xBE == second)
			return ctsvc_kor_byte_between(0xA0, third, 0xBF);
		if (0xBF == second)
			return ctsvc_kor_byte_between(0x80, third, 0x9C);
		return false;

	default:
		return false;
	}
}
486
/*
 * Tell whether src contains at least one Hangul character.
 *
 * The string is walked one UTF-8 character at a time and each character is
 * tested with __ctsvc_is_hangul().
 *
 * @param src  NUL-terminated UTF-8 text
 * @return true as soon as one character matches; false when none does or
 *         when an invalid lead byte is met
 */
bool ctsvc_has_korean(const char *src)
{
	int i = 0;
	int char_len = 0;
	int str_len = strlen(src);

	for (i = 0; i < str_len; i += char_len) {
		char_len = ctsvc_check_utf8(src[i]);
		/* any non-positive length would stall the loop or move i backwards */
		RETV_IF(char_len <= 0, false);
		if (__ctsvc_is_hangul(&(src[i])))
			return true;
	}
	return false;
}
501