use Hangul Compatibility Jamo for korean index
[platform/core/pim/contacts-service.git] / server / ctsvc_localize_kor.c
1 /*
2  * Contacts Service
3  *
4  * Copyright (c) 2010 - 2012 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19
20 #include <unicode/ustring.h>
21 #include <unicode/unorm.h>
22 #include <unicode/ucol.h>
23 #include <unicode/uset.h>
24
25 #include "ctsvc_internal.h"
26 #include "ctsvc_normalize.h"
27 #include "ctsvc_localize.h"
28 #include "ctsvc_localize_utils.h"
29
30 #include "ctsvc_localize_kor.h"
31
32
/*
 * Inclusive UTF-16 code-unit ranges of the Hangul blocks handled in this file.
 * Every expansion is fully parenthesized so the cast cannot re-associate with
 * operators at the point of use.
 */

/* korean - Hangul Jamo */
#define CTSVC_HAN_J_START ((UChar)0x1100)
#define CTSVC_HAN_J_END ((UChar)0x11FF)

/* korean - Hangul Jamo Extended-A */
#define CTSVC_JAMO_A_START ((UChar)0xA960)
#define CTSVC_JAMO_A_END ((UChar)0xA97F)

/* korean - Hangul Jamo Extended-B */
#define CTSVC_JAMO_B_START ((UChar)0xD7B0)
#define CTSVC_JAMO_B_END ((UChar)0xD7FF)

/* korean - Hangul Compatibility Jamo */
#define CTSVC_HAN_C_START ((UChar)0x3130)
#define CTSVC_HAN_C_END ((UChar)0x318F)

/* korean - Hangul halfwidth forms */
#define CTSVC_HAN_HALF_START ((UChar)0xFFA0)
#define CTSVC_HAN_HALF_END ((UChar)0xFFDC)

/* korean - Hangul Syllables */
#define CTSVC_HAN_SYLLABLES_START ((UChar)0xAC00)
#define CTSVC_HAN_SYLLABLES_END ((UChar)0xD7A3)
56
57
/*
 * Low-byte mapping tables between Hangul Compatibility Jamo (U+31xx) and
 * Hangul Jamo (U+11xx).
 *
 * The tables come in pairs: entry [i] of a "compatibility" table corresponds
 * to entry [i] of the matching "jamo" table.  Each table ends with 0x00 so it
 * can be scanned with strchr().
 *
 * The choseong pair deliberately keeps its last real mapping (0x31 <-> 0x00,
 * i.e. U+3131 <-> U+1100) right before the terminator: looking up the low
 * byte 0x00 with strchr() lands on that slot of hangul_jamo_choseong.
 */

/* Compatibility Jamo low bytes that map to an initial consonant. */
static const char hangul_compatibility_choseong[] = {
	0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41, 0x42,
	0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
	0x4B, 0x4C, 0x4D, 0x4E, 0x65, 0x66, 0x6E, 0x71,
	0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
	0x84, 0x85, 0x86, 0x31,
	0x00
};

/* Matching Hangul Jamo low bytes, choseong area 0x1100~0x115F. */
static const char hangul_jamo_choseong[] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07,
	0x08, 0x21, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
	0x0F, 0x10, 0x11, 0x12, 0x14, 0x15, 0x1C, 0x1D,
	0x1E, 0x20, 0x22, 0x23, 0x27, 0x29, 0x2B, 0x2C,
	0x2D, 0x2E, 0x2F, 0x32, 0x36, 0x40, 0x47, 0x4C,
	0x57, 0x58, 0x59, 0x00,   /* the 0x00 here is the U+1100 entry */
	0x00
};

/* Compatibility Jamo low bytes that map to a medial vowel. */
static const char hangul_compatibility_jungseong[] = {
	0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
	0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E,
	0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x87, 0x88,
	0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E,
	0x00
};

/* Matching Hangul Jamo low bytes, jungseong area 0x1160~0x11A7. */
static const char hangul_jamo_jungseong[] = {
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
	0x71, 0x72, 0x73, 0x74, 0x75, 0x60, 0x84, 0x85,
	0x88, 0x91, 0x92, 0x94, 0x9E, 0xA1,
	0x00
};

/* Compatibility Jamo low bytes that map to a final consonant. */
static const char hangul_compatibility_jongseong[] = {
	0x33, 0x35, 0x36, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
	0x3F, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
	0x6F, 0x70, 0x82, 0x83,
	0x00
};

/* Matching Hangul Jamo low bytes, jongseong area 0x11A8~0x11FF. */
static const char hangul_jamo_jongseong[] = {
	0xAA, 0xAC, 0xAD, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4,
	0xB5, 0xC7, 0xC8, 0xCC, 0xCE, 0xD3, 0xD7, 0xD9,
	0xDD, 0xDF, 0xF1, 0xF2,
	0x00
};
93
94 static inline bool is_chosung(UChar src)
95 {
96         int unicode_value1 = 0;
97         int unicode_value2 = 0;
98
99         unicode_value1 = (0xFF00 & (src)) >> 8;
100         unicode_value2 = (0xFF & (src));
101
102         if (unicode_value1 == 0x31
103                         && (0x30 <= unicode_value2 && unicode_value2 <= 0x4e))   /* compatiblility jame */
104                 return true;
105
106         if (unicode_value1 == 0xA9
107                         && (0x60 <= unicode_value2  && unicode_value2 <= 0x7C)) /* jamo Extended-A */
108                 return true;
109
110         if (unicode_value1 == 0x11
111                         && (0x00 <= unicode_value2  && unicode_value2 <= 0x5E))  /* jamo */
112                 return true;
113
114         return false;
115 }
116
117 bool ctsvc_is_hangul(UChar src)
118 {
119         if ((0x1100 == (src & 0xFF00))       /* korean -Hangul Jamo*/
120                         || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_A_START, src, CTSVC_JAMO_A_END)
121                         || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_B_START, src, CTSVC_JAMO_B_END)
122                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, src, CTSVC_HAN_C_END)
123                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, src, CTSVC_HAN_HALF_END)
124                         || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_SYLLABLES_START, src, CTSVC_HAN_SYLLABLES_END))
125                 return true;
126         else
127                 return FALSE;
128 }
129
130
131 void ctsvc_hangul_compatibility2jamo(UChar *src)
132 {
133         int unicode_value1 = 0;
134         int unicode_value2 = 0;
135
136         unicode_value1 = (0xFF00 & (*src)) >> 8;
137         unicode_value2 = (0xFF & (*src));
138
139         /* korean -Hangul Jamo halfwidth*/
140         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
141                 unicode_value1 = 0x31;
142
143                 if (unicode_value2 < 0xBF)
144                         unicode_value2 -= 0x70;
145                 else if (unicode_value2 < 0xC8)
146                         unicode_value2 -= 0x73;
147                 else if (unicode_value2 < 0xD0)
148                         unicode_value2 -= 0x75;
149                 else if (unicode_value2 < 0xD8)
150                         unicode_value2 -= 0x77;
151                 else
152                         unicode_value2 -= 0x79;
153
154                 (*src) = unicode_value1 << 8 | unicode_value2;
155         }
156
157         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, *src, CTSVC_HAN_C_END)) {
158                 char *pos;
159                 if ((pos = strchr(hangul_compatibility_choseong, unicode_value2))) {
160                         unicode_value1 = 0x11;
161                         unicode_value2 = hangul_jamo_choseong[pos - hangul_compatibility_choseong];
162                         (*src) = unicode_value1 << 8 | unicode_value2;
163                 }
164                 else if ((pos = strchr(hangul_compatibility_jungseong, unicode_value2))) {
165                         unicode_value1 = 0x11;
166                         unicode_value2 = hangul_jamo_jungseong[pos - hangul_compatibility_jungseong];
167                         (*src) = unicode_value1 << 8 | unicode_value2;
168                 }
169                 else if ((pos = strchr(hangul_compatibility_jongseong, unicode_value2))) {
170                         unicode_value1 = 0x11;
171                         unicode_value2 = hangul_jamo_jongseong[pos - hangul_compatibility_jongseong];
172                         (*src) = unicode_value1 << 8 | unicode_value2;
173                 }
174         }
175 }
176
177 void ctsvc_hangul_jamo2compatibility(UChar *src)
178 {
179         int unicode_value1 = 0;
180         int unicode_value2 = 0;
181
182         unicode_value1 = (0xFF00 & (*src)) >> 8;
183         unicode_value2 = (0xFF & (*src));
184
185         /* korean -Hangul Jamo halfwidth*/
186         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
187                 unicode_value1 = 0x31;
188
189                 if (unicode_value2 < 0xBF)
190                         unicode_value2 -= 0x70;
191                 else if (unicode_value2 < 0xC8)
192                         unicode_value2 -= 0x73;
193                 else if (unicode_value2 < 0xD0)
194                         unicode_value2 -= 0x75;
195                 else if (unicode_value2 < 0xD8)
196                         unicode_value2 -= 0x77;
197                 else
198                         unicode_value2 -= 0x79;
199
200                 (*src) = unicode_value1 << 8 | unicode_value2;
201         }
202
203         if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_J_START, *src, CTSVC_HAN_J_END)) {
204                 char *pos;
205                 if ((pos = strchr(hangul_jamo_choseong, unicode_value2))) {
206                         unicode_value1 = 0x31;
207                         unicode_value2 = hangul_compatibility_choseong[pos - hangul_jamo_choseong];
208                         (*src) = unicode_value1 << 8 | unicode_value2;
209                 }
210                 else if ((pos = strchr(hangul_jamo_jungseong, unicode_value2))) {
211                         unicode_value1 = 0x31;
212                         unicode_value2 = hangul_compatibility_jungseong[pos - hangul_jamo_jungseong];
213                         (*src) = unicode_value1 << 8 | unicode_value2;
214                 }
215                 else if ((pos = strchr(hangul_jamo_jongseong, unicode_value2))) {
216                         unicode_value1 = 0x31;
217                         unicode_value2 = hangul_compatibility_jongseong[pos - hangul_jamo_jongseong];
218                         (*src) = unicode_value1 << 8 | unicode_value2;
219                 }
220         }
221 }
222
223 int ctsvc_get_chosung(const char *src, char *dest, int dest_size)
224 {
225         int32_t size;
226         UErrorCode status = 0;
227         UChar tmp_result[10];
228         UChar result[10];
229         int chosung_len=0, count = 0, i=0, j=0;
230         int char_len = 0;
231         int str_len = strlen(src);
232         char temp[dest_size];
233
234         for (i=0;i<str_len;i+=char_len) {
235                 char char_src[10];
236                 char_len = ctsvc_check_utf8(src[i]);
237                 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
238
239                 memcpy(char_src, &src[i], char_len);
240                 char_src[char_len] = '\0';
241
242                 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
243                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
244                                 "u_strFromUTF8() Fail(%s)", u_errorName(status));
245
246                 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
247                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
248                                 "u_strToLower() Fail(%s)", u_errorName(status));
249
250                 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
251                                 (UChar *)result, array_sizeof(result), &status);
252                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
253                                 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
254                 ctsvc_extra_normalize(result, size);
255                 u_strToUTF8(temp, dest_size, &size, result, -1, &status);
256                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
257                                 "u_strToUTF8() Fail(%s)", u_errorName(status));
258                 chosung_len = ctsvc_check_utf8(temp[0]);
259                 RETVM_IF(chosung_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
260                 memcpy(&dest[j], temp, chosung_len);
261                 j += chosung_len;
262                 count++;
263         }
264
265         dest[j] = '\0';
266
267         return count;
268 }
269
270 int ctsvc_get_korean_search_pattern(const char *src, char *dest, int dest_size)
271 {
272         int32_t size;
273         UErrorCode status = 0;
274         UChar tmp_result[10];
275         UChar result[10];
276         int i=0, j=0, count=0;
277         int char_len = 0;
278         int str_len = strlen(src);
279
280         for (i=0;i<str_len;i+=char_len) {
281                 char char_src[10];
282                 char_len = ctsvc_check_utf8(src[i]);
283                 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
284                 if (char_len == 1 && src[i] == ' ')
285                         continue;
286
287                 memcpy(char_src, &src[i], char_len);
288                 char_src[char_len] = '\0';
289
290                 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
291                 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
292                                 "u_strFromUTF8() Fail(%s)", u_errorName(status));
293
294                 if (is_chosung(tmp_result[0])) {
295                         ctsvc_hangul_compatibility2jamo(tmp_result);
296
297                         u_strToUTF8(&dest[j], dest_size - j, &size, tmp_result, -1, &status);
298                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
299                                         "u_strToUTF8() Fail(%s)", u_errorName(status));
300                         j += size;
301                         dest[j] = '*';
302                         j++;
303                 }
304                 else {
305                         u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
306                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
307                                         "u_strToUpper() Fail(%s)", u_errorName(status));
308                         size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
309                                         (UChar *)result, array_sizeof(result), &status);
310                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
311                                         "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
312                         ctsvc_extra_normalize(result, size);
313                         u_strToUTF8(&dest[j], dest_size - j, &size, result, -1, &status);
314                         RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
315                                         "u_strToUTF8() Fail(%s)", u_errorName(status));
316                         j += size;
317                 }
318                 count++;
319         }
320
321         dest[j] = '\0';
322         return count;
323 }
324
/*
 * Report whether the UTF-8 character at the start of src is an initial
 * consonant (choseong).
 *
 * Only 3-byte characters are considered.  The two trail bytes are combined
 * into a 16-bit value and compared against a per-lead-byte range:
 *   E1 + 0x8480~0x859F : Hangul Jamo
 *   E3 + 0x84B1~0x858E : Hangul Compatibility Jamo
 *   EA + 0xA5A0~0xA5BC : Hangul Jamo Extended-A
 *
 * @param src  UTF-8 string; only its first character is examined
 * @return true when that character is in one of the ranges above
 */
bool ctsvc_is_chosung(const char *src)
{
	/* plain char may be signed; compare the raw bytes as unsigned values */
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int tail;

	if (ctsvc_check_utf8(src[0]) != 3)
		return false;

	/* truncated character: do not read past the terminating NUL */
	if ('\0' == bytes[1])
		return false;

	tail = ((unsigned int)bytes[1] << 8) | bytes[2];

	switch (bytes[0]) {
	case 0xE1:   /* korean -Hangul Jamo */
		return (0x8480 <= tail && tail <= 0x859F);
	case 0xE3:   /* korean -Hangul Compatibility Jamo */
		return (0x84B1 <= tail && tail <= 0x858E);
	case 0xEA:   /* korean -Hangul Jamo extended A */
		return (0xA5A0 <= tail && tail <= 0xA5BC);
	default:
		return false;
	}
}
343
/*
 * Report whether any UTF-8 character of src is an initial consonant according
 * to ctsvc_is_chosung().
 *
 * @param src  NUL-terminated UTF-8 string
 * @return true as soon as one character matches; false when none does or
 *         when an invalid lead byte is met
 */
bool ctsvc_has_chosung(const char *src)
{
	int i = 0;
	int char_len = 0;
	int str_len = strlen(src);

	for (i = 0; i < str_len; i += char_len) {
		char_len = ctsvc_check_utf8(src[i]);
		/*
		 * A non-positive length must not be added to the index: it would
		 * walk backwards out of the string or never advance.
		 */
		if (char_len <= 0)
			return false;
		if (ctsvc_is_chosung(&(src[i])))
			return true;
	}
	return false;
}
357
/*
 * Report whether the UTF-8 character at the start of src is Hangul.
 *
 * Only well-formed 3-byte characters are considered.  The character is
 * decoded to its code point and checked against these inclusive ranges:
 *   0x1100 ~ 0x11FF  Hangul Jamo                (e1 84 80 ~ e1 87 bf)
 *   0x3130 ~ 0x318F  Hangul Compatibility Jamo  (e3 84 b0 ~ e3 86 8f)
 *   0xA960 ~ 0xA97F  Hangul Jamo Extended A     (ea a5 a0 ~ ea a5 bf)
 *   0xAC00 ~ 0xD7AF  Hangul syllables           (ea b0 80 ~ ed 9e af)
 *   0xD7B0 ~ 0xD7FF  Hangul Jamo Extended B     (ed 9e b0 ~ ed 9f bf)
 *   0xFFA0 ~ 0xFFDC  Hangul halfwidth           (ef be a0 ~ ef bf 9c)
 *
 * @param src  UTF-8 string; only its first character is examined
 * @return true when that character lies in one of the ranges above
 */
static bool __ctsvc_is_hangul(const char *src)
{
	static const struct {
		unsigned int first;
		unsigned int last;
	} ranges[] = {
		{ 0x1100, 0x11FF },   /* Hangul Jamo */
		{ 0x3130, 0x318F },   /* Hangul Compatibility Jamo */
		{ 0xA960, 0xA97F },   /* Hangul Jamo Extended A */
		{ 0xAC00, 0xD7AF },   /* Hangul syllables */
		{ 0xD7B0, 0xD7FF },   /* Hangul Jamo Extended B */
		{ 0xFFA0, 0xFFDC },   /* Hangul halfwidth */
	};
	/* plain char may be signed; compare the raw bytes as unsigned values */
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int code;
	unsigned int idx;

	if (ctsvc_check_utf8(src[0]) != 3)
		return false;

	/* a 3-byte character starts with 1110xxxx */
	if ((bytes[0] & 0xF0) != 0xE0)
		return false;

	/*
	 * Both trail bytes must be 10xxxxxx.  bytes[1] is tested first, so a
	 * string that ends right after the lead byte is rejected before
	 * bytes[2] is read.
	 */
	if ((bytes[1] & 0xC0) != 0x80 || (bytes[2] & 0xC0) != 0x80)
		return false;

	code = ((unsigned int)(bytes[0] & 0x0F) << 12)
		| ((unsigned int)(bytes[1] & 0x3F) << 6)
		| (unsigned int)(bytes[2] & 0x3F);

	for (idx = 0; idx < sizeof(ranges) / sizeof(ranges[0]); idx++) {
		if (ranges[idx].first <= code && code <= ranges[idx].last)
			return true;
	}
	return false;
}
490
491 bool ctsvc_has_korean(const char *src)
492 {
493         int  i=0;
494         int char_len = 0;
495         int str_len = strlen(src);
496
497         for (i=0;i<str_len;i+=char_len) {
498                 char_len = ctsvc_check_utf8(src[i]);
499                 RETV_IF(CONTACTS_ERROR_INVALID_PARAMETER == char_len, false);
500                 if (__ctsvc_is_hangul(&(src[i])))
501                         return true;
502         }
503         return false;
504 }
505