4 * Copyright (c) 2010 - 2012 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 #include <unicode/ustring.h>
21 #include <unicode/unorm.h>
22 #include <unicode/ucol.h>
23 #include <unicode/uset.h>
25 #include "ctsvc_internal.h"
26 #include "ctsvc_normalize.h"
27 #include "ctsvc_localize.h"
28 #include "ctsvc_localize_utils.h"
30 #include "ctsvc_localize_kor.h"
/*
 * Inclusive UTF-16 code unit ranges of the Unicode blocks that hold Korean
 * letters.  Every expansion is fully parenthesized so that it stays a single
 * operand wherever the macro is used.
 */

/* Korean - Hangul Jamo */
#define CTSVC_HAN_J_START ((UChar)0x1100)
#define CTSVC_HAN_J_END ((UChar)0x11FF)

/* Korean - Hangul Jamo Extended-A */
#define CTSVC_JAMO_A_START ((UChar)0xA960)
#define CTSVC_JAMO_A_END ((UChar)0xA97F)

/* Korean - Hangul Jamo Extended-B */
#define CTSVC_JAMO_B_START ((UChar)0xD7B0)
#define CTSVC_JAMO_B_END ((UChar)0xD7FF)

/* Korean - Hangul Compatibility Jamo */
#define CTSVC_HAN_C_START ((UChar)0x3130)
#define CTSVC_HAN_C_END ((UChar)0x318F)

/* Korean - halfwidth Hangul letters */
#define CTSVC_HAN_HALF_START ((UChar)0xFFA0)
#define CTSVC_HAN_HALF_END ((UChar)0xFFDC)

/* Korean - Hangul Syllables */
#define CTSVC_HAN_SYLLABLES_START ((UChar)0xAC00)
#define CTSVC_HAN_SYLLABLES_END ((UChar)0xD7A3)
/*
 * Low-byte lookup tables pairing Hangul Compatibility Jamo (U+31xx) with
 * Hangul Jamo (U+11xx).
 *
 * Each hangul_compatibility_* table is index-parallel to the hangul_jamo_*
 * table of the same class: entry i of one is the counterpart of entry i of the
 * other.  The tables are searched with strchr(), so they are NUL-terminated
 * and stay plain char.
 *
 * Entries above 0x7F do not fit a signed char; they are cast explicitly so the
 * narrowing is intentional and identical on every compiler.  Code that reads
 * an entry back must go through unsigned char before widening it.
 */

/*
 * Initial consonants.  The final pair (0x31 <-> 0x00) sits on the slot of the
 * jamo table's first terminator: strchr(hangul_jamo_choseong, 0x00) returns
 * that slot, which is how U+1100 and U+3131 are paired.
 */
static const char hangul_compatibility_choseong[] = {
	0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41,
	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
	0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x65, 0x66, 0x6E,
	0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
	0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, (char)0x80,
	(char)0x81, (char)0x84, (char)0x85, (char)0x86, 0x31, 0x00};

static const char hangul_jamo_choseong[] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07, /* to choseong 0x1100~0x115F */
	0x08, 0x21, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	0x10, 0x11, 0x12, 0x14, 0x15, 0x1C, 0x1D, 0x1E, 0x20,
	0x22, 0x23, 0x27, 0x29, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
	0x32, 0x36, 0x40, 0x47, 0x4C, 0x57, 0x58, 0x59, 0x00, 0x00};

/* Medial vowels. */
static const char hangul_compatibility_jungseong[] = {
	0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
	0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E,
	0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, (char)0x87, (char)0x88,
	(char)0x89, (char)0x8A, (char)0x8B, (char)0x8C, (char)0x8D, (char)0x8E, 0x00};

static const char hangul_jamo_jungseong[] = {
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, /* to jungseong 0x1160~0x11A7 */
	0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72,
	0x73, 0x74, 0x75, 0x60, (char)0x84, (char)0x85, (char)0x88, (char)0x91, (char)0x92,
	(char)0x94, (char)0x9E, (char)0xA1, 0x00};

/* Final consonants. */
static const char hangul_compatibility_jongseong[] = {
	0x33, 0x35, 0x36, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
	0x3F, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
	0x6F, 0x70, (char)0x82, (char)0x83, 0x00};

static const char hangul_jamo_jongseong[] = {
	(char)0xAA, (char)0xAC, (char)0xAD, (char)0xB0, (char)0xB1, /* to jongseong 0x11A8~0x11FF */
	(char)0xB2, (char)0xB3, (char)0xB4, (char)0xB5,
	(char)0xC7, (char)0xC8, (char)0xCC, (char)0xCE, (char)0xD3, (char)0xD7,
	(char)0xD9, (char)0xDD, (char)0xDF, (char)0xF1, (char)0xF2, 0x00};
94 static inline bool is_chosung(UChar src)
96 int unicode_value1 = 0;
97 int unicode_value2 = 0;
99 unicode_value1 = (0xFF00 & (src)) >> 8;
100 unicode_value2 = (0xFF & (src));
102 if (unicode_value1 == 0x31
103 && (0x30 <= unicode_value2 && unicode_value2 <= 0x4e)) /* compatiblility jame */
106 if (unicode_value1 == 0xA9
107 && (0x60 <= unicode_value2 && unicode_value2 <= 0x7C)) /* jamo Extended-A */
110 if (unicode_value1 == 0x11
111 && (0x00 <= unicode_value2 && unicode_value2 <= 0x5E)) /* jamo */
117 bool ctsvc_is_hangul(UChar src)
119 if ((0x1100 == (src & 0xFF00)) /* korean -Hangul Jamo*/
120 || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_A_START, src, CTSVC_JAMO_A_END)
121 || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_B_START, src, CTSVC_JAMO_B_END)
122 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, src, CTSVC_HAN_C_END)
123 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, src, CTSVC_HAN_HALF_END)
124 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_SYLLABLES_START, src, CTSVC_HAN_SYLLABLES_END))
131 void ctsvc_hangul_compatibility2jamo(UChar *src)
133 int unicode_value1 = 0;
134 int unicode_value2 = 0;
136 unicode_value1 = (0xFF00 & (*src)) >> 8;
137 unicode_value2 = (0xFF & (*src));
139 /* korean -Hangul Jamo halfwidth*/
140 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
141 unicode_value1 = 0x31;
143 if (unicode_value2 < 0xBF)
144 unicode_value2 -= 0x70;
145 else if (unicode_value2 < 0xC8)
146 unicode_value2 -= 0x73;
147 else if (unicode_value2 < 0xD0)
148 unicode_value2 -= 0x75;
149 else if (unicode_value2 < 0xD8)
150 unicode_value2 -= 0x77;
152 unicode_value2 -= 0x79;
154 (*src) = unicode_value1 << 8 | unicode_value2;
157 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, *src, CTSVC_HAN_C_END)) {
159 if ((pos = strchr(hangul_compatibility_choseong, unicode_value2))) {
160 unicode_value1 = 0x11;
161 unicode_value2 = hangul_jamo_choseong[pos - hangul_compatibility_choseong];
162 (*src) = unicode_value1 << 8 | unicode_value2;
164 else if ((pos = strchr(hangul_compatibility_jungseong, unicode_value2))) {
165 unicode_value1 = 0x11;
166 unicode_value2 = hangul_jamo_jungseong[pos - hangul_compatibility_jungseong];
167 (*src) = unicode_value1 << 8 | unicode_value2;
169 else if ((pos = strchr(hangul_compatibility_jongseong, unicode_value2))) {
170 unicode_value1 = 0x11;
171 unicode_value2 = hangul_jamo_jongseong[pos - hangul_compatibility_jongseong];
172 (*src) = unicode_value1 << 8 | unicode_value2;
177 void ctsvc_hangul_jamo2compatibility(UChar *src)
179 int unicode_value1 = 0;
180 int unicode_value2 = 0;
182 unicode_value1 = (0xFF00 & (*src)) >> 8;
183 unicode_value2 = (0xFF & (*src));
185 /* korean -Hangul Jamo halfwidth*/
186 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
187 unicode_value1 = 0x31;
189 if (unicode_value2 < 0xBF)
190 unicode_value2 -= 0x70;
191 else if (unicode_value2 < 0xC8)
192 unicode_value2 -= 0x73;
193 else if (unicode_value2 < 0xD0)
194 unicode_value2 -= 0x75;
195 else if (unicode_value2 < 0xD8)
196 unicode_value2 -= 0x77;
198 unicode_value2 -= 0x79;
200 (*src) = unicode_value1 << 8 | unicode_value2;
203 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_J_START, *src, CTSVC_HAN_J_END)) {
205 if ((pos = strchr(hangul_jamo_choseong, unicode_value2))) {
206 unicode_value1 = 0x31;
207 unicode_value2 = hangul_compatibility_choseong[pos - hangul_jamo_choseong];
208 (*src) = unicode_value1 << 8 | unicode_value2;
210 else if ((pos = strchr(hangul_jamo_jungseong, unicode_value2))) {
211 unicode_value1 = 0x31;
212 unicode_value2 = hangul_compatibility_jungseong[pos - hangul_jamo_jungseong];
213 (*src) = unicode_value1 << 8 | unicode_value2;
215 else if ((pos = strchr(hangul_jamo_jongseong, unicode_value2))) {
216 unicode_value1 = 0x31;
217 unicode_value2 = hangul_compatibility_jongseong[pos - hangul_jamo_jongseong];
218 (*src) = unicode_value1 << 8 | unicode_value2;
223 int ctsvc_get_chosung(const char *src, char *dest, int dest_size)
226 UErrorCode status = 0;
227 UChar tmp_result[10];
229 int chosung_len=0, count = 0, i=0, j=0;
231 int str_len = strlen(src);
232 char temp[dest_size];
234 for (i=0;i<str_len;i+=char_len) {
236 char_len = ctsvc_check_utf8(src[i]);
237 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
239 memcpy(char_src, &src[i], char_len);
240 char_src[char_len] = '\0';
242 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
243 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
244 "u_strFromUTF8() Fail(%s)", u_errorName(status));
246 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
247 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
248 "u_strToLower() Fail(%s)", u_errorName(status));
250 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
251 (UChar *)result, array_sizeof(result), &status);
252 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
253 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
254 ctsvc_extra_normalize(result, size);
255 u_strToUTF8(temp, dest_size, &size, result, -1, &status);
256 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
257 "u_strToUTF8() Fail(%s)", u_errorName(status));
258 chosung_len = ctsvc_check_utf8(temp[0]);
259 RETVM_IF(chosung_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
260 memcpy(&dest[j], temp, chosung_len);
/*
 * Builds a search pattern for src in dest, handling one UTF-8 character of src
 * per loop iteration.
 *
 * Visible behavior:
 *  - a character for which is_chosung() is true is passed through
 *    ctsvc_hangul_compatibility2jamo() and written at dest[j] as UTF-8;
 *  - any other character is upper-cased, NFD-normalized, passed through
 *    ctsvc_extra_normalize() and then written at dest[j] as UTF-8;
 *  - the RETVM_IF() calls name CONTACTS_ERROR_INVALID_PARAMETER for a
 *    non-positive ctsvc_check_utf8() result and CONTACTS_ERROR_SYSTEM for a
 *    failed ICU call (presumably the value handed back to the caller).
 *
 * NOTE(review): several statements of this function are not visible in this
 * excerpt (the declarations of size, result, char_len and char_src, the body
 * of the space check, the updates of j and count, the final return).  Confirm
 * them against the complete file before relying on this description.
 */
270 int ctsvc_get_korean_search_pattern(const char *src, char *dest, int dest_size)
273 UErrorCode status = 0;
274 UChar tmp_result[10];
276 int i=0, j=0, count=0;
278 int str_len = strlen(src);
/* the step is the byte length reported for the character that starts at src[i] */
280 for (i=0;i<str_len;i+=char_len) {
282 char_len = ctsvc_check_utf8(src[i]);
283 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
/* a single-byte ASCII space is special-cased (the statement itself is not visible here) */
284 if (char_len == 1 && src[i] == ' ')
/* copy the current character into a NUL-terminated scratch buffer */
287 memcpy(char_src, &src[i], char_len);
288 char_src[char_len] = '\0';
/* UTF-8 to UTF-16, the form expected by the ICU calls below */
290 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
291 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
292 "u_strFromUTF8() Fail(%s)", u_errorName(status));
/* initial consonants are converted by ctsvc_hangul_compatibility2jamo() and written without case mapping or NFD */
294 if (is_chosung(tmp_result[0])) {
295 ctsvc_hangul_compatibility2jamo(tmp_result);
/* the capacity passed to ICU is what is left of dest after offset j */
297 u_strToUTF8(&dest[j], dest_size - j, &size, tmp_result, -1, &status);
298 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
299 "u_strToUTF8() Fail(%s)", u_errorName(status));
/* every other character: upper-case, then canonical decomposition (NFD) */
305 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
306 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
307 "u_strToUpper() Fail(%s)", u_errorName(status));
308 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
309 (UChar *)result, array_sizeof(result), &status);
310 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
311 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
/* NOTE(review): ctsvc_extra_normalize() is defined elsewhere; its effect on result is not visible here */
312 ctsvc_extra_normalize(result, size);
313 u_strToUTF8(&dest[j], dest_size - j, &size, result, -1, &status);
314 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
315 "u_strToUTF8() Fail(%s)", u_errorName(status));
/**
 * Tells whether the UTF-8 character at the start of src is a Hangul initial
 * consonant (choseong).
 *
 * Accepted code points, all encoded on three bytes:
 *   U+1100..U+115F  Hangul Jamo                 (e1 84 80 ~ e1 85 9f)
 *   U+3131..U+314E  Hangul Compatibility Jamo   (e3 84 b1 ~ e3 85 8e)
 *   U+A960..U+A97C  Hangul Jamo Extended-A      (ea a5 a0 ~ ea a5 bc)
 *
 * The bytes are read as unsigned char: with a signed plain char every byte
 * above 0x7F is negative, and combining such bytes with shifts and ORs yields
 * a value that never falls inside the ranges above.
 *
 * @param src  NUL-terminated UTF-8 text; only its first character is examined
 * @return true for a choseong, false otherwise (including NULL, empty,
 *         truncated or malformed input)
 */
bool ctsvc_is_chosung(const char *src)
{
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int code;

	if (NULL == src)
		return false;

	/* all accepted blocks use the 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
	if (0xE0 != (bytes[0] & 0xF0))
		return false;
	/* a terminator is not a continuation byte, so the reads stop at the end of the string */
	if (0x80 != (bytes[1] & 0xC0))
		return false;
	if (0x80 != (bytes[2] & 0xC0))
		return false;

	code = ((bytes[0] & 0x0Fu) << 12) | ((bytes[1] & 0x3Fu) << 6) | (bytes[2] & 0x3Fu);

	if (0x1100 <= code && code <= 0x115F) /* Hangul Jamo */
		return true;
	if (0x3131 <= code && code <= 0x314E) /* Hangul Compatibility Jamo */
		return true;
	if (0xA960 <= code && code <= 0xA97C) /* Hangul Jamo Extended-A */
		return true;

	return false;
}
/**
 * Tells whether src contains at least one Hangul initial consonant.
 *
 * The string is walked one UTF-8 character at a time, using the byte length
 * ctsvc_check_utf8() reports for each lead byte, and every character is tested
 * with ctsvc_is_chosung().
 *
 * @param src  NUL-terminated UTF-8 text
 * @return true as soon as a choseong is found; false when none is found, when
 *         src is NULL, or when a lead byte is rejected by ctsvc_check_utf8()
 */
bool ctsvc_has_chosung(const char *src)
{
	int i = 0;
	int char_len = 0;
	int str_len;

	if (NULL == src)
		return false;

	str_len = (int)strlen(src);

	for (i = 0; i < str_len; i += char_len) {
		char_len = ctsvc_check_utf8(src[i]);
		/* a non-positive length would stall the scan or move the index backwards */
		if (char_len <= 0)
			return false;

		if (ctsvc_is_chosung(&(src[i])))
			return true;
	}

	return false;
}
/*
 * Tells whether the UTF-8 character at the start of src is a Hangul letter or
 * syllable.
 *
 * Accepted blocks, all encoded on three bytes:
 *   Hangul Jamo               : 0x1100 ~ 0x11FF   e1 84 80 ~ e1 87 bf
 *   Hangul Compatibility Jamo : 0x3130 ~ 0x318F   e3 84 b0 ~ e3 86 8f
 *   Hangul Jamo Extended A    : 0xA960 ~ 0xA97F   ea a5 a0 ~ ea a5 bf
 *   Hangul syllables          : 0xAC00 ~ 0xD7AF   ea b0 80 ~ ed 9e af
 *   Hangul Jamo Extended B    : 0xD7B0 ~ 0xD7FF   ed 9e b0 ~ ed 9f bf
 *   Hangul halfwidth          : 0xFFA0 ~ 0xFFDC   ef be a0 ~ ef bf 9c
 *
 * The bytes are read as unsigned char and decoded to a code point before the
 * comparison: with a signed plain char, tests such as 0x80 <= src[2] can never
 * be true, because every byte above 0x7F is negative.
 *
 * Returns false for NULL, empty, truncated or malformed input.
 */
static bool __ctsvc_is_hangul(const char *src)
{
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int code;

	if (NULL == src)
		return false;

	/* all Hangul blocks use the 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
	if (0xE0 != (bytes[0] & 0xF0))
		return false;
	/* a terminator is not a continuation byte, so the reads stop at the end of the string */
	if (0x80 != (bytes[1] & 0xC0))
		return false;
	if (0x80 != (bytes[2] & 0xC0))
		return false;

	code = ((bytes[0] & 0x0Fu) << 12) | ((bytes[1] & 0x3Fu) << 6) | (bytes[2] & 0x3Fu);

	if (0x1100 <= code && code <= 0x11FF) /* Hangul Jamo */
		return true;
	if (0x3130 <= code && code <= 0x318F) /* Hangul Compatibility Jamo */
		return true;
	if (0xA960 <= code && code <= 0xA97F) /* Hangul Jamo Extended A */
		return true;
	if (0xAC00 <= code && code <= 0xD7FF) /* Hangul syllables, then Jamo Extended B */
		return true;
	if (0xFFA0 <= code && code <= 0xFFDC) /* Hangul halfwidth */
		return true;

	return false;
}
491 bool ctsvc_has_korean(const char *src)
495 int str_len = strlen(src);
497 for (i=0;i<str_len;i+=char_len) {
498 char_len = ctsvc_check_utf8(src[i]);
499 RETV_IF(CONTACTS_ERROR_INVALID_PARAMETER == char_len, false);
500 if (__ctsvc_is_hangul(&(src[i])))