4 * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 #include <unicode/ustring.h>
21 #include <unicode/unorm.h>
22 #include <unicode/ucol.h>
23 #include <unicode/uset.h>
25 #include "ctsvc_internal.h"
26 #include "ctsvc_normalize.h"
27 #include "ctsvc_localize.h"
28 #include "ctsvc_localize_utils.h"
30 #include "ctsvc_localize_kor.h"
/*
 * First and last UTF-16 code unit of each Unicode range that this file
 * treats as Korean.
 */

/* Korean: Hangul Jamo */
#define CTSVC_HAN_J_START ((UChar)0x1100)
#define CTSVC_HAN_J_END ((UChar)0x11FF)

/* Korean: Hangul Jamo Extended-A */
#define CTSVC_JAMO_A_START ((UChar)0xA960)
#define CTSVC_JAMO_A_END ((UChar)0xA97F)

/* Korean: Hangul Jamo Extended-B */
#define CTSVC_JAMO_B_START ((UChar)0xD7B0)
#define CTSVC_JAMO_B_END ((UChar)0xD7FF)

/* Korean: Hangul Compatibility Jamo */
#define CTSVC_HAN_C_START ((UChar)0x3130)
#define CTSVC_HAN_C_END ((UChar)0x318F)

/* Korean: halfwidth Hangul */
#define CTSVC_HAN_HALF_START ((UChar)0xFFA0)
#define CTSVC_HAN_HALF_END ((UChar)0xFFDC)

/* Korean: Hangul Syllables */
#define CTSVC_HAN_SYLLABLES_START ((UChar)0xAC00)
#define CTSVC_HAN_SYLLABLES_END ((UChar)0xD7A3)
/*
 * Mapping tables between Hangul Compatibility Jamo (U+31xx) and conjoining
 * Hangul Jamo (U+11xx).
 *
 * Each table stores only the LOW byte of the code unit.  The tables come in
 * pairs: entry i of a "compatibility" table corresponds to entry i of the
 * matching "jamo" table.  They are searched with strchr(), which is why every
 * table ends with a 0x00 terminator.
 *
 * The choseong pair relies on that terminator: low byte 0x00 (U+1100) cannot
 * be stored as an ordinary string element, so the pair 0x31 <-> 0x00 is placed
 * last.  strchr(hangul_jamo_choseong, 0) then lands on the first 0x00, which
 * sits at the same index as 0x31 in the compatibility table.
 */

/* Compatibility jamo (U+31xx) that map to a leading consonant. */
static const char hangul_compatibility_choseong[] = {
	0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41, 0x42,
	0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
	0x4B, 0x4C, 0x4D, 0x4E, 0x65, 0x66, 0x6E, 0x71,
	0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
	0x84, 0x85, 0x86, 0x31, 0x00
};

/* Leading consonants (choseong), U+1100..U+115F. */
static const char hangul_jamo_choseong[] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07,
	0x08, 0x21, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
	0x0F, 0x10, 0x11, 0x12, 0x14, 0x15, 0x1C, 0x1D,
	0x1E, 0x20, 0x22, 0x23, 0x27, 0x29, 0x2B, 0x2C,
	0x2D, 0x2E, 0x2F, 0x32, 0x36, 0x40, 0x47, 0x4C,
	0x57, 0x58, 0x59, 0x00, 0x00
};

/* Compatibility jamo (U+31xx) that map to a vowel. */
static const char hangul_compatibility_jungseong[] = {
	0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
	0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E,
	0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x87, 0x88,
	0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x00
};

/* Vowels (jungseong), U+1160..U+11A7. */
static const char hangul_jamo_jungseong[] = {
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
	0x71, 0x72, 0x73, 0x74, 0x75, 0x60, 0x84, 0x85,
	0x88, 0x91, 0x92, 0x94, 0x9E, 0xA1, 0x00
};

/* Compatibility jamo (U+31xx) that map to a trailing consonant. */
static const char hangul_compatibility_jongseong[] = {
	0x33, 0x35, 0x36, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
	0x3F, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
	0x6F, 0x70, 0x82, 0x83, 0x00
};

/* Trailing consonants (jongseong), U+11A8..U+11FF. */
static const char hangul_jamo_jongseong[] = {
	0xAA, 0xAC, 0xAD, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4,
	0xB5, 0xC7, 0xC8, 0xCC, 0xCE, 0xD3, 0xD7, 0xD9,
	0xDD, 0xDF, 0xF1, 0xF2, 0x00
};
94 static inline bool is_chosung(UChar src)
96 int unicode_value1 = 0;
97 int unicode_value2 = 0;
99 unicode_value1 = (0xFF00 & (src)) >> 8;
100 unicode_value2 = (0xFF & (src));
102 if (unicode_value1 == 0x31
103 && (0x30 <= unicode_value2 && unicode_value2 <= 0x4e)) /* compatiblility jame */
106 if (unicode_value1 == 0xA9
107 && (0x60 <= unicode_value2 && unicode_value2 <= 0x7C)) /* jamo Extended-A */
110 if (unicode_value1 == 0x11
111 && (0x00 <= unicode_value2 && unicode_value2 <= 0x5E)) /* jamo */
117 bool ctsvc_is_hangul(UChar src)
119 if ((0x1100 == (src & 0xFF00)) /* korean -Hangul Jamo*/
120 || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_A_START, src, CTSVC_JAMO_A_END)
121 || CTSVC_COMPARE_BETWEEN(CTSVC_JAMO_B_START, src, CTSVC_JAMO_B_END)
122 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, src, CTSVC_HAN_C_END)
123 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, src, CTSVC_HAN_HALF_END)
124 || CTSVC_COMPARE_BETWEEN(CTSVC_HAN_SYLLABLES_START, src, CTSVC_HAN_SYLLABLES_END)) {
132 void ctsvc_hangul_compatibility2jamo(UChar *src)
134 int unicode_value1 = 0;
135 int unicode_value2 = 0;
137 unicode_value1 = (0xFF00 & (*src)) >> 8;
138 unicode_value2 = (0xFF & (*src));
140 /* korean -Hangul Jamo halfwidth*/
141 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
142 unicode_value1 = 0x31;
144 if (unicode_value2 < 0xBF)
145 unicode_value2 -= 0x70;
146 else if (unicode_value2 < 0xC8)
147 unicode_value2 -= 0x73;
148 else if (unicode_value2 < 0xD0)
149 unicode_value2 -= 0x75;
150 else if (unicode_value2 < 0xD8)
151 unicode_value2 -= 0x77;
153 unicode_value2 -= 0x79;
155 (*src) = unicode_value1 << 8 | unicode_value2;
158 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_C_START, *src, CTSVC_HAN_C_END)) {
160 if ((pos = strchr(hangul_compatibility_choseong, unicode_value2))) {
161 unicode_value1 = 0x11;
162 unicode_value2 = hangul_jamo_choseong[pos - hangul_compatibility_choseong];
163 (*src) = unicode_value1 << 8 | unicode_value2;
164 } else if ((pos = strchr(hangul_compatibility_jungseong, unicode_value2))) {
165 unicode_value1 = 0x11;
166 unicode_value2 = hangul_jamo_jungseong[pos - hangul_compatibility_jungseong];
167 (*src) = unicode_value1 << 8 | unicode_value2;
168 } else if ((pos = strchr(hangul_compatibility_jongseong, unicode_value2))) {
169 unicode_value1 = 0x11;
170 unicode_value2 = hangul_jamo_jongseong[pos - hangul_compatibility_jongseong];
171 (*src) = unicode_value1 << 8 | unicode_value2;
176 void ctsvc_hangul_jamo2compatibility(UChar *src)
178 int unicode_value1 = 0;
179 int unicode_value2 = 0;
181 unicode_value1 = (0xFF00 & (*src)) >> 8;
182 unicode_value2 = (0xFF & (*src));
184 /* korean -Hangul Jamo halfwidth*/
185 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
186 unicode_value1 = 0x31;
188 if (unicode_value2 < 0xBF)
189 unicode_value2 -= 0x70;
190 else if (unicode_value2 < 0xC8)
191 unicode_value2 -= 0x73;
192 else if (unicode_value2 < 0xD0)
193 unicode_value2 -= 0x75;
194 else if (unicode_value2 < 0xD8)
195 unicode_value2 -= 0x77;
197 unicode_value2 -= 0x79;
199 (*src) = unicode_value1 << 8 | unicode_value2;
202 if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_J_START, *src, CTSVC_HAN_J_END)) {
204 if ((pos = strchr(hangul_jamo_choseong, unicode_value2))) {
205 unicode_value1 = 0x31;
206 unicode_value2 = hangul_compatibility_choseong[pos - hangul_jamo_choseong];
207 (*src) = unicode_value1 << 8 | unicode_value2;
208 } else if ((pos = strchr(hangul_jamo_jungseong, unicode_value2))) {
209 unicode_value1 = 0x31;
210 unicode_value2 = hangul_compatibility_jungseong[pos - hangul_jamo_jungseong];
211 (*src) = unicode_value1 << 8 | unicode_value2;
212 } else if ((pos = strchr(hangul_jamo_jongseong, unicode_value2))) {
213 unicode_value1 = 0x31;
214 unicode_value2 = hangul_compatibility_jongseong[pos - hangul_jamo_jongseong];
215 (*src) = unicode_value1 << 8 | unicode_value2;
220 int ctsvc_get_chosung(const char *src, char *dest, int dest_size)
223 UErrorCode status = 0;
224 UChar tmp_result[10];
226 int chosung_len = 0, count = 0, i = 0, j = 0;
228 int str_len = strlen(src);
229 char temp[dest_size];
231 for (i = 0; i < str_len; i += char_len) {
233 char_len = ctsvc_check_utf8(src[i]);
234 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
236 memcpy(char_src, &src[i], char_len);
237 char_src[char_len] = '\0';
239 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
240 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
241 "u_strFromUTF8() Fail(%s)", u_errorName(status));
243 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
244 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
245 "u_strToLower() Fail(%s)", u_errorName(status));
247 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
248 (UChar *)result, array_sizeof(result), &status);
249 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
250 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
251 ctsvc_extra_normalize(result, size);
252 u_strToUTF8(temp, dest_size, &size, result, -1, &status);
253 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
254 "u_strToUTF8() Fail(%s)", u_errorName(status));
255 chosung_len = ctsvc_check_utf8(temp[0]);
256 RETVM_IF(chosung_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
257 memcpy(&dest[j], temp, chosung_len);
/*
 * Builds a search pattern in dest from the UTF-8 string src, one character
 * at a time.
 *
 * src       : NUL-terminated UTF-8 input (its length is taken with strlen()).
 * dest      : output buffer that receives UTF-8 text at offset j.
 * dest_size : capacity of dest; the space still available (dest_size - j) is
 *             what is handed to u_strToUTF8().
 *
 * Error returns visible here: CONTACTS_ERROR_INVALID_PARAMETER when
 * ctsvc_check_utf8() does not report a positive length, and
 * CONTACTS_ERROR_SYSTEM when an ICU call fails.
 *
 * NOTE(review): the statements that advance j, update count, terminate dest
 * and produce the success return value are not visible in this view; confirm
 * that every write to dest outside u_strToUTF8() stays below dest_size.
 */
267 int ctsvc_get_korean_search_pattern(const char *src, char *dest, int dest_size)
270 UErrorCode status = 0;
271 UChar tmp_result[10];
273 int i = 0, j = 0, count = 0;
275 int str_len = strlen(src);
/* Walk src one UTF-8 character at a time; char_len is that character's byte length. */
277 for (i = 0; i < str_len; i += char_len) {
279 char_len = ctsvc_check_utf8(src[i]);
280 RETVM_IF(char_len <= 0, CONTACTS_ERROR_INVALID_PARAMETER, "check_utf8 Fail");
/* Single-byte space. NOTE(review): presumably skipped; the guarded statement is not visible here. */
281 if (char_len == 1 && src[i] == ' ')
/*
 * Copy the current character into a NUL-terminated scratch buffer.
 * NOTE(review): nothing visible checks that char_len bytes remain in src
 * before the terminator.
 */
284 memcpy(char_src, &src[i], char_len);
285 char_src[char_len] = '\0';
/* UTF-8 -> UTF-16 for the single character. */
287 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
288 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
289 "u_strFromUTF8() Fail(%s)", u_errorName(status));
/*
 * Leading-consonant input: rewrite the first code unit as conjoining jamo and
 * emit it without upper-casing or NFD normalization.
 */
291 if (is_chosung(tmp_result[0])) {
292 ctsvc_hangul_compatibility2jamo(tmp_result);
294 u_strToUTF8(&dest[j], dest_size - j, &size, tmp_result, -1, &status);
295 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
296 "u_strToUTF8() Fail(%s)", u_errorName(status));
/*
 * NOTE(review): presumably the other branch of the is_chosung() test.
 * Upper-case, NFD-decompose, apply ctsvc_extra_normalize(), then emit as UTF-8.
 */
301 u_strToUpper(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
302 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
303 "u_strToUpper() Fail(%s)", u_errorName(status));
304 size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
305 (UChar *)result, array_sizeof(result), &status);
306 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
307 "unorm_normalize(%s) Fail(%s)", src, u_errorName(status));
308 ctsvc_extra_normalize(result, size);
309 u_strToUTF8(&dest[j], dest_size - j, &size, result, -1, &status);
310 RETVM_IF(U_FAILURE(status), CONTACTS_ERROR_SYSTEM,
311 "u_strToUTF8() Fail(%s)", u_errorName(status));
/*
 * Report whether the UTF-8 character at src is a leading consonant
 * (choseong).  Only three-byte sequences can match:
 *   e1 84 80 ~ e1 85 9f  Hangul Jamo
 *   e3 84 b1 ~ e3 85 8e  Hangul Compatibility Jamo
 *   ea a5 a0 ~ ea a5 bc  Hangul Jamo Extended-A
 *
 * @param src  NUL-terminated UTF-8 text; only its first character is examined
 * @return true when that character is in one of the ranges above
 */
bool ctsvc_is_chosung(const char *src)
{
	int char_len = 0;
	int tail = 0;
	unsigned char lead = 0;
	unsigned char mid = 0;

	if (NULL == src)
		return false;

	char_len = ctsvc_check_utf8(src[0]);
	if (char_len != 3) /* invalid value, or a length no choseong can have */
		return false;

	/*
	 * Bytes are read as unsigned char: through a signed plain char every
	 * continuation byte is negative, the shift below would be undefined and
	 * the range tests could never match.
	 */
	lead = (unsigned char)src[0];
	mid = (unsigned char)src[1];
	if ('\0' == src[1]) /* sequence cut short: do not read past the terminator */
		return false;

	tail = (mid << 8) | (unsigned char)src[2];

	if ((0xE1 == lead && CTSVC_COMPARE_BETWEEN(0x8480, tail, 0x859F)) /* korean -Hangul Jamo */
			|| (0xE3 == lead && CTSVC_COMPARE_BETWEEN(0x84B1, tail, 0x858E)) /* korean -Hangul Compatibility Jamo */
			|| (0xEA == lead && CTSVC_COMPARE_BETWEEN(0xA5A0, tail, 0xA5BC))) /* korean -Hangul Jamo extended A */
		return true;

	return false;
}
/*
 * Report whether the UTF-8 string src contains at least one leading-consonant
 * (choseong) character, as decided by ctsvc_is_chosung().
 *
 * The scan stops and reports false at the first byte for which
 * ctsvc_check_utf8() does not return a positive length, matching what
 * ctsvc_has_korean() does for invalid input.
 *
 * @param src  NUL-terminated UTF-8 text; NULL yields false
 */
bool ctsvc_has_chosung(const char *src)
{
	int i = 0;
	int char_len = 0;
	int str_len = 0;

	if (NULL == src)
		return false;

	str_len = strlen(src);

	for (i = 0; i < str_len; i += char_len) {
		char_len = ctsvc_check_utf8(src[i]);
		/* char_len is the loop step: zero would never advance, negative would walk backwards */
		if (char_len <= 0)
			return false;

		if (ctsvc_is_chosung(&(src[i])))
			return true;
	}

	return false;
}
/*
 * Report whether the UTF-8 character at src is Korean.  Every accepted range
 * is encoded in three bytes:
 *
 *   Hangul Jamo               0x1100 ~ 0x11FF   e1 84 80 ~ e1 87 bf
 *   Hangul Compatibility Jamo 0x3130 ~ 0x318F   e3 84 b0 ~ e3 86 8f
 *   Hangul Jamo Extended A    0xA960 ~ 0xA97F   ea a5 a0 ~ ea a5 bf
 *   Hangul syllables          0xAC00 ~ 0xD7AF   ea b0 80 ~ ed 9e af
 *   Hangul Jamo Extended B    0xD7B0 ~ 0xD7FF   ed 9e b0 ~ ed 9f bf
 *   Hangul halfwidth          0xFFA0 ~ 0xFFDC   ef be a0 ~ ef bf 9c
 *
 * Bytes are compared as unsigned char.  Through a signed plain char every
 * byte >= 0x80 is negative, so tests such as "0x80 <= src[2]" could never
 * succeed.
 */
static bool __ctsvc_is_hangul(const char *src)
{
	unsigned char lead = 0;
	unsigned char mid = 0;
	unsigned char last = 0;
	int char_len = 0;

	char_len = ctsvc_check_utf8(src[0]);
	if (char_len != 3) /* invalid value, or a length no Hangul character has */
		return false;

	lead = (unsigned char)src[0];

	/*
	 * Every accepted sequence has both trailing bytes in 0x80..0xBF.
	 * Checking the second byte first also stops the third byte from being
	 * read when the string ends early.
	 */
	mid = (unsigned char)src[1];
	if (mid < 0x80 || 0xBF < mid)
		return false;

	last = (unsigned char)src[2];
	if (last < 0x80 || 0xBF < last)
		return false;

	switch (lead) {
	case 0xE1:
		/* Hangul Jamo : e1 84 80 ~ e1 87 bf */
		return 0x84 <= mid && mid <= 0x87;

	case 0xE3:
		/* Hangul Compatibility Jamo : e3 84 b0 ~ e3 86 8f */
		if (0x84 == mid)
			return 0xB0 <= last;
		if (0x85 == mid)
			return true;
		if (0x86 == mid)
			return last <= 0x8F;
		return false;

	case 0xEA:
		/* Hangul Jamo Extended A : ea a5 a0 ~ ea a5 bf */
		if (0xA5 == mid)
			return 0xA0 <= last;
		/* Hangul syllables : ea b0 80 ~ ea bf bf */
		return 0xB0 <= mid;

	case 0xEB:
	case 0xEC:
		/* Hangul syllables : eb 80 80 ~ ec bf bf */
		return true;

	case 0xED:
		/* Hangul syllables and Jamo Extended B : ed 80 80 ~ ed 9f bf */
		return mid <= 0x9F;

	case 0xEF:
		/* Hangul halfwidth : ef be a0 ~ ef bf 9c */
		if (0xBE == mid)
			return 0xA0 <= last;
		if (0xBF == mid)
			return last <= 0x9C;
		return false;

	default:
		return false;
	}
}
487 bool ctsvc_has_korean(const char *src)
491 int str_len = strlen(src);
493 for (i = 0; i < str_len; i += char_len) {
494 char_len = ctsvc_check_utf8(src[i]);
495 RETV_IF(CONTACTS_ERROR_INVALID_PARAMETER == char_len, false);
496 if (__ctsvc_is_hangul(&(src[i])))