4 * Copyright (c) 2010 - 2015 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
21 #include <unicode/ulocdata.h>
22 #include <unicode/ustring.h>
23 #include <unicode/unorm.h>
24 #include <unicode/ucol.h>
25 #include <unicode/uset.h>
28 #include "ctsvc_internal.h"
29 #include "ctsvc_normalize.h"
30 #include "ctsvc_localize.h"
31 #include "ctsvc_localize_utils.h"
/*
 * First and last code points of the Unicode "Combining Diacritical Marks"
 * block (U+0300..U+036F). __ctsvc_normalize_str() compares every UTF-16 code
 * unit of the decomposed string against these two bounds.
 */
33 #define CTSVC_COMBINING_DIACRITICAL_MARKS_START 0x0300
34 #define CTSVC_COMBINING_DIACRITICAL_MARKS_END 0x036f
40 } hiragana_group_letter;
/*
 * Lookup table consulted by __ctsvc_convert_japanese_group_letter(): the low
 * byte of a code unit is matched against each entry's .start/.end members and
 * the entry's .letter becomes the replacement character.
 * NOTE(review): the struct definition is not fully visible from here; the
 * initializers read as {letter, start, end}, where start/end are the low byte
 * of U+30xx hiragana code points - confirm against the typedef.
 * The ranges below do not overlap, so at most one entry can match a value.
 */
42 static hiragana_group_letter hiragana_group[13] = {
43 {0x3042, 0x41, 0x4a}, /* ぁ あ ぃ い ぅ う ぇ え ぉ お */
44 {0x3042, 0x94, 0x94}, /* ゔ */
45 {0x304b, 0x4b, 0x54}, /* か が き ぎ く ぐ け げ こ ご */
46 {0x304b, 0x95, 0x96}, /* ゕ ゖ */
47 {0x3055, 0x55, 0x5e}, /* さ ざ し じ す ず せ ぜ そ ぞ */
48 {0x305f, 0x5f, 0x69}, /* た だ ち ぢ っ つ づ て で と ど */
49 {0x306a, 0x6a, 0x6e}, /* な に ぬ ね の */
50 {0x306f, 0x6f, 0x7d}, /* は ば ぱ ひ び ぴ ふ ぶ ぷ へ べ ぺ ほ ぼ ぽ */
51 {0x307e, 0x7e, 0x82}, /* ま み む め も */
52 {0x3084, 0x83, 0x88}, /* ゃ や ゅ ゆ ょ よ */
53 {0x3089, 0x89, 0x8d}, /* ら り る れ ろ */
54 {0x308f, 0x8e, 0x92}, /* ゎ わ ゐ ゑ を */
55 {0x3093, 0x93, 0x93}, /* ん */
/*
 * Copy src into dest one multi-byte sequence at a time.
 *
 * src       - NUL-terminated input string; a NULL src is reported via ERR().
 * dest      - output buffer.
 * dest_size - capacity of dest in bytes.
 *
 * Returns CONTACTS_ERROR_INVALID_PARAMETER on the "invalid character set"
 * error path.
 * NOTE(review): the success return value and the handling after the NULL-src
 * log are on lines not visible here; ctsvc_collation_str() and
 * ctsvc_normalize_str() treat any value below CONTACTS_ERROR_NONE as failure.
 */
58 static int __ctsvc_remove_special_char(const char *src, char *dest, int dest_size)
60 int s_pos = 0, d_pos = 0, char_type, src_size;
/* Error path for a NULL src. */
64 ERR("The parameter(src) is NULL");
/* Total byte length of src, used to detect a sequence that would run past its end. */
69 src_size = strlen(src);
71 while (src[s_pos] != 0) {
/* char_type is used below as the byte length of the sequence starting at s_pos. */
72 char_type = ctsvc_check_utf8(src[s_pos]);
/*
 * Copy only when the length is positive, strictly smaller than the space
 * left in dest (presumably keeping a byte for the terminator - confirm), and
 * not longer than what remains of src.
 */
74 if (0 < char_type && char_type < dest_size - d_pos && char_type <= src_size - s_pos) {
75 memcpy(dest+d_pos, src+s_pos, char_type);
/* Reject input whose sequence does not pass the checks above. */
80 ERR("The parameter(src:%s) has invalid character set", src);
82 return CONTACTS_ERROR_INVALID_PARAMETER;
/*
 * Build an ICU collation sort key for src with a collator opened for the
 * locale string obtained from ctsvc_get_langset().
 *
 * src  - NUL-terminated UTF-8 text.
 * dest - receives a calloc()ed buffer holding the sort-key bytes.
 *
 * Visible return values: CONTACTS_ERROR_NONE on the success path,
 * CONTACTS_ERROR_OUT_OF_MEMORY on the strdup() failure path and
 * CONTACTS_ERROR_SYSTEM when an ICU call reports failure.
 */
91 static inline int __ctsvc_collation_str(const char *src, char **dest)
94 UErrorCode status = U_ZERO_ERROR;
95 UChar *tmp_result = NULL;
/* Private, writable copy of the locale string. */
98 char *region = strdup(ctsvc_get_langset());
100 /* LCOV_EXCL_START */
101 ERR("strdup() Fail");
102 return CONTACTS_ERROR_OUT_OF_MEMORY;
/*
 * Find the first '.' in the locale string; presumably used to cut off an
 * encoding suffix before ucol_open() - confirm.
 */
106 char *dot = strchr(region, '.');
110 collator = ucol_open(region, &status);
111 if (U_FAILURE(status)) {
112 /* LCOV_EXCL_START */
113 ERR("ucol_open Fail(%s)", u_errorName(status));
115 return CONTACTS_ERROR_SYSTEM;
119 /* TODO: ucol_setAttribute is not called */
/*
 * NOTE(review): no ICU call is visible between the ucol_open() check above
 * and this test, so this branch looks unreachable until the
 * ucol_setAttribute() call from the TODO is actually added.
 */
120 if (U_FAILURE(status)) {
121 /* LCOV_EXCL_START */
122 ERR("ucol_setAttribute Fail(%s)", u_errorName(status));
124 ucol_close(collator);
125 return CONTACTS_ERROR_SYSTEM;
/*
 * Preflight: with a NULL destination of capacity 0 ICU stores the required
 * UTF-16 length in size and reports U_BUFFER_OVERFLOW_ERROR, which is why
 * that particular status is tolerated in the check below.
 */
129 u_strFromUTF8(NULL, 0, &size, src, strlen(src), &status);
130 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
131 /* LCOV_EXCL_START */
132 ERR("u_strFromUTF8 to get the dest length Fail(%s)", u_errorName(status));
134 ucol_close(collator);
135 return CONTACTS_ERROR_SYSTEM;
/* Clear the expected overflow status before the real conversion. */
138 status = U_ZERO_ERROR;
/* NOTE(review): the calloc() result is handed to u_strFromUTF8() without a NULL check. */
139 tmp_result = calloc(1, sizeof(UChar) * (size + 1));
140 u_strFromUTF8(tmp_result, size + 1, NULL, src, -1, &status);
141 if (U_FAILURE(status)) {
142 /* LCOV_EXCL_START */
143 ERR("u_strFromUTF8 Fail(%s)", u_errorName(status));
146 ucol_close(collator);
147 return CONTACTS_ERROR_SYSTEM;
/* First call only measures the sort key; the second call fills *dest. */
151 size = ucol_getSortKey(collator, tmp_result, -1, NULL, 0);
/* NOTE(review): *dest is not checked for NULL before ucol_getSortKey() writes into it. */
152 *dest = calloc(1, sizeof(uint8_t) * (size + 1));
153 size = ucol_getSortKey(collator, tmp_result, -1, (uint8_t *)*dest, size + 1);
155 ucol_close(collator);
158 return CONTACTS_ERROR_NONE;
/*
 * Public entry point for collation keys: passes src through
 * __ctsvc_remove_special_char() into a stack buffer and returns whatever
 * __ctsvc_collation_str() produces for that buffer.
 *
 * src  - input string.
 * dest - receives the buffer allocated by __ctsvc_collation_str().
 */
161 int ctsvc_collation_str(char *src, char **dest)
/* Variable-length stack buffer sized from src; very long input grows the stack accordingly. */
164 char temp[SAFE_STRLEN(src) + 1];
166 ret = __ctsvc_remove_special_char(src, temp, sizeof(temp));
/*
 * A failure is only logged; temp is still passed on below.
 * NOTE(review): confirm that temp is always NUL-terminated on the helper's
 * error paths.
 */
167 WARN_IF(ret < CONTACTS_ERROR_NONE, "__ctsvc_remove_special_char() Fail(%d)", ret);
169 return __ctsvc_collation_str(temp, dest);
/*
 * Normalize src: convert UTF-8 to UTF-16, upper-case it, decompose it with
 * unorm_normalize(UNORM_NFD), run the project's language helpers, process the
 * combining diacritical marks and convert the result back to UTF-8 in a
 * calloc()ed *dest.
 *
 * Each ICU step first tries a guessed buffer size and, when ICU answers
 * U_BUFFER_OVERFLOW_ERROR, allocates a buffer of the reported size and
 * retries.
 *
 * src      - NUL-terminated UTF-8 input.
 * dest     - receives the newly allocated UTF-8 result.
 * is_index - NOTE(review): its use is on lines not visible here; presumably it
 *            selects between ctsvc_extra_index_normalize() and
 *            ctsvc_extra_normalize() - confirm.
 *
 * Visible return values: CONTACTS_ERROR_NONE, CONTACTS_ERROR_OUT_OF_MEMORY
 * (calloc() failure) and CONTACTS_ERROR_SYSTEM (ICU failure).
 */
172 static int __ctsvc_normalize_str(const char *src, char **dest, bool is_index)
/* Initial guess for the UTF-16 length; replaced by the real length below. */
174 int32_t tmp_size = 100;
177 UErrorCode status = 0;
178 UChar *tmp_result = NULL;
179 UChar *tmp_upper = NULL;
180 UChar *result = NULL;
182 tmp_result = calloc(1, sizeof(UChar)*(tmp_size+1));
183 if (NULL == tmp_result) {
184 /* LCOV_EXCL_START */
185 ERR("calloc() Fail");
186 return CONTACTS_ERROR_OUT_OF_MEMORY;
/* Step 1: UTF-8 -> UTF-16; tmp_size receives the required length. */
190 u_strFromUTF8(tmp_result, tmp_size + 1, &tmp_size, src, -1, &status);
191 if (status == U_BUFFER_OVERFLOW_ERROR) {
192 status = U_ZERO_ERROR;
/* Retry with a buffer of exactly the reported size. */
194 tmp_result = calloc(1, sizeof(UChar) * (tmp_size + 1));
195 if (NULL == tmp_result) {
196 /* LCOV_EXCL_START */
197 ERR("calloc() Fail");
198 return CONTACTS_ERROR_OUT_OF_MEMORY;
202 u_strFromUTF8(tmp_result, tmp_size + 1, NULL, src, -1, &status);
203 if (U_FAILURE(status)) {
204 /* LCOV_EXCL_START */
205 ERR("u_strFromUTF8()Fail(%s)", u_errorName(status));
207 return CONTACTS_ERROR_SYSTEM;
210 } else if (U_FAILURE(status)) {
211 /* LCOV_EXCL_START */
212 ERR("u_strFromUTF8() Fail(%s)", u_errorName(status));
214 return CONTACTS_ERROR_SYSTEM;
/* Step 2: upper-case; start with a buffer as long as the source text. */
218 tmp_upper = calloc(1, sizeof(UChar)*(tmp_size+1));
219 if (NULL == tmp_upper) {
220 /* LCOV_EXCL_START */
221 ERR("calloc() Fail");
223 return CONTACTS_ERROR_OUT_OF_MEMORY;
/* A NULL locale argument makes ICU use its default locale for the case mapping. */
227 upper_size = u_strToUpper(tmp_upper, tmp_size+1, tmp_result, -1, NULL, &status);
228 if (status == U_BUFFER_OVERFLOW_ERROR) {
229 status = U_ZERO_ERROR;
231 tmp_upper = calloc(1, sizeof(UChar) * (upper_size + 1));
232 if (NULL == tmp_upper) {
233 /* LCOV_EXCL_START */
234 ERR("calloc() Fail");
236 return CONTACTS_ERROR_OUT_OF_MEMORY;
/*
 * NOTE(review): this retry re-runs u_strFromUTF8() on src instead of
 * u_strToUpper() on tmp_result, so on this path tmp_upper receives the text
 * without upper-casing. It looks like a copy-paste slip from step 1; the
 * intended call is probably
 * u_strToUpper(tmp_upper, upper_size + 1, tmp_result, -1, NULL, &status),
 * with the log message below adjusted to match - confirm.
 */
240 u_strFromUTF8(tmp_upper, upper_size + 1, NULL, src, -1, &status);
241 if (U_FAILURE(status)) {
242 /* LCOV_EXCL_START */
243 ERR("u_strFromUTF8()Fail(%s)", u_errorName(status));
246 return CONTACTS_ERROR_SYSTEM;
249 } else if (U_FAILURE(status)) {
250 /* LCOV_EXCL_START */
251 ERR("u_strToUpper() Fail(%s)", u_errorName(status));
254 return CONTACTS_ERROR_SYSTEM;
/* Step 3: canonical decomposition (NFD) of the upper-cased text into result. */
258 result = calloc(1, sizeof(UChar)*(size+1));
259 if (NULL == result) {
260 /* LCOV_EXCL_START */
261 ERR("calloc() Fail");
264 return CONTACTS_ERROR_OUT_OF_MEMORY;
268 size = unorm_normalize(tmp_upper, -1, UNORM_NFD, 0, result, size+1, &status);
269 if (status == U_BUFFER_OVERFLOW_ERROR) {
270 status = U_ZERO_ERROR;
272 result = calloc(1, sizeof(UChar)*(size + 1));
273 if (NULL == result) {
274 /* LCOV_EXCL_START */
275 ERR("calloc() Fail");
278 return CONTACTS_ERROR_OUT_OF_MEMORY;
282 unorm_normalize(tmp_upper, -1, UNORM_NFD, 0, result, size+1, &status);
283 if (U_FAILURE(status)) {
284 /* LCOV_EXCL_START */
285 ERR("unorm_normalize() Fail(%s)", u_errorName(status));
289 return CONTACTS_ERROR_SYSTEM;
292 } else if (U_FAILURE(status)) {
293 /* LCOV_EXCL_START */
294 ERR("unorm_normalize() Fail(%s)", u_errorName(status));
298 return CONTACTS_ERROR_SYSTEM;
/*
 * Step 4: project-specific language handling on the decomposed text.
 * NOTE(review): no use of ctsvc_check_language()'s return value is visible
 * on this line - confirm whether the detected language is meant to be
 * returned to the caller.
 */
302 ctsvc_check_language(result);
304 ctsvc_extra_index_normalize(result, size);
306 ctsvc_extra_normalize(result, size);
308 /* remove diacritical : U+0300 ~ U+036F */
/* Scratch buffer of the same length as result; code units are copied into it below. */
310 UChar *temp_result = NULL;
311 temp_result = calloc(1, sizeof(UChar)*(size+1));
312 if (NULL == temp_result) {
313 /* LCOV_EXCL_START */
314 ERR("calloc() Fail");
318 return CONTACTS_ERROR_OUT_OF_MEMORY;
322 bool replaced = false;
/* i walks result, j is the write position in temp_result. */
323 for (i = 0, j = 0; i < size; i++) {
324 if (CTSVC_COMPARE_BETWEEN((UChar)CTSVC_COMBINING_DIACRITICAL_MARKS_START,
325 result[i], (UChar)CTSVC_COMBINING_DIACRITICAL_MARKS_END)) {
328 temp_result[j++] = result[i];
/* Terminate the filtered string and continue with it as the result. */
333 temp_result[j] = 0x0;
335 result = temp_result;
/*
 * Step 5: UTF-16 -> UTF-8. The first call only measures the output (and
 * leaves an overflow status behind, hence the reset); the second call
 * writes into the freshly allocated *dest.
 */
340 u_strToUTF8(NULL, 0, &size, result, -1, &status);
341 status = U_ZERO_ERROR;
342 *dest = calloc(1, sizeof(char) * (size+1));
344 /* LCOV_EXCL_START */
345 ERR("calloc() Fail");
349 return CONTACTS_ERROR_OUT_OF_MEMORY;
353 u_strToUTF8(*dest, size+1, NULL, result, -1, &status);
354 if (U_FAILURE(status)) {
355 /* LCOV_EXCL_START */
356 ERR("u_strToUTF8() Fail(%s)", u_errorName(status));
362 return CONTACTS_ERROR_SYSTEM;
368 return CONTACTS_ERROR_NONE;
/*
 * Convert UTF-8 src to UTF-16 in dest and replace fullwidth ASCII and
 * fullwidth symbol code units with their halfwidth counterparts, in place.
 *
 * src       - NUL-terminated UTF-8 input.
 * dest      - caller-provided UTF-16 buffer.
 * dest_size - capacity of dest in UChar units; input that does not fit makes
 *             u_strFromUTF8() fail and this function return
 *             CONTACTS_ERROR_SYSTEM.
 * str_size  - NOTE(review): the assignment is on lines not visible here;
 *             presumably receives the converted length in UChar units -
 *             confirm.
 *
 * Returns CONTACTS_ERROR_NONE on success, CONTACTS_ERROR_SYSTEM on ICU failure.
 */
371 static int __ctsvc_convert_halfwidth_ascii_and_symbol(const char *src, UChar *dest, int dest_size, int* str_size)
374 int32_t size = dest_size;
375 UErrorCode status = 0;
/* size is overwritten with the number of UChar units produced. */
377 u_strFromUTF8(dest, dest_size, &size, src, strlen(src), &status);
378 if (U_FAILURE(status)) {
379 /* LCOV_EXCL_START */
380 ERR("u_strFromUTF8() Fail(%s)", u_errorName(status));
381 return CONTACTS_ERROR_SYSTEM;
387 /* full width -> half width */
388 for (i = 0; i < size; i++) {
389 /* FF00 ~ FF60: range tested for fullwidth ASCII variants (the Unicode variants proper are FF01 ~ FF5E) */
/*
 * NOTE(review): with the mapping below U+FF00, U+FF5F and U+FF60 become
 * 0x20, 0x7F and 0x80 respectively, which are not halfwidth forms of those
 * code points - confirm the intended bounds.
 */
390 if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, dest[i], (UChar)0xFF60)) {
/*
 * Keep the low byte and add 0x20 to land in ASCII, e.g. U+FF21 -> 0x41 'A';
 * unicode_value1 is the (always zero) high byte of the result.
 */
391 int unicode_value1 = 0;
392 int unicode_value2 = 0;
393 unicode_value1 = 0x0;
394 unicode_value2 = (0xFF & dest[i]) + 0x20;
395 dest[i] = unicode_value1 << 8 | unicode_value2;
396 } else if (CTSVC_COMPARE_BETWEEN((UChar)0xFFE0, dest[i], (UChar)0xFFE6)) {
397 /* FFE0~FFE6: Fullwidth symbol variants */
/* These have no arithmetic relation to their halfwidth forms, so map one by one. */
398 if (dest[i] == (UChar)0xFFE0)
399 dest[i] = (UChar)0x00A2;
400 else if (dest[i] == (UChar)0xFFE1)
401 dest[i] = (UChar)0x00A3;
402 else if (dest[i] == (UChar)0xFFE2)
403 dest[i] = (UChar)0x00AC;
404 else if (dest[i] == (UChar)0xFFE3)
405 dest[i] = (UChar)0x00AF;
406 else if (dest[i] == (UChar)0xFFE4)
407 dest[i] = (UChar)0x00A6;
408 else if (dest[i] == (UChar)0xFFE5)
409 dest[i] = (UChar)0x00A5;
410 else if (dest[i] == (UChar)0xFFE6)
411 dest[i] = (UChar)0x20A9;
418 return CONTACTS_ERROR_NONE;
/* Capacity, in UChar units, of the stack buffer used by ctsvc_get_halfwidth_string(). */
421 #define LARGE_BUFFER_SIZE 100
/*
 * Return a newly allocated UTF-8 copy of src in which fullwidth ASCII and
 * symbol characters have been replaced by halfwidth ones.
 *
 * src       - NUL-terminated UTF-8 input. Input needing more than
 *             LARGE_BUFFER_SIZE UTF-16 code units makes the helper fail, and
 *             this function then returns CONTACTS_ERROR_SYSTEM.
 * dest      - receives the calloc()ed UTF-8 result.
 * dest_size - receives the UTF-8 length reported by u_strToUTF8().
 *
 * Returns CONTACTS_ERROR_NONE on success, CONTACTS_ERROR_SYSTEM on failure.
 */
423 int ctsvc_get_halfwidth_string(const char *src, char **dest, int *dest_size)
425 UChar unicodes[LARGE_BUFFER_SIZE+1];
/*
 * NOTE(review): the helper is given LARGE_BUFFER_SIZE as capacity while the
 * array has one extra element and no initializer is visible; for input of
 * exactly LARGE_BUFFER_SIZE code units ICU would not write a terminator, yet
 * the measuring call below uses length -1 - confirm the buffer is terminated.
 */
428 if (CONTACTS_ERROR_NONE != __ctsvc_convert_halfwidth_ascii_and_symbol(src, unicodes, LARGE_BUFFER_SIZE, &ustr_size)) {
429 /* LCOV_EXCL_START */
430 ERR("__ctsvc_convert_halfwidth_ascii_and_symbol() Fail");
431 return CONTACTS_ERROR_SYSTEM;
435 UErrorCode status = 0;
/* Measure the UTF-8 output first; the overflow status this leaves behind is cleared. */
439 u_strToUTF8(NULL, 0, &size, unicodes, -1, &status);
440 status = U_ZERO_ERROR;
/* NOTE(review): no NULL check for *dest is visible before it is written to. */
441 *dest = calloc(1, sizeof(char) * (size+1));
443 u_strToUTF8(*dest, size+1, dest_size, unicodes, ustr_size, &status);
444 if (U_FAILURE(status)) {
445 /* LCOV_EXCL_START */
446 ERR("u_strToUTF8() Fail(%s)", u_errorName(status));
452 return CONTACTS_ERROR_SYSTEM;
456 return CONTACTS_ERROR_NONE;
/*
 * Public normalization entry point: runs src through
 * __ctsvc_remove_special_char() into a stack buffer and then through
 * __ctsvc_normalize_str() with is_index == false.
 *
 * src  - NUL-terminated UTF-8 input. It is passed to strlen() before any
 *        check, so it must not be NULL.
 * dest - receives the string allocated by __ctsvc_normalize_str().
 *
 * Returns the helper's negative error code when the first step fails.
 * NOTE(review): the final return statement is not visible here; presumably
 * the result of __ctsvc_normalize_str() is returned - confirm.
 */
459 int ctsvc_normalize_str(const char *src, char **dest)
461 int ret = CONTACTS_ERROR_NONE;
/* Variable-length stack buffer sized from src. */
462 char temp[strlen(src) + 1];
464 ret = __ctsvc_remove_special_char(src, temp, strlen(src) + 1);
/* Unlike ctsvc_collation_str(), a failure here aborts instead of only being logged. */
465 RETVM_IF(ret < CONTACTS_ERROR_NONE, ret, "__ctsvc_remove_special_char() Fail(%d)", ret);
467 ret = __ctsvc_normalize_str(temp, dest, false);
/*
 * Replace the character held in dest with the .letter of the hiragana_group
 * entry whose range contains it, rewriting dest in place.
 *
 * dest - NUL-terminated UTF-8 string; used as input and as output buffer.
 *
 * Returns early (after logging) when an ICU conversion fails.
 */
471 static void __ctsvc_convert_japanese_group_letter(char *dest)
473 int i, size, dest_len;
474 UErrorCode status = 0;
/* One output code unit plus terminator; stays empty when no table entry matches. */
476 UChar result[2] = {0x00};
/* The output is written back into dest, so its capacity is its current length plus the NUL. */
479 dest_len = strlen(dest) + 1;
480 u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, dest, -1, &status);
481 RETM_IF(U_FAILURE(status), "u_strFromUTF8() Fail(%s)", u_errorName(status));
/*
 * Only the low byte of the first code unit is kept for the table lookup.
 * NOTE(review): the high byte is discarded, so any code unit whose low byte
 * falls inside a table range matches, not only U+30xx hiragana - confirm the
 * caller guarantees hiragana input.
 */
483 unicode_value = (0xFF & (tmp_result[0]));
/* 13 is the number of entries in hiragana_group. */
485 for (i = 0; i < 13; i++) {
486 if (hiragana_group[i].start <= unicode_value
487 && unicode_value <= hiragana_group[i].end)
488 result[0] = hiragana_group[i].letter;
/* Convert the selected letter (or the empty string) back to UTF-8 over dest. */
491 u_strToUTF8(dest, dest_len, &size, result, -1, &status);
492 RETM_IF(U_FAILURE(status), "u_strToUTF8() Fail(%s)", u_errorName(status));
/**
 * Tell whether @src starts with the byte sequence E2 80 A6, i.e. the UTF-8
 * encoding of U+2026 HORIZONTAL ELLIPSIS.
 *
 * The bytes are compared as unsigned char: with a plain char that is signed,
 * a byte such as 0xE2 is read as a negative value and can never equal the
 * positive constant, so the test would always fail on such platforms.
 *
 * @param src  NUL-terminated string; NULL is treated as "no match".
 * @return true when the first three bytes are E2 80 A6, false otherwise.
 */
static bool __ctsvc_check_range_out_index(const char src[])
{
	const unsigned char *bytes = (const unsigned char *)src;

	if (NULL == bytes)
		return false;

	/* && short-circuits on the first mismatch, so the terminating NUL of a
	 * shorter string stops the comparison before reading past it. */
	return bytes[0] == 0xe2 && bytes[1] == 0x80 && bytes[2] == 0xa6;
}
/*
 * Produce the index key for src: take its first UTF-8 character, normalize it
 * with __ctsvc_normalize_str(..., true), cut the result down to its first
 * character and, for Japanese, map it to its group letter.
 *
 * src  - NUL-terminated UTF-8 input; src[0] is read unconditionally.
 * dest - receives the string allocated by __ctsvc_normalize_str().
 *
 * Visible return values: CONTACTS_ERROR_INTERNAL when ctsvc_check_utf8()
 * rejects the first byte, CONTACTS_ERROR_INVALID_PARAMETER when src is shorter
 * than the announced sequence length.
 * NOTE(review): the final return statement is not visible from here.
 */
504 int ctsvc_normalize_index(const char *src, char **dest)
506 int ret = CONTACTS_ERROR_NONE;
/* Holds the first character of src plus terminator. */
507 char first_str[10] = {0};
/*
 * NOTE(review): first_str is zero-initialised above and no write to it is
 * visible before this test, so the first condition looks always true -
 * confirm whether the test is a leftover.
 */
510 if (first_str[0] == '\0' || __ctsvc_check_range_out_index(first_str)) {
/* Byte length of the first character, derived from its lead byte. */
511 length = ctsvc_check_utf8(src[0]);
512 RETVM_IF(length <= 0, CONTACTS_ERROR_INTERNAL, "check_utf8() Fail");
/* The memset keeps first_str NUL-terminated after strncpy(), provided length < sizeof(first_str). */
514 memset(first_str, 0x00, sizeof(first_str));
515 strncpy(first_str, src, length);
/* A shorter copy means src ended in the middle of the sequence. */
516 if (length != strlen(first_str)) {
517 /* LCOV_EXCL_START */
/* NOTE(review): strlen() yields size_t but is printed with %d; "strlne" in the message looks like a typo for "strlen". */
518 ERR("length : %d, first_str : %s, strlne : %d", length, first_str, strlen(first_str));
519 return CONTACTS_ERROR_INVALID_PARAMETER;
523 ret = __ctsvc_normalize_str(first_str, dest, true);
/*
 * NOTE(review): this tests the dest pointer itself, not ret or *dest, so a
 * failed normalization is not caught here and *dest is dereferenced below
 * regardless - confirm and consider checking ret < CONTACTS_ERROR_NONE.
 */
524 RETVM_IF(dest == NULL, ret, "__ctsvc_normalize_str() Fail");
/* Keep only the first character of the normalized string. */
526 if ((*dest)[0] != '\0') {
527 length = ctsvc_check_utf8((*dest)[0]);
528 (*dest)[length] = '\0';
/*
 * NOTE(review): the visible final return of __ctsvc_normalize_str() is
 * CONTACTS_ERROR_NONE; confirm that it can actually return
 * CTSVC_LANG_JAPANESE, otherwise this branch never runs.
 */
531 if (ret == CTSVC_LANG_JAPANESE)
532 __ctsvc_convert_japanese_group_letter(*dest);