4 * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved.
6 * Contact: Hakjoo Ko <hakjoo.ko@samsung.com>
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
31 #include <unicode/utypes.h>
32 #include <unicode/ucol.h>
33 #include <unicode/uiter.h>
34 #include <unicode/ustring.h>
36 #include "collation.h"
38 #include "db-util-debug.h"
/*
 * Guard macros and error aliases used throughout this file.
 *
 * Only the trace line of each guard macro is visible in this view: it logs a
 * "[<function>()][RET_IF]" warning wrapped in the ANSI yellow escape
 * (\x1b[33m ... \x1b[0m). Presumably DB_UTIL_RETV_IF returns `ret` and
 * DB_UTIL_RET_IF returns void when `cond` holds -- TODO confirm against the
 * complete macro bodies.
 *
 * DB_UTIL_ERR_COL_FUNC_RET is the value the collation callbacks return when
 * they cannot perform a comparison; it is an alias for DB_UTIL_ERROR.
 */
40 #define DB_UTIL_RETV_IF(cond,ret) \
43 DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__);\
48 #define DB_UTIL_RET_IF(cond) \
51 DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__);\
56 #define DB_UTIL_ERR_COL_FUNC_RET DB_UTIL_ERROR
/* Returned by __db_util_dl_load_icu when dlopen() of the ICU library fails. */
59 DB_UTIL_ERR_DLOPEN = -10,
/*
 * Number of ICU entry points resolved with dlsym(); sizes the icu_handle
 * array and bounds the dlsym() loop in __db_util_dl_load_icu.
 */
72 #define ICU_FUNC_CNT 8
/*
 * Function-pointer types for the ICU entry points that this file resolves at
 * run time instead of linking against ICU directly.
 */
74 typedef UCollator* (*ICU_UCOL_OPEN)(const char *, UErrorCode *);
75 typedef void (*ICU_UCOL_CLOSE)(UCollator *);
76 typedef void (*ICU_UCOL_SETSTRENGTH)(UCollator *, UCollationStrength);
77 typedef UCollationResult (*ICU_UCOL_STRCOLL)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t);
78 typedef UCollationResult (*ICU_UCOL_STRCOLLITER)(const UCollator *, UCharIterator *, UCharIterator *, UErrorCode *);
79 typedef void (*ICU_UITER_SETUTF8)(UCharIterator *, const char *, int32_t);
80 typedef void (*ICU_ULOC_SETDEFAULT)(const char* localeID, UErrorCode* status);
81 typedef const char* (*ICU_ULOC_GETDEFAULT)(void);
/*
 * Members of the resolved-symbol table. __db_util_dl_load_icu fills the table
 * with a raw memcpy from an array of ICU_FUNC_CNT dlsym() results, so the
 * member order here has to match the order of the names in that function's
 * ICU_API table (the table contents are not visible in this view -- TODO
 * confirm the two orders agree).
 */
84 ICU_UCOL_OPEN icu_ucol_open;
85 ICU_UCOL_CLOSE icu_ucol_close;
86 ICU_UCOL_STRCOLL icu_ucol_strcoll;
87 ICU_UCOL_STRCOLLITER icu_ucol_strcollIter;
88 ICU_UCOL_SETSTRENGTH icu_ucol_setStrength;
89 ICU_UITER_SETUTF8 icu_uiter_setUTF8;
90 ICU_ULOC_SETDEFAULT icu_uloc_setDefault;
91 ICU_ULOC_GETDEFAULT icu_uloc_getDefault;
/*
 * The single table instance shared by every function in this file. The
 * collation callbacks test the members they need against NULL before use.
 */
94 db_util_icu_func_t icu_symbol;
/*
 * dlopen() handle for libicui18n.so; NULL until the first successful load.
 * No dlclose() call is visible in this view.
 */
96 void *g_dl_icu_handle = NULL;
/*
 * Forward declaration of the debug-only UTF-16 -> UTF-8 helper, which is
 * defined further down under the same DB_UTIL_ENABLE_DEVDEBUG guard.
 */
98 #ifdef DB_UTIL_ENABLE_DEVDEBUG
99 static char *strtoutf8(const UChar * unichars, int len);
/*
 * Load the ICU i18n library and resolve the entry points this file uses.
 *
 * libicui18n.so is dlopen()ed only while g_dl_icu_handle is still NULL. Each
 * name in ICU_API is then looked up with dlsym() and the resulting pointers
 * are copied into the global icu_symbol table.
 *
 * Returns DB_UTIL_ERR_DLOPEN when the library cannot be opened and
 * DB_UTIL_ERR_DLSYM when a symbol lookup reports an error. The success
 * return is not visible in this view; the caller compares the result
 * against DB_UTIL_OK.
 */
102 static int __db_util_dl_load_icu()
105 void *icu_handle[ICU_FUNC_CNT] = { 0 };
109 const char *ICU_API[] = {
120 if(g_dl_icu_handle == NULL) {
121 g_dl_icu_handle = dlopen("libicui18n.so", RTLD_LAZY | RTLD_GLOBAL);
122 if(g_dl_icu_handle == NULL) {
123 DB_UTIL_TRACE_WARNING("dlopen icu so fail");
124 return DB_UTIL_ERR_DLOPEN;
128 for (i = 0; i < ICU_FUNC_CNT; i++) {
129 handle = dlsym(g_dl_icu_handle, ICU_API[i]);
/*
 * NOTE(review): failure is detected through dlerror() rather than through the
 * dlsym() result. No dlerror() call that clears a stale error before dlsym()
 * is visible here; if none exists, an error left over from an earlier dl*
 * call would be reported as a failure of this lookup -- confirm.
 */
130 if ((dl_error = dlerror()) != NULL) {
131 DB_UTIL_TRACE_WARNING("dlsym(%s) is failed for %s",
132 dl_error, ICU_API[i]);
/*
 * NOTE(review): no dlclose() or reset of g_dl_icu_handle is visible on this
 * path, so the library presumably stays loaded after a failed lookup.
 */
133 return DB_UTIL_ERR_DLSYM;
135 icu_handle[i] = handle;
/*
 * Publish all pointers at once. This copies sizeof(icu_handle) bytes over
 * icu_symbol, which relies on the table being exactly ICU_FUNC_CNT function
 * pointers laid out in ICU_API order, and on object pointers and function
 * pointers having the same size on the target.
 */
138 memcpy((void*)&icu_symbol, (const void*)icu_handle, sizeof(icu_handle));
143 /* The collating function must return an integer that is negative, zero or positive */
/*
 * Collation callback registered for SQLITE_UTF16 with whole-string compare
 * (see __db_util_collation_create). Compares str1 and str2 through the
 * dynamically resolved ucol_strcoll and traces the outcome in debug builds.
 *
 * ucol    - application data pointer registered with the collation; the
 *           collator argument line of the call is not visible in this view.
 * str1_len, str2_len - lengths supplied by SQLite.
 * str1, str2         - the two strings, treated as UChar buffers.
 *
 * If the ucol_strcoll symbol is missing, the guard macro is used with
 * DB_UTIL_ERR_COL_FUNC_RET. The normal return statement is not visible here.
 */
144 static int __db_util_collate_icu_16(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
146 #ifdef DB_UTIL_ENABLE_DEVDEBUG
147 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16 func start \n");
149 UChar* tmp_v1 = (UChar *)str1;
150 UChar* tmp_v2 = (UChar *)str2;
/*
 * strtoutf8 returns a malloc()ed buffer (or NULL); the lines that release
 * utf8_v1/utf8_v2 are not visible in this view -- confirm they are freed.
 */
151 char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
152 char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
154 DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
155 DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
158 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
/*
 * NOTE(review): SQLite documents the length arguments of a collation callback
 * as byte counts, while ucol_strcoll() takes lengths in UChar units. The
 * lengths are forwarded unchanged here; confirm whether they should be halved
 * (SQLite's bundled ICU extension passes nLeft/2 and nRight/2).
 */
160 UCollationResult result = icu_symbol.icu_ucol_strcoll(
162 (const UChar *) str1, str1_len,
163 (const UChar *) str2, str2_len);
165 #ifdef DB_UTIL_ENABLE_DEVDEBUG
171 if (result == UCOL_LESS) {
172 DB_UTIL_TRACE_DEBUG("less \n");
173 } else if (result == UCOL_GREATER) {
174 DB_UTIL_TRACE_DEBUG("greater \n");
176 DB_UTIL_TRACE_DEBUG("equal \n");
183 /* The collating function must return an integer that is negative, zero or positive */
/*
 * Collation callback registered for SQLITE_UTF16 when cmp_type is
 * DB_UTIL_CMP_LC (see __db_util_collation_create).
 *
 * When str1 is longer than str2 the code compares sub-ranges
 * [str_from, str_to) of str1 against the whole of str2. It appears to slide
 * a window over str1 looking for a span that collates equal to str2, but the
 * lines that advance the window and the action taken on UCOL_EQUAL are not
 * visible in this view. A whole-string ucol_strcoll comparison is also
 * present; its exact placement relative to the window loop cannot be
 * determined from the visible lines.
 */
184 static int __db_util_collate_icu_16_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
186 #ifdef DB_UTIL_ENABLE_DEVDEBUG
187 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16_lc func start \n");
189 UChar* tmp_v1 = (UChar *)str1;
190 UChar* tmp_v2 = (UChar *)str2;
/*
 * strtoutf8 returns a malloc()ed buffer (or NULL); the lines that release
 * these buffers are not visible in this view -- confirm they are freed.
 */
192 char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
193 char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
195 DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
196 DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
204 UCollationResult result = 0;
/* Both window ends start at the beginning of str1. */
206 UChar* str_to = (UChar *)str1;
207 UChar* str_from = (UChar *)str1;
211 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
213 if(str1_len> str2_len) {
/*
 * The index advances by 2 per iteration, which suggests str2_len is being
 * treated as a byte count and converted to a number of UChar steps. The loop
 * body is not visible in this view.
 */
214 for(i=0;i<str2_len;i=i+2) {
/*
 * NOTE(review): str_to - str1 is a count of UChar elements, but it is compared
 * with str1_len, which the loop above seems to treat as bytes. The call below
 * likewise mixes a UChar count (str_to-str_from) with an unconverted str2_len.
 * Confirm the intended units; as written the window may run past the end of
 * str1.
 */
218 while((int)(str_to-(UChar*)str1) <= str1_len) {
219 result = icu_symbol.icu_ucol_strcoll(
221 (UChar *) str_from, str_to-str_from,
222 (const UChar *) str2, str2_len);
224 if (result == UCOL_EQUAL)
226 #ifdef DB_UTIL_ENABLE_DEVDEBUG
227 DB_UTIL_TRACE_DEBUG("equal \n");
/* Whole-string comparison of str1 against str2. */
236 result = icu_symbol.icu_ucol_strcoll(
238 (const UChar *) str1, str1_len,
239 (const UChar *) str2, str2_len);
242 #ifdef DB_UTIL_ENABLE_DEVDEBUG
243 if (result == UCOL_LESS) {
244 DB_UTIL_TRACE_DEBUG("less \n");
245 } else if (result == UCOL_GREATER) {
246 DB_UTIL_TRACE_DEBUG("greater \n");
247 } else if (result == UCOL_EQUAL) {
248 DB_UTIL_TRACE_DEBUG("equal \n");
250 DB_UTIL_TRACE_DEBUG("compare error : %d \n", result);
257 /* The collating function must return an integer that is negative, zero or positive */
/*
 * Collation callback registered for SQLITE_UTF8 with whole-string compare
 * (see __db_util_collation_create).
 *
 * Each input is wrapped in a UCharIterator with uiter_setUTF8, using the
 * length supplied by SQLite, and the two iterators are compared with
 * ucol_strcollIter. The argument lines of that call are not visible in this
 * view.
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when ICU reports a failure in `error`.
 * NOTE(review): SQLite treats this callback's return value as an ordering,
 * so that error code will be taken as a comparison result -- confirm this is
 * acceptable to callers.
 */
258 static int __db_util_collate_icu_8(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
260 UCharIterator uiter1, uiter2;
261 UErrorCode error = U_ZERO_ERROR;
263 #ifdef DB_UTIL_ENABLE_DEVDEBUG
264 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8 func start \n");
/*
 * NOTE(review): %s assumes str1/str2 are NUL-terminated, while the function
 * otherwise relies on the explicit lengths -- confirm the inputs are
 * terminated before enabling this trace.
 */
265 DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
268 DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
269 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
271 icu_symbol.icu_uiter_setUTF8(&uiter1, (const char *) str1, str1_len);
272 icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
274 UCollationResult result = icu_symbol.icu_ucol_strcollIter(
279 if(U_FAILURE(error)) {
280 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8 ucol_strcollIter error: %d\n", error);
281 return DB_UTIL_ERR_COL_FUNC_RET;
284 #ifdef DB_UTIL_ENABLE_DEVDEBUG
285 if (result == UCOL_LESS) {
286 DB_UTIL_TRACE_DEBUG("less \n");
287 } else if (result == UCOL_GREATER) {
288 DB_UTIL_TRACE_DEBUG("greater \n");
290 DB_UTIL_TRACE_DEBUG("equal \n");
/*
 * Collation callback registered for SQLITE_UTF8 when cmp_type is
 * DB_UTIL_CMP_LC (see __db_util_collation_create).
 *
 * When str1 has more UTF-8 characters than str2, str_to is first advanced by
 * v2_char_len characters. A loop then repeatedly sets up an iterator starting
 * at str_from and compares it with the iterator over str2, moving both
 * str_from and str_to forward one character per step. The iterator length
 * argument, the strcollIter arguments and the action taken on UCOL_EQUAL are
 * not visible in this view. A further setUTF8/strcollIter comparison follows
 * the loop lines; its arguments are not visible either.
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when ICU reports a failure in `error`.
 */
297 static int __db_util_collate_icu_8_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
299 UCharIterator uiter1, uiter2;
300 UErrorCode error = U_ZERO_ERROR;
301 UCollationResult result = 0;
/* Both window ends start at the beginning of str1. */
302 char* str_from = (char*)str1;
303 char* str_to = (char*)str1;
304 glong v1_char_len, v2_char_len;
307 #ifdef DB_UTIL_ENABLE_DEVDEBUG
308 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8_lc func start \n");
309 DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
312 DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
313 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
315 icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
/*
 * NOTE(review): a max of -1 makes g_utf8_strlen() scan to a NUL terminator
 * and ignore str1_len/str2_len -- confirm the inputs are always terminated,
 * otherwise pass the byte lengths instead.
 */
317 v1_char_len = g_utf8_strlen((gchar *)str1,-1);
318 v2_char_len = g_utf8_strlen((gchar *)str2,-1);
320 if(v1_char_len > v2_char_len) {
/* Open the initial window: move str_to forward by str2's character count. */
321 for(i=0;i<v2_char_len;i++) {
322 str_to = g_utf8_next_char(str_to);
/* Window end is tracked as a byte offset from str1, bounded by str1_len. */
325 while((int)(str_to-(char*)str1) <= str1_len) {
326 icu_symbol.icu_uiter_setUTF8(
328 (const char *) str_from,
331 result = icu_symbol.icu_ucol_strcollIter(
336 if (U_FAILURE(error)) {
337 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
338 return DB_UTIL_ERR_COL_FUNC_RET;
341 if(result == UCOL_EQUAL)
342 #ifdef DB_UTIL_ENABLE_DEVDEBUG
344 DB_UTIL_TRACE_DEBUG("equal \n");
/* Slide the window forward by one UTF-8 character at both ends. */
351 str_to = g_utf8_next_char(str_to);
352 str_from = g_utf8_next_char(str_from);
357 icu_symbol.icu_uiter_setUTF8(
362 result = icu_symbol.icu_ucol_strcollIter(
367 if (U_FAILURE(error)) {
368 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
369 return DB_UTIL_ERR_COL_FUNC_RET;
373 #ifdef DB_UTIL_ENABLE_DEVDEBUG
374 if (result == UCOL_LESS)
376 DB_UTIL_TRACE_DEBUG("less \n");
378 else if (result == UCOL_GREATER)
380 DB_UTIL_TRACE_DEBUG("greater \n");
382 else if(result == UCOL_EQUAL)
384 DB_UTIL_TRACE_DEBUG("equal \n");
388 DB_UTIL_TRACE_DEBUG("compare error : %d\n", result);
/*
 * Release an ICU collator through the dynamically resolved ucol_close.
 *
 * Registered as the destructor argument of sqlite3_create_collation_v2 and
 * also called directly by __db_util_collation_create when registration
 * fails. `ucol` is the UCollator that was handed to SQLite as application
 * data. The guard macro is used when the ucol_close symbol is missing.
 */
395 static void __db_util_collate_icu_close(void* ucol)
397 DB_UTIL_TRACE_DEBUG("close icu collator\n");
399 DB_UTIL_RET_IF(icu_symbol.icu_ucol_close == NULL);
401 icu_symbol.icu_ucol_close((UCollator *) ucol);
/*
 * Open an ICU collator and register it with SQLite under collator_name.
 *
 * db_handle           - database connection to register the collation on.
 * locale              - ICU locale passed to ucol_open; callers may pass NULL.
 * collator_name       - name of the SQL collation to create.
 * ucol_strength_value - strength applied with ucol_setStrength.
 * utf_type            - DB_UTIL_COL_UTF8 or DB_UTIL_COL_UTF16; selects the
 *                       text encoding and the callback family.
 * cmp_type            - DB_UTIL_CMP_LC selects the *_lc callback; any other
 *                       value presumably selects the plain callback (the
 *                       else lines are not visible in this view).
 *
 * Visible return values: DB_UTIL_ERR_DLSYM (via the guard macro, when a
 * needed ICU symbol is missing), DB_UTIL_ERR_ICU, DB_UTIL_ERR_PARAM. The
 * returns after the registration check are not visible in this view.
 */
404 static int __db_util_collation_create(sqlite3* db_handle, char* locale, char* collator_name, UCollationStrength ucol_strength_value, int utf_type, int cmp_type)
407 UErrorCode status = U_ZERO_ERROR;
409 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_open == NULL, DB_UTIL_ERR_DLSYM);
410 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_setStrength == NULL, DB_UTIL_ERR_DLSYM);
412 UCollator* ucol = icu_symbol.icu_ucol_open(locale, &status);
/*
 * U_USING_DEFAULT_WARNING is not treated as a failure: it is only logged and
 * execution continues. NOTE(review): it is logged at error level even though
 * the message says success.
 */
413 if(status == U_USING_DEFAULT_WARNING) {
414 DB_UTIL_TRACE_ERROR("ucol_open success with default collate option\n");
415 } else if (U_FAILURE(status)) {
416 DB_UTIL_TRACE_ERROR("ucol_open fail : %d \n", status);
417 return DB_UTIL_ERR_ICU;
419 #ifdef DB_UTIL_ENABLE_DEVDEBUG
422 DB_UTIL_TRACE_DEBUG("ucol_open success : %d \n", status);
/*
 * NOTE(review): ucol_setStrength is not given `status`, so the U_FAILURE check
 * below re-tests the value left by ucol_open, which has already been handled
 * above. Unless a non-visible line changes `status`, this branch looks
 * unreachable. No collator close is visible on its return path either.
 */
426 icu_symbol.icu_ucol_setStrength(ucol, ucol_strength_value);
427 if (U_FAILURE(status)) {
428 DB_UTIL_TRACE_ERROR("ucol_setStrength fail : %d \n", status);
429 return DB_UTIL_ERR_ICU;
431 DB_UTIL_TRACE_DEBUG("ucol_setStrength success \n");
/*
 * Register the collation. The collator is passed as application data and
 * __db_util_collate_icu_close as the destructor.
 */
434 if(utf_type == DB_UTIL_COL_UTF8) {
435 if(cmp_type == DB_UTIL_CMP_LC) {
436 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
437 __db_util_collate_icu_8_lc, (void(*)(void*))__db_util_collate_icu_close);
439 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
440 __db_util_collate_icu_8, (void(*)(void*))__db_util_collate_icu_close);
442 } else if(utf_type == DB_UTIL_COL_UTF16) {
443 if(cmp_type == DB_UTIL_CMP_LC) {
444 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
445 __db_util_collate_icu_16_lc, (void(*)(void*))__db_util_collate_icu_close);
447 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
448 __db_util_collate_icu_16, (void(*)(void*))__db_util_collate_icu_close);
/*
 * NOTE(review): the collator opened above is not visibly closed before this
 * return, so an unsupported utf_type appears to leak it -- confirm.
 */
451 DB_UTIL_TRACE_ERROR("wrong utf_type param value : %d\n", utf_type);
452 return DB_UTIL_ERR_PARAM;
/*
 * The collator is released by hand on failure. Per the SQLite documentation,
 * the destructor is not invoked when sqlite3_create_collation_v2 itself
 * fails, so the caller must dispose of the application data.
 */
455 if (err != SQLITE_OK) {
456 DB_UTIL_TRACE_ERROR("sqlite3_create_collation_v2 fail : %d \n", err);
457 __db_util_collate_icu_close((void*)ucol);
460 DB_UTIL_TRACE_DEBUG("sqlite3_create_collation_v2 success \n");
/*
 * Public entry point: create an ICU-backed SQLite collation named col_name
 * on db_handle.
 *
 * db_handle - database connection to register the collation on.
 * type      - which collation flavour to create (see the case labels below).
 * text_type - text representation, forwarded as utf_type to
 *             __db_util_collation_create.
 * col_name  - name of the SQL collation.
 *
 * Steps visible here: load the ICU symbols, set ICU's default locale from the
 * LANG environment variable and read it back, then create the collation for
 * the requested flavour. Returns DB_UTIL_ERROR when ICU cannot be loaded
 * (the loader's specific error code is not propagated) and on the locale
 * failure paths. The final return lines are not visible in this view.
 */
465 int db_util_create_collation(
466 PARAM_IN sqlite3 *db_handle,
467 PARAM_IN db_util_collate_type type,
468 PARAM_IN db_util_collate_textrep text_type,
469 PARAM_IN char* col_name)
471 int ret = DB_UTIL_OK;
472 UErrorCode status = U_ZERO_ERROR;
473 const char* locale = NULL;
475 DB_UTIL_TRACE_DEBUG("db_util_create_collation start");
477 ret = __db_util_dl_load_icu();
478 DB_UTIL_RETV_IF(ret != DB_UTIL_OK, DB_UTIL_ERROR);
/*
 * NOTE(review): getenv("LANG") may return NULL, and no check of `status`
 * after uloc_setDefault is visible in this view -- confirm both are handled.
 * The two function pointers are used here without a NULL test.
 */
480 /* get current locale */
481 icu_symbol.icu_uloc_setDefault((const char*)getenv("LANG"), &status);
482 locale = icu_symbol.icu_uloc_getDefault();
/* The condition guarding this failure path is not visible in this view. */
484 DB_UTIL_TRACE_WARNING("Fail to get current locale : %d", DB_UTIL_ERR_ENV);
485 return DB_UTIL_ERROR;
487 DB_UTIL_TRACE_DEBUG("locale : %s", locale);
/*
 * Korean-specific flavours are rejected unless the locale starts with "ko".
 * NOTE(review): the comparisons below use `db_util_collate_type`, which is
 * the declared type of the `type` parameter, not the parameter itself. This
 * looks like it should read `type` and would presumably fail to compile when
 * ENABLE_COL_KO_IC is defined -- confirm and fix.
 */
489 #ifdef ENABLE_COL_KO_IC
490 if((db_util_collate_type == DB_UTIL_COL_KO_IC) ||
491 (db_util_collate_type == DB_UTIL_COL_KO_IC_LC)) {
492 if(strncmp(locale, "ko", 2) != 0) {
493 DB_UTIL_TRACE_WARNING("collate type is not match with current locale : %d", DB_UTIL_ERR_ENV);
494 return DB_UTIL_ERROR;
/*
 * Flavour dispatch (the switch header and break lines are not visible here).
 * UCA passes a NULL locale; every other flavour passes the current locale.
 * UCA and LS_AS_CI use UCOL_SECONDARY strength, the rest use UCOL_PRIMARY.
 * The *_LC flavours select DB_UTIL_CMP_LC, the others DB_UTIL_CMP_SC.
 */
500 case DB_UTIL_COL_UCA :
501 ret = __db_util_collation_create(db_handle, NULL, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
503 case DB_UTIL_COL_LS_AS_CI :
504 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
506 case DB_UTIL_COL_LS_AI_CI :
507 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
509 case DB_UTIL_COL_LS_AI_CI_LC :
510 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
512 #ifdef ENABLE_COL_KO_IC
513 case DB_UTIL_COL_KO_IC :
514 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
516 case DB_UTIL_COL_KO_IC_LC :
517 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
521 DB_UTIL_TRACE_WARNING("wrong collate input type");
524 if(ret != DB_UTIL_OK)
530 #ifdef DB_UTIL_ENABLE_DEVDEBUG
/*
 * Debug-only helper: convert `len` UChars starting at `unichars` to UTF-8 in
 * a freshly malloc()ed buffer, for use in trace output.
 *
 * Returns NULL when the allocation fails. The remaining return statements
 * and any cleanup on conversion failure are not visible in this view;
 * presumably the buffer is returned on success and the caller must free()
 * it -- TODO confirm.
 *
 * NOTE(review): this calls u_strToUTF8 and u_errorName directly rather than
 * through the dlsym()ed icu_symbol table used elsewhere in this file, and it
 * logs at warning level although it only exists for debugging.
 */
532 static char *strtoutf8(const UChar * unichars, int len)
534 DB_UTIL_TRACE_WARNING("strtoutf8 start");
538 UErrorCode status = U_ZERO_ERROR;
/* lenstr is zeroed here and then immediately overwritten on the next line. */
540 lenstr = lenutf8 = 0;
/* Buffer size: four bytes for each of len + 1 UChars. */
541 lenstr = sizeof(char) * 4 * (len + 1);
542 pstr = (char *)malloc(lenstr);
543 if (!pstr)return NULL;
544 u_strToUTF8(pstr, lenstr, &lenutf8, unichars, len, &status);
545 if (U_FAILURE(status)) {
546 DB_UTIL_TRACE_WARNING("u_strToUTF8 failed in strtoutf8 :%s\n",
547 u_errorName(status));
550 DB_UTIL_TRACE_WARNING("strtoutf8 out : %s", pstr);