4 * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved.
6 * Contact: Hakjoo Ko <hakjoo.ko@samsung.com>
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
31 #include <unicode/utypes.h>
32 #include <unicode/ucol.h>
33 #include <unicode/uiter.h>
34 #include <unicode/ustring.h>
36 #include "collation.h"
38 #include "db-util-debug.h"
/*
 * Early-exit guard macros. DB_UTIL_RETV_IF takes a condition and a value,
 * DB_UTIL_RET_IF a condition only. The part of each expansion visible here
 * logs a warning tagged with the calling function's name and "[RET_IF]".
 * NOTE(review): the remaining lines of both expansions are not visible in
 * this excerpt; presumably they return when cond holds -- confirm against
 * the full macro bodies.
 *
 * DB_UTIL_ERR_COL_FUNC_RET is the value the collation callbacks in this file
 * return on failure (an alias of DB_UTIL_ERROR); ICUI18N_LIBPATH is the path
 * handed to dlopen() by __db_util_dl_load_icu.
 */
40 #define DB_UTIL_RETV_IF(cond,ret) \
43 DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__);\
48 #define DB_UTIL_RET_IF(cond) \
51 DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__);\
56 #define DB_UTIL_ERR_COL_FUNC_RET DB_UTIL_ERROR
57 #define ICUI18N_LIBPATH "/usr/lib/libicui18n.so"
/*
 * Module-local error code returned when dlopen() of the ICU library fails.
 * NOTE(review): the enclosing declaration and the sibling codes used in this
 * file (DB_UTIL_ERR_DLSYM, DB_UTIL_ERR_ENV, DB_UTIL_ERR_ICU,
 * DB_UTIL_ERR_PARAM) are not visible in this excerpt.
 */
60 DB_UTIL_ERR_DLOPEN = -10,
/*
 * Number of ICU entry points resolved with dlsym(); sizes the handle array
 * and bounds the lookup loop in __db_util_dl_load_icu.
 */
73 #define ICU_FUNC_CNT 8
/* Function-pointer types for the ICU entry points called through icu_symbol. */
75 typedef UCollator* (*ICU_UCOL_OPEN)(const char *, UErrorCode *);
76 typedef void (*ICU_UCOL_CLOSE)(UCollator *);
77 typedef void (*ICU_UCOL_SETSTRENGTH)(UCollator *, UCollationStrength);
78 typedef UCollationResult (*ICU_UCOL_STRCOLL)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t);
79 typedef UCollationResult (*ICU_UCOL_STRCOLLITER)(const UCollator *, UCharIterator *, UCharIterator *, UErrorCode *);
80 typedef void (*ICU_UITER_SETUTF8)(UCharIterator *, const char *, int32_t);
81 typedef void (*ICU_ULOC_SETDEFAULT)(const char* localeID, UErrorCode* status);
82 typedef const char* (*ICU_ULOC_GETDEFAULT)(void);
/*
 * Slots for the resolved ICU function pointers.
 * NOTE(review): __db_util_dl_load_icu fills these with one memcpy() from an
 * array of void*, so the slot order has to match the order of its symbol-name
 * table (contents not visible in this excerpt) -- verify.
 */
85 ICU_UCOL_OPEN icu_ucol_open;
86 ICU_UCOL_CLOSE icu_ucol_close;
87 ICU_UCOL_STRCOLL icu_ucol_strcoll;
88 ICU_UCOL_STRCOLLITER icu_ucol_strcollIter;
89 ICU_UCOL_SETSTRENGTH icu_ucol_setStrength;
90 ICU_UITER_SETUTF8 icu_uiter_setUTF8;
91 ICU_ULOC_SETDEFAULT icu_uloc_setDefault;
92 ICU_ULOC_GETDEFAULT icu_uloc_getDefault;
/* Table of ICU function pointers used by the functions in this file. */
95 db_util_icu_func_t icu_symbol;
/* dlopen() handle for the ICU library; stays NULL until a load succeeds. */
97 void *g_dl_icu_handle = NULL;
/* Debug builds only: UTF-16 to UTF-8 helper used when logging operands. */
99 #ifdef DB_UTIL_ENABLE_DEVDEBUG
100 static char *strtoutf8(const UChar * unichars, int len);
/*
 * Load the ICU i18n shared library and resolve the ICU_FUNC_CNT entry points
 * named in ICU_API into the global icu_symbol table.
 *
 * Visible failure returns: DB_UTIL_ERR_DLOPEN when dlopen() fails and
 * DB_UTIL_ERR_DLSYM when a symbol lookup reports an error. The success return
 * is not visible in this excerpt; the caller compares the result with
 * DB_UTIL_OK.
 */
103 static int __db_util_dl_load_icu()
/* Staging array: one resolved address per ICU entry point, zero-initialised. */
106 void *icu_handle[ICU_FUNC_CNT] = { 0 };
/* Symbol names to look up (initialiser contents not visible in this excerpt). */
110 const char *ICU_API[] = {
/* dlopen() is attempted only while no handle is cached in g_dl_icu_handle. */
121 if(g_dl_icu_handle == NULL) {
122 g_dl_icu_handle = dlopen(ICUI18N_LIBPATH, RTLD_LAZY | RTLD_GLOBAL);
123 if(g_dl_icu_handle == NULL) {
124 DB_UTIL_TRACE_WARNING("dlopen icu so fail");
125 return DB_UTIL_ERR_DLOPEN;
/*
 * Resolve each entry point. Failure is detected through dlerror() rather than
 * through a NULL result from dlsym().
 * NOTE(review): no dlerror() call that would clear a stale error is visible
 * before dlsym() -- confirm whether an earlier, unrelated dl* failure could
 * be misreported here.
 * NOTE(review): in the warning below the first %s receives the dlerror() text
 * and the second the symbol name, which reads oddly against the message.
 */
129 for (i = 0; i < ICU_FUNC_CNT; i++) {
130 handle = dlsym(g_dl_icu_handle, ICU_API[i]);
131 if ((dl_error = dlerror()) != NULL) {
132 DB_UTIL_TRACE_WARNING("dlsym(%s) is failed for %s",
133 dl_error, ICU_API[i]);
134 return DB_UTIL_ERR_DLSYM;
136 icu_handle[i] = handle;
/*
 * Publish all addresses at once by copying the void* array over icu_symbol.
 * NOTE(review): this assumes db_util_icu_func_t is laid out as ICU_FUNC_CNT
 * consecutive pointers, each the size of a void* -- confirm.
 */
139 memcpy((void*)&icu_symbol, (const void*)icu_handle, sizeof(icu_handle));
/*
 * Collation callback registered for SQLITE_UTF16 text: compares str1 with
 * str2 in a single ucol_strcoll() call made through icu_symbol.
 *
 * ucol     - user-data pointer registered together with this callback
 * str1_len - length supplied for str1; forwarded unchanged to ucol_strcoll()
 * str1     - first operand, treated as const UChar *
 * str2_len - length supplied for str2; forwarded unchanged to ucol_strcoll()
 * str2     - second operand, treated as const UChar *
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when ucol_strcoll was not resolved. The
 * normal return statement is not visible in this excerpt.
 *
 * NOTE(review): SQLite documents collation-callback lengths in bytes while
 * ICU documents ucol_strcoll() lengths in UChar units -- confirm whether a
 * conversion is needed before the call.
 */
144 /* The collating function must return an integer that is negative, zero or positive */
145 static int __db_util_collate_icu_16(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
/*
 * Debug builds: convert both operands to UTF-8 and log them.
 * NOTE(review): strtoutf8() hands back a malloc()ed buffer or NULL; no free()
 * and no NULL check are visible in this excerpt -- confirm.
 */
147 #ifdef DB_UTIL_ENABLE_DEVDEBUG
148 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16 func start \n");
150 UChar* tmp_v1 = (UChar *)str1;
151 UChar* tmp_v2 = (UChar *)str2;
152 char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
153 char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
155 DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
156 DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
/* Guard against an unresolved ucol_strcoll entry in the function table. */
159 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
161 UCollationResult result = icu_symbol.icu_ucol_strcoll(
163 (const UChar *) str1, str1_len,
164 (const UChar *) str2, str2_len);
/* Debug builds: log which way the comparison went. */
166 #ifdef DB_UTIL_ENABLE_DEVDEBUG
172 if (result == UCOL_LESS) {
173 DB_UTIL_TRACE_DEBUG("less \n");
174 } else if (result == UCOL_GREATER) {
175 DB_UTIL_TRACE_DEBUG("greater \n");
177 DB_UTIL_TRACE_DEBUG("equal \n");
/*
 * "_lc" collation callback registered for SQLITE_UTF16 text when cmp_type is
 * DB_UTIL_CMP_LC. When str1_len is greater than str2_len it compares
 * sub-ranges [str_from, str_to) of str1 against the whole of str2 inside a
 * loop. The function also contains a whole-string comparison of str1 against
 * str2; the branch structure selecting between the two is not visible in this
 * excerpt.
 *
 * ucol     - user-data pointer registered together with this callback
 * str1_len - length supplied for str1
 * str1     - first operand, treated as UChar data
 * str2_len - length supplied for str2
 * str2     - second operand, treated as UChar data
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when ucol_strcoll was not resolved. The
 * other return statements are not visible in this excerpt.
 */
184 /* The collating function must return an integer that is negative, zero or positive */
185 static int __db_util_collate_icu_16_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
/*
 * Debug builds: convert both operands to UTF-8 and log them.
 * NOTE(review): strtoutf8() hands back a malloc()ed buffer or NULL; no free()
 * and no NULL check are visible in this excerpt -- confirm.
 */
187 #ifdef DB_UTIL_ENABLE_DEVDEBUG
188 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16_lc func start \n");
190 UChar* tmp_v1 = (UChar *)str1;
191 UChar* tmp_v2 = (UChar *)str2;
193 char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
194 char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
196 DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
197 DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
205 UCollationResult result = 0;
/* Bounds of the sub-range of str1 being compared; both start at str1. */
207 UChar* str_to = (UChar *)str1;
208 UChar* str_from = (UChar *)str1;
/* Guard against an unresolved ucol_strcoll entry in the function table. */
212 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
214 if(str1_len> str2_len) {
/*
 * NOTE(review): the loop body is not visible. Stepping i by 2 up to str2_len
 * suggests str2_len is treated as a byte count here (two bytes per UChar) --
 * confirm.
 */
215 for(i=0;i<str2_len;i=i+2) {
/*
 * Continue while the end of the sub-range, measured in UChar elements from
 * str1, does not exceed str1_len.
 * NOTE(review): this compares an element offset with str1_len, and str2_len
 * is also handed to ucol_strcoll() as a length -- confirm the units are
 * consistent with the byte-style stepping above.
 */
219 while((int)(str_to-(UChar*)str1) <= str1_len) {
220 result = icu_symbol.icu_ucol_strcoll(
222 (UChar *) str_from, str_to-str_from,
223 (const UChar *) str2, str2_len);
/*
 * A match is logged in debug builds. The action taken on a match and the code
 * that advances the sub-range are not visible in this excerpt.
 */
225 if (result == UCOL_EQUAL)
227 #ifdef DB_UTIL_ENABLE_DEVDEBUG
228 DB_UTIL_TRACE_DEBUG("equal \n");
/* Whole-string comparison of str1 against str2. */
237 result = icu_symbol.icu_ucol_strcoll(
239 (const UChar *) str1, str1_len,
240 (const UChar *) str2, str2_len);
/* Debug builds: log the outcome, including values outside the three expected results. */
243 #ifdef DB_UTIL_ENABLE_DEVDEBUG
244 if (result == UCOL_LESS) {
245 DB_UTIL_TRACE_DEBUG("less \n");
246 } else if (result == UCOL_GREATER) {
247 DB_UTIL_TRACE_DEBUG("greater \n");
248 } else if (result == UCOL_EQUAL) {
249 DB_UTIL_TRACE_DEBUG("equal \n");
251 DB_UTIL_TRACE_DEBUG("compare error : %d \n", result);
/*
 * Collation callback registered for SQLITE_UTF8 text: wraps both operands in
 * UCharIterators with uiter_setUTF8() and compares them with
 * ucol_strcollIter(), both called through icu_symbol.
 *
 * ucol     - user-data pointer registered together with this callback
 * str1_len - length supplied for str1; passed to uiter_setUTF8()
 * str1     - first operand, treated as const char *
 * str2_len - length supplied for str2; passed to uiter_setUTF8()
 * str2     - second operand, treated as const char *
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when a required ICU entry point was not
 * resolved or when ucol_strcollIter() reports a failure status. The normal
 * return statement is not visible in this excerpt.
 */
258 /* The collating function must return an integer that is negative, zero or positive */
259 static int __db_util_collate_icu_8(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
261 UCharIterator uiter1, uiter2;
262 UErrorCode error = U_ZERO_ERROR;
/*
 * NOTE(review): the debug log prints both operands with %s although they
 * arrive with explicit lengths -- confirm they are NUL-terminated.
 */
264 #ifdef DB_UTIL_ENABLE_DEVDEBUG
265 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8 func start \n");
266 DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
/* Guard against unresolved entries in the function table. */
269 DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
270 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
/* Point one iterator at each UTF-8 operand. */
272 icu_symbol.icu_uiter_setUTF8(&uiter1, (const char *) str1, str1_len);
273 icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
/* Arguments of this call are not visible in this excerpt. */
275 UCollationResult result = icu_symbol.icu_ucol_strcollIter(
/*
 * NOTE(review): the error value is returned through the comparator's int
 * result, so the caller sees it as an ordering -- confirm this is intended.
 */
280 if(U_FAILURE(error)) {
281 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8 ucol_strcollIter error: %d\n", error);
282 return DB_UTIL_ERR_COL_FUNC_RET;
/* Debug builds: log which way the comparison went. */
285 #ifdef DB_UTIL_ENABLE_DEVDEBUG
286 if (result == UCOL_LESS) {
287 DB_UTIL_TRACE_DEBUG("less \n");
288 } else if (result == UCOL_GREATER) {
289 DB_UTIL_TRACE_DEBUG("greater \n");
291 DB_UTIL_TRACE_DEBUG("equal \n");
/*
 * "_lc" collation callback registered for SQLITE_UTF8 text when cmp_type is
 * DB_UTIL_CMP_LC. When str1 holds more UTF-8 characters than str2, it moves a
 * sub-range [str_from, str_to) through str1 one character at a time and
 * compares with ucol_strcollIter() on each step. The function also contains a
 * second setUTF8/strcollIter sequence after the loop; its arguments and the
 * branch structure selecting it are not visible in this excerpt.
 *
 * ucol     - user-data pointer registered together with this callback
 * str1_len - length supplied for str1; bounds the loop
 * str1     - first operand, treated as UTF-8 char data
 * str2_len - length supplied for str2; passed to uiter_setUTF8()
 * str2     - second operand, treated as UTF-8 char data
 *
 * Returns DB_UTIL_ERR_COL_FUNC_RET when a required ICU entry point was not
 * resolved or when ucol_strcollIter() reports a failure status. The other
 * return statements are not visible in this excerpt.
 */
298 static int __db_util_collate_icu_8_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
300 UCharIterator uiter1, uiter2;
301 UErrorCode error = U_ZERO_ERROR;
302 UCollationResult result = 0;
/* Bounds of the sub-range of str1 being compared; both start at str1. */
303 char* str_from = (char*)str1;
304 char* str_to = (char*)str1;
305 glong v1_char_len, v2_char_len;
/*
 * NOTE(review): the debug log prints both operands with %s although they
 * arrive with explicit lengths -- confirm they are NUL-terminated.
 */
308 #ifdef DB_UTIL_ENABLE_DEVDEBUG
309 DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8_lc func start \n");
310 DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
/* Guard against unresolved entries in the function table. */
313 DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
314 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
316 icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
/*
 * Character counts of both operands.
 * NOTE(review): g_utf8_strlen() is called with max = -1, which GLib documents
 * as "assume NUL-terminated"; str1_len and str2_len are not used here --
 * confirm the buffers are NUL-terminated.
 */
318 v1_char_len = g_utf8_strlen((gchar *)str1,-1);
319 v2_char_len = g_utf8_strlen((gchar *)str2,-1);
321 if(v1_char_len > v2_char_len) {
/* Advance str_to by v2_char_len characters from the start of str1. */
322 for(i=0;i<v2_char_len;i++) {
323 str_to = g_utf8_next_char(str_to);
/* Continue while the byte offset of str_to from str1 does not exceed str1_len. */
326 while((int)(str_to-(char*)str1) <= str1_len) {
327 icu_symbol.icu_uiter_setUTF8(
329 (const char *) str_from,
332 result = icu_symbol.icu_ucol_strcollIter(
337 if (U_FAILURE(error)) {
338 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
339 return DB_UTIL_ERR_COL_FUNC_RET;
/*
 * A match is logged in debug builds. The action taken on a match is not
 * visible in this excerpt.
 */
342 if(result == UCOL_EQUAL)
343 #ifdef DB_UTIL_ENABLE_DEVDEBUG
345 DB_UTIL_TRACE_DEBUG("equal \n");
/* Move both ends of the sub-range forward by one UTF-8 character. */
352 str_to = g_utf8_next_char(str_to);
353 str_from = g_utf8_next_char(str_from);
/* Second comparison sequence; its arguments are not visible in this excerpt. */
358 icu_symbol.icu_uiter_setUTF8(
363 result = icu_symbol.icu_ucol_strcollIter(
368 if (U_FAILURE(error)) {
369 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
370 return DB_UTIL_ERR_COL_FUNC_RET;
/* Debug builds: log the outcome, including values outside the three expected results. */
374 #ifdef DB_UTIL_ENABLE_DEVDEBUG
375 if (result == UCOL_LESS)
377 DB_UTIL_TRACE_DEBUG("less \n");
379 else if (result == UCOL_GREATER)
381 DB_UTIL_TRACE_DEBUG("greater \n");
383 else if(result == UCOL_EQUAL)
385 DB_UTIL_TRACE_DEBUG("equal \n");
389 DB_UTIL_TRACE_DEBUG("compare error : %d\n", result);
/*
 * Destructor for a collation's user data: closes the collator passed in ucol
 * through icu_symbol.icu_ucol_close. It is registered as the destroy callback
 * in __db_util_collation_create and also called directly there when
 * registration fails.
 *
 * ucol - pointer cast to UCollator * before being closed
 */
396 static void __db_util_collate_icu_close(void* ucol)
398 DB_UTIL_TRACE_DEBUG("close icu collator\n");
/* Guard against an unresolved ucol_close entry in the function table. */
400 DB_UTIL_RET_IF(icu_symbol.icu_ucol_close == NULL);
402 icu_symbol.icu_ucol_close((UCollator *) ucol);
/*
 * Open an ICU collator for locale, set its strength, and register it with
 * SQLite under collator_name together with the comparison callback that
 * matches utf_type and cmp_type.
 *
 * db_handle           - SQLite connection receiving the collation
 * locale              - locale string passed to ucol_open()
 * collator_name       - name passed to sqlite3_create_collation_v2()
 * ucol_strength_value - strength passed to ucol_setStrength()
 * utf_type            - DB_UTIL_COL_UTF8 or DB_UTIL_COL_UTF16; selects the
 *                       SQLite text encoding and the callback family
 * cmp_type            - DB_UTIL_CMP_LC selects the "_lc" callback; the line
 *                       introducing the alternative is not visible here
 *
 * Visible returns: DB_UTIL_ERR_DLSYM when a required ICU entry point was not
 * resolved, DB_UTIL_ERR_ICU on ICU failure, DB_UTIL_ERR_PARAM for an
 * unsupported utf_type. The returns after registration are not visible in
 * this excerpt.
 */
405 static int __db_util_collation_create(sqlite3* db_handle, char* locale, char* collator_name, UCollationStrength ucol_strength_value, int utf_type, int cmp_type)
408 UErrorCode status = U_ZERO_ERROR;
/* Guard against unresolved entries in the function table. */
410 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_open == NULL, DB_UTIL_ERR_DLSYM);
411 DB_UTIL_RETV_IF(icu_symbol.icu_ucol_setStrength == NULL, DB_UTIL_ERR_DLSYM);
413 UCollator* ucol = icu_symbol.icu_ucol_open(locale, &status);
/*
 * U_USING_DEFAULT_WARNING is only logged (through the error trace) and does
 * not stop processing; a failure status aborts with DB_UTIL_ERR_ICU.
 */
414 if(status == U_USING_DEFAULT_WARNING) {
415 DB_UTIL_TRACE_ERROR("ucol_open success with default collate option\n");
416 } else if (U_FAILURE(status)) {
417 DB_UTIL_TRACE_ERROR("ucol_open fail : %d \n", status);
418 return DB_UTIL_ERR_ICU;
420 #ifdef DB_UTIL_ENABLE_DEVDEBUG
423 DB_UTIL_TRACE_DEBUG("ucol_open success : %d \n", status);
/*
 * NOTE(review): the ICU_UCOL_SETSTRENGTH pointer type returns void and takes
 * no UErrorCode, and no visible line updates status after the ucol_open()
 * check above, so the failure branch below looks unreachable -- confirm.
 */
427 icu_symbol.icu_ucol_setStrength(ucol, ucol_strength_value);
428 if (U_FAILURE(status)) {
429 DB_UTIL_TRACE_ERROR("ucol_setStrength fail : %d \n", status);
430 return DB_UTIL_ERR_ICU;
432 DB_UTIL_TRACE_DEBUG("ucol_setStrength success \n");
/*
 * Register the collation. ucol is passed as the callback user data and
 * __db_util_collate_icu_close as its destructor.
 */
435 if(utf_type == DB_UTIL_COL_UTF8) {
436 if(cmp_type == DB_UTIL_CMP_LC) {
437 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
438 __db_util_collate_icu_8_lc, (void(*)(void*))__db_util_collate_icu_close);
440 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
441 __db_util_collate_icu_8, (void(*)(void*))__db_util_collate_icu_close);
443 } else if(utf_type == DB_UTIL_COL_UTF16) {
444 if(cmp_type == DB_UTIL_CMP_LC) {
445 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
446 __db_util_collate_icu_16_lc, (void(*)(void*))__db_util_collate_icu_close);
448 err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
449 __db_util_collate_icu_16, (void(*)(void*))__db_util_collate_icu_close);
/*
 * NOTE(review): no call closing ucol is visible before this return -- confirm
 * the collator opened above is not leaked on this path.
 */
452 DB_UTIL_TRACE_ERROR("wrong utf_type param value : %d\n", utf_type);
453 return DB_UTIL_ERR_PARAM;
/* On registration failure the collator is closed explicitly here. */
456 if (err != SQLITE_OK) {
457 DB_UTIL_TRACE_ERROR("sqlite3_create_collation_v2 fail : %d \n", err);
458 __db_util_collate_icu_close((void*)ucol);
461 DB_UTIL_TRACE_DEBUG("sqlite3_create_collation_v2 success \n");
/*
 * Public entry point: load the ICU entry points, set and read back ICU's
 * default locale, and register a collation named col_name on db_handle.
 *
 * db_handle - SQLite connection; rejected when NULL
 * type      - collation flavour; the case labels below list the handled
 *             values (the statement dispatching on it is not visible here)
 * text_type - forwarded to __db_util_collation_create as utf_type
 * col_name  - collation name; rejected when NULL
 *
 * Arguments handed to __db_util_collation_create per case:
 *   DB_UTIL_COL_UCA         - NULL locale, UCOL_SECONDARY, DB_UTIL_CMP_SC
 *   DB_UTIL_COL_LS_AS_CI    - locale,      UCOL_SECONDARY, DB_UTIL_CMP_SC
 *   DB_UTIL_COL_LS_AI_CI    - locale,      UCOL_PRIMARY,   DB_UTIL_CMP_SC
 *   DB_UTIL_COL_LS_AI_CI_LC - locale,      UCOL_PRIMARY,   DB_UTIL_CMP_LC
 *   DB_UTIL_COL_KO_IC       - locale,      UCOL_PRIMARY,   DB_UTIL_CMP_SC
 *   DB_UTIL_COL_KO_IC_LC    - locale,      UCOL_PRIMARY,   DB_UTIL_CMP_LC
 * (the two KO cases exist only when ENABLE_COL_KO_IC is defined)
 *
 * Visible returns: DB_UTIL_ERROR for NULL arguments, for an ICU load failure,
 * on the locale failure path, and (ENABLE_COL_KO_IC) when the locale does not
 * start with "ko" for a KO collation. The final return is not visible in this
 * excerpt.
 */
466 int db_util_create_collation(
467 PARAM_IN sqlite3 *db_handle,
468 PARAM_IN db_util_collate_type type,
469 PARAM_IN db_util_collate_textrep text_type,
470 PARAM_IN char* col_name)
472 int ret = DB_UTIL_OK;
473 UErrorCode status = U_ZERO_ERROR;
474 const char* locale = NULL;
476 if ( (!db_handle) || (!col_name) ) {
477 DB_UTIL_TRACE_ERROR("wrong input param");
478 return DB_UTIL_ERROR;
481 DB_UTIL_TRACE_DEBUG("db_util_create_collation start");
/* Any load error is reported to the caller as the generic DB_UTIL_ERROR. */
483 ret = __db_util_dl_load_icu();
484 DB_UTIL_RETV_IF(ret != DB_UTIL_OK, DB_UTIL_ERROR);
486 /* get current locale */
/*
 * Set ICU's default locale from the LANG environment variable, then read the
 * default back.
 * NOTE(review): getenv() returns NULL when LANG is unset, and no visible line
 * inspects status after the call -- confirm both cases are acceptable.
 */
487 icu_symbol.icu_uloc_setDefault((const char*)getenv("LANG"), &status);
488 locale = icu_symbol.icu_uloc_getDefault();
/* Failure path; the condition guarding it is not visible in this excerpt. */
490 DB_UTIL_TRACE_WARNING("Fail to get current locale : %d", DB_UTIL_ERR_ENV);
491 return DB_UTIL_ERROR;
493 DB_UTIL_TRACE_DEBUG("locale : %s", locale);
/*
 * Korean collations are refused unless the locale starts with "ko".
 * NOTE(review): db_util_collate_type is the type of the `type` parameter in
 * the signature above, yet it is used as an operand here; the comparison
 * presumably intends `type` -- confirm (only compiled with ENABLE_COL_KO_IC).
 */
495 #ifdef ENABLE_COL_KO_IC
496 if((db_util_collate_type == DB_UTIL_COL_KO_IC) ||
497 (db_util_collate_type == DB_UTIL_COL_KO_IC_LC)) {
498 if(strncmp(locale, "ko", 2) != 0) {
499 DB_UTIL_TRACE_WARNING("collate type is not match with current locale : %d", DB_UTIL_ERR_ENV);
500 return DB_UTIL_ERROR;
/* The only case that passes a NULL locale instead of the one read above. */
506 case DB_UTIL_COL_UCA :
507 ret = __db_util_collation_create(db_handle, NULL, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
509 case DB_UTIL_COL_LS_AS_CI :
510 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
512 case DB_UTIL_COL_LS_AI_CI :
513 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
515 case DB_UTIL_COL_LS_AI_CI_LC :
516 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
518 #ifdef ENABLE_COL_KO_IC
519 case DB_UTIL_COL_KO_IC :
520 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
522 case DB_UTIL_COL_KO_IC_LC :
523 ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
/* Reached for an unhandled type; what follows the warning is not visible here. */
527 DB_UTIL_TRACE_WARNING("wrong collate input type");
/* The statement controlled by this test is not visible in this excerpt. */
531 if(ret != DB_UTIL_OK)
/*
 * Debug-only helper (compiled when DB_UTIL_ENABLE_DEVDEBUG is defined):
 * converts len UChar units starting at unichars into a malloc()ed UTF-8
 * buffer using u_strToUTF8(), which is called directly rather than through
 * icu_symbol.
 *
 * unichars - source UTF-16 text
 * len      - source length handed to u_strToUTF8()
 *
 * Returns NULL when the allocation fails. The remaining return statements are
 * not visible in this excerpt.
 */
537 #ifdef DB_UTIL_ENABLE_DEVDEBUG
539 static char *strtoutf8(const UChar * unichars, int len)
541 DB_UTIL_TRACE_WARNING("strtoutf8 start");
545 UErrorCode status = U_ZERO_ERROR;
547 lenstr = lenutf8 = 0;
/* Allow four output bytes per input unit, plus one extra unit of headroom. */
548 lenstr = sizeof(char) * 4 * (len + 1);
549 pstr = (char *)malloc(lenstr);
550 if (!pstr)return NULL;
551 u_strToUTF8(pstr, lenstr, &lenutf8, unichars, len, &status);
/*
 * NOTE(review): no free(pstr) is visible on the failure path -- confirm the
 * buffer is not leaked when the conversion fails.
 */
552 if (U_FAILURE(status)) {
553 DB_UTIL_TRACE_WARNING("u_strToUTF8 failed in strtoutf8 :%s\n",
554 u_errorName(status));
557 DB_UTIL_TRACE_WARNING("strtoutf8 out : %s", pstr);