Initialize Tizen 2.3
[framework/appfw/libslp-db-util.git] / collation.c
1 /*
2  * libslp-db-util
3  *
4  * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Contact: Hakjoo Ko <hakjoo.ko@samsung.com>
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */
21 #include <string.h>
22 #include <math.h>
23 #include <wchar.h>
24 #include <wctype.h>
25
26 #include <unistd.h>
27 #include <glib.h>
28
29 #include <dlfcn.h>
30
31 #include <unicode/utypes.h>
32 #include <unicode/ucol.h>
33 #include <unicode/uiter.h>
34 #include <unicode/ustring.h>
35
36 #include "collation.h"
37
38 #include "db-util-debug.h"
39
/*
 * Guard for functions that return a value: when `cond` holds, emit a
 * warning trace carrying the name of the enclosing function and return
 * `ret` from that function.  Wrapped in do/while(0) so the macro behaves
 * like a single statement.
 */
#define DB_UTIL_RETV_IF(cond,ret) \
	do { \
		if (cond) { \
			DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m", __FUNCTION__); \
			return ret; \
		} \
	} while (0)
47
/*
 * Guard for void functions: when `cond` holds, emit a warning trace
 * carrying the name of the enclosing function and return from it.
 */
#define DB_UTIL_RET_IF(cond) \
	do { \
		if (cond) { \
			DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m", __FUNCTION__); \
			return; \
		} \
	} while (0)
55
56 #define DB_UTIL_ERR_COL_FUNC_RET DB_UTIL_ERROR
57
/* Failure codes used internally by this file. */
enum {
	DB_UTIL_ERR_DLOPEN = -10,	/* the ICU shared library could not be opened */
	DB_UTIL_ERR_DLSYM = -9,		/* an ICU entry point is missing */
	DB_UTIL_ERR_ENV = -8,		/* locale problem (only used in trace output) */
	DB_UTIL_ERR_ICU = -7,		/* an ICU call reported a failure */
	DB_UTIL_ERR_PARAM = -6		/* an argument had an unsupported value */
};
65
/* Selects which family of comparison callbacks gets registered. */
enum {
	/* Plain callbacks (no "_lc" suffix): one comparison of the whole strings */
	DB_UTIL_CMP_SC = 0,
	/* Loop comparison: the "_lc" callbacks */
	DB_UTIL_CMP_LC = 1
};
71
72 #define ICU_FUNC_CNT 8
73
74 typedef UCollator* (*ICU_UCOL_OPEN)(const char *, UErrorCode *);
75 typedef void (*ICU_UCOL_CLOSE)(UCollator *);
76 typedef void (*ICU_UCOL_SETSTRENGTH)(UCollator *, UCollationStrength);
77 typedef UCollationResult (*ICU_UCOL_STRCOLL)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t);
78 typedef UCollationResult (*ICU_UCOL_STRCOLLITER)(const UCollator *, UCharIterator *, UCharIterator *, UErrorCode *);
79 typedef void (*ICU_UITER_SETUTF8)(UCharIterator *, const char *, int32_t);
80 typedef void (*ICU_ULOC_SETDEFAULT)(const char* localeID, UErrorCode* status);
81 typedef const char* (*ICU_ULOC_GETDEFAULT)(void);
82
83 typedef struct {
84         ICU_UCOL_OPEN icu_ucol_open;
85         ICU_UCOL_CLOSE icu_ucol_close;
86         ICU_UCOL_STRCOLL icu_ucol_strcoll;
87         ICU_UCOL_STRCOLLITER icu_ucol_strcollIter;
88         ICU_UCOL_SETSTRENGTH icu_ucol_setStrength;
89         ICU_UITER_SETUTF8 icu_uiter_setUTF8;
90         ICU_ULOC_SETDEFAULT icu_uloc_setDefault;
91         ICU_ULOC_GETDEFAULT icu_uloc_getDefault;
92 } db_util_icu_func_t;
93
94 db_util_icu_func_t icu_symbol;
95
96 void *g_dl_icu_handle = NULL;
97
98 #ifdef DB_UTIL_ENABLE_DEVDEBUG
99 static char *strtoutf8(const UChar * unichars, int len);
100 #endif
101
102 static int __db_util_dl_load_icu()
103 {
104         void *handle = NULL;
105         void *icu_handle[ICU_FUNC_CNT] = { 0 };
106         char *dl_error;
107         int i = 0;
108
109         const char *ICU_API[] = {
110                 "ucol_open",
111                 "ucol_close",
112                 "ucol_strcoll",
113                 "ucol_strcollIter",
114                 "ucol_setStrength",
115                 "uiter_setUTF8",
116                 "uloc_setDefault",
117                 "uloc_getDefault"
118         };
119
120         if(g_dl_icu_handle == NULL) {
121                 g_dl_icu_handle = dlopen("libicui18n.so", RTLD_LAZY | RTLD_GLOBAL);
122                 if(g_dl_icu_handle == NULL) {
123                         DB_UTIL_TRACE_WARNING("dlopen icu so fail");
124                         return DB_UTIL_ERR_DLOPEN;
125                 }
126         }
127
128         for (i = 0; i < ICU_FUNC_CNT; i++) {
129                 handle = dlsym(g_dl_icu_handle, ICU_API[i]);
130                 if ((dl_error = dlerror()) != NULL)  {
131                         DB_UTIL_TRACE_WARNING("dlsym(%s) is failed for %s",
132                                                                 dl_error, ICU_API[i]);
133                         return DB_UTIL_ERR_DLSYM;
134                 }
135                 icu_handle[i] = handle;
136         }
137
138         memcpy((void*)&icu_symbol, (const void*)icu_handle, sizeof(icu_handle));
139
140         return DB_UTIL_OK;
141 }
142
143 /* The collating function must return an integer that is negative, zero or positive */
144 static int __db_util_collate_icu_16(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
145 {
146 #ifdef DB_UTIL_ENABLE_DEVDEBUG
147         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16 func start \n");
148
149         UChar* tmp_v1 = (UChar *)str1;
150         UChar* tmp_v2 = (UChar *)str2;
151         char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
152         char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
153
154         DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
155         DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
156 #endif
157
158         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
159
160         UCollationResult result = icu_symbol.icu_ucol_strcoll(
161                                                                 (UCollator *) ucol,
162                                                                 (const UChar *) str1, str1_len,
163                                                                 (const UChar *) str2, str2_len);
164
165 #ifdef DB_UTIL_ENABLE_DEVDEBUG
166         if(utf8_v1)
167                 free(utf8_v1);
168         if(utf8_v2)
169                 free(utf8_v2);
170
171         if (result == UCOL_LESS) {
172                 DB_UTIL_TRACE_DEBUG("less \n");
173         } else if (result == UCOL_GREATER) {
174                 DB_UTIL_TRACE_DEBUG("greater \n");
175         } else {
176                 DB_UTIL_TRACE_DEBUG("equal \n");
177         }
178 #endif
179
180         return result;
181 }
182
183 /* The collating function must return an integer that is negative, zero or positive */
184 static int __db_util_collate_icu_16_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
185 {
186 #ifdef DB_UTIL_ENABLE_DEVDEBUG
187         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16_lc func start \n");
188
189         UChar* tmp_v1 = (UChar *)str1;
190         UChar* tmp_v2 = (UChar *)str2;
191
192         char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
193         char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
194
195         DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
196         DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
197
198         if(utf8_v1)
199                 free(utf8_v1);
200         if(utf8_v2)
201                 free(utf8_v2);
202 #endif
203
204         UCollationResult result = 0;
205
206         UChar* str_to = (UChar *)str1;
207         UChar* str_from = (UChar *)str1;
208
209         int i;
210
211         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
212
213         if(str1_len> str2_len) {
214                 for(i=0;i<str2_len;i=i+2) {
215                         str_to++;
216                 }
217
218                 while((int)(str_to-(UChar*)str1) <= str1_len) {
219                         result = icu_symbol.icu_ucol_strcoll(
220                                                 (UCollator *) ucol,
221                                                 (UChar *) str_from, str_to-str_from,
222                                                 (const UChar *) str2, str2_len);
223
224                         if (result == UCOL_EQUAL)
225                         {
226 #ifdef DB_UTIL_ENABLE_DEVDEBUG
227                                 DB_UTIL_TRACE_DEBUG("equal \n");
228 #endif
229                                 return UCOL_EQUAL;
230                         }
231
232                         str_to++;
233                         str_from++;
234                 }
235         } else {
236                 result = icu_symbol.icu_ucol_strcoll(
237                                         (UCollator *) ucol,
238                                         (const UChar *) str1, str1_len,
239                                         (const UChar *) str2, str2_len);
240         }
241
242 #ifdef DB_UTIL_ENABLE_DEVDEBUG
243         if (result == UCOL_LESS) {
244                 DB_UTIL_TRACE_DEBUG("less \n");
245         } else if (result == UCOL_GREATER) {
246                 DB_UTIL_TRACE_DEBUG("greater \n");
247         } else if (result == UCOL_EQUAL) {
248                 DB_UTIL_TRACE_DEBUG("equal \n");
249         } else {
250                 DB_UTIL_TRACE_DEBUG("compare error : %d \n", result);
251         }
252 #endif
253
254         return result;
255 }
256
257 /* The collating function must return an integer that is negative, zero or positive */
258 static int __db_util_collate_icu_8(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
259 {
260         UCharIterator uiter1, uiter2;
261         UErrorCode error = U_ZERO_ERROR;
262
263 #ifdef DB_UTIL_ENABLE_DEVDEBUG
264         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8 func start \n");
265         DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
266 #endif
267
268         DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
269         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
270
271         icu_symbol.icu_uiter_setUTF8(&uiter1, (const char *) str1, str1_len);
272         icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
273
274         UCollationResult result = icu_symbol.icu_ucol_strcollIter(
275                                                                 (UCollator *) ucol,
276                                                                 &uiter1,
277                                                                 &uiter2,
278                                                                 &error);
279         if(U_FAILURE(error)) {
280                 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8 ucol_strcollIter error: %d\n", error);
281                 return DB_UTIL_ERR_COL_FUNC_RET;
282         }
283
284 #ifdef DB_UTIL_ENABLE_DEVDEBUG
285         if (result == UCOL_LESS) {
286                 DB_UTIL_TRACE_DEBUG("less \n");
287         } else if (result == UCOL_GREATER) {
288                 DB_UTIL_TRACE_DEBUG("greater \n");
289         } else {
290                 DB_UTIL_TRACE_DEBUG("equal \n");
291         }
292 #endif
293
294         return result;
295 }
296
297 static int __db_util_collate_icu_8_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
298 {
299         UCharIterator uiter1, uiter2;
300         UErrorCode error = U_ZERO_ERROR;
301         UCollationResult result = 0;
302         char* str_from = (char*)str1;
303         char* str_to = (char*)str1;
304         glong v1_char_len, v2_char_len;
305         int i;
306
307 #ifdef DB_UTIL_ENABLE_DEVDEBUG
308         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8_lc func start \n");
309         DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
310 #endif
311
312         DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
313         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
314
315         icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
316
317         v1_char_len = g_utf8_strlen((gchar *)str1,-1);
318         v2_char_len = g_utf8_strlen((gchar *)str2,-1);
319
320         if(v1_char_len > v2_char_len) {
321                 for(i=0;i<v2_char_len;i++) {
322                         str_to = g_utf8_next_char(str_to);
323                 }
324
325                 while((int)(str_to-(char*)str1) <= str1_len) {
326                         icu_symbol.icu_uiter_setUTF8(
327                                                 &uiter1,
328                                                 (const char *) str_from,
329                                                 str_to - str_from);
330
331                         result = icu_symbol.icu_ucol_strcollIter(
332                                                 (UCollator *) ucol,
333                                                 &uiter1,
334                                                 &uiter2,
335                                                 &error);
336                         if (U_FAILURE(error)) {
337                                 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
338                                 return DB_UTIL_ERR_COL_FUNC_RET;
339                         }
340
341                         if(result == UCOL_EQUAL)
342 #ifdef DB_UTIL_ENABLE_DEVDEBUG
343                         {
344                                 DB_UTIL_TRACE_DEBUG("equal \n");
345                                 return UCOL_EQUAL;
346                         }
347 #else
348                                 return UCOL_EQUAL;
349 #endif
350
351                         str_to = g_utf8_next_char(str_to);
352                         str_from = g_utf8_next_char(str_from);
353                 }
354         }
355         else
356         {
357                 icu_symbol.icu_uiter_setUTF8(
358                                         &uiter1,
359                                         (const char *) str1,
360                                         str1_len);
361
362                 result = icu_symbol.icu_ucol_strcollIter(
363                                         (UCollator *) ucol,
364                                         &uiter1,
365                                         &uiter2,
366                                         &error);
367                 if (U_FAILURE(error)) {
368                         DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
369                         return DB_UTIL_ERR_COL_FUNC_RET;
370                 }
371         }
372
373 #ifdef DB_UTIL_ENABLE_DEVDEBUG
374         if (result == UCOL_LESS)
375         {
376                 DB_UTIL_TRACE_DEBUG("less \n");
377         }
378         else if (result == UCOL_GREATER)
379         {
380                 DB_UTIL_TRACE_DEBUG("greater \n");
381         }
382         else if(result == UCOL_EQUAL)
383         {
384                 DB_UTIL_TRACE_DEBUG("equal \n");
385         }
386         else
387         {
388                 DB_UTIL_TRACE_DEBUG("compare error : %d\n", result);
389         }
390 #endif
391
392         return result;
393 }
394
395 static void __db_util_collate_icu_close(void* ucol)
396 {
397         DB_UTIL_TRACE_DEBUG("close icu collator\n");
398
399         DB_UTIL_RET_IF(icu_symbol.icu_ucol_close == NULL);
400
401     icu_symbol.icu_ucol_close((UCollator *) ucol);
402 }
403
404 static int __db_util_collation_create(sqlite3* db_handle, char* locale, char* collator_name, UCollationStrength ucol_strength_value, int utf_type, int cmp_type)
405 {
406         int err;
407         UErrorCode status = U_ZERO_ERROR;
408
409         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_open == NULL, DB_UTIL_ERR_DLSYM);
410         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_setStrength == NULL, DB_UTIL_ERR_DLSYM);
411
412         UCollator* ucol = icu_symbol.icu_ucol_open(locale, &status);
413         if(status == U_USING_DEFAULT_WARNING) {
414                 DB_UTIL_TRACE_ERROR("ucol_open success with default collate option\n");
415         } else if (U_FAILURE(status)) {
416                 DB_UTIL_TRACE_ERROR("ucol_open fail : %d \n", status);
417                 return DB_UTIL_ERR_ICU;
418         }
419 #ifdef DB_UTIL_ENABLE_DEVDEBUG
420         else
421         {
422                 DB_UTIL_TRACE_DEBUG("ucol_open success : %d \n", status);
423         }
424 #endif
425
426         icu_symbol.icu_ucol_setStrength(ucol, ucol_strength_value);
427         if (U_FAILURE(status)) {
428                 DB_UTIL_TRACE_ERROR("ucol_setStrength fail : %d \n", status);
429                 return DB_UTIL_ERR_ICU;
430         } else {
431                 DB_UTIL_TRACE_DEBUG("ucol_setStrength success \n");
432         }
433
434         if(utf_type == DB_UTIL_COL_UTF8) {
435                 if(cmp_type == DB_UTIL_CMP_LC) {
436                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
437                 __db_util_collate_icu_8_lc, (void(*)(void*))__db_util_collate_icu_close);
438                 } else {
439                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
440                 __db_util_collate_icu_8, (void(*)(void*))__db_util_collate_icu_close);
441                 }
442         } else if(utf_type == DB_UTIL_COL_UTF16) {
443                 if(cmp_type == DB_UTIL_CMP_LC) {
444                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
445                 __db_util_collate_icu_16_lc, (void(*)(void*))__db_util_collate_icu_close);
446                 } else {
447                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
448                 __db_util_collate_icu_16, (void(*)(void*))__db_util_collate_icu_close);
449                 }
450         } else {
451                 DB_UTIL_TRACE_ERROR("wrong utf_type param value : %d\n", utf_type);
452                 return DB_UTIL_ERR_PARAM;
453         }
454
455         if (err != SQLITE_OK) {
456                 DB_UTIL_TRACE_ERROR("sqlite3_create_collation_v2 fail : %d \n", err);
457                 __db_util_collate_icu_close((void*)ucol);
458                 return err;
459         } else {
460                 DB_UTIL_TRACE_DEBUG("sqlite3_create_collation_v2 success \n");
461                 return DB_UTIL_OK;
462         }
463 }
464
465 int db_util_create_collation(
466         PARAM_IN sqlite3 *db_handle,
467         PARAM_IN db_util_collate_type type,
468         PARAM_IN db_util_collate_textrep text_type,
469         PARAM_IN char* col_name)
470 {
471         int ret = DB_UTIL_OK;
472         UErrorCode status = U_ZERO_ERROR;
473         const char* locale = NULL;
474
475         DB_UTIL_TRACE_DEBUG("db_util_create_collation start");
476
477         ret = __db_util_dl_load_icu();
478         DB_UTIL_RETV_IF(ret != DB_UTIL_OK, DB_UTIL_ERROR);
479
480         /* get current locale */
481         icu_symbol.icu_uloc_setDefault((const char*)getenv("LANG"), &status);
482         locale = icu_symbol.icu_uloc_getDefault();
483         if(locale == NULL) {
484                 DB_UTIL_TRACE_WARNING("Fail to get current locale : %d", DB_UTIL_ERR_ENV);
485                 return DB_UTIL_ERROR;
486         }
487         DB_UTIL_TRACE_DEBUG("locale : %s", locale);
488
489 #ifdef ENABLE_COL_KO_IC
490         if((db_util_collate_type == DB_UTIL_COL_KO_IC) ||
491                 (db_util_collate_type == DB_UTIL_COL_KO_IC_LC)) {
492                 if(strncmp(locale, "ko", 2) != 0) {
493                         DB_UTIL_TRACE_WARNING("collate type is not match with current locale : %d", DB_UTIL_ERR_ENV);
494                         return DB_UTIL_ERROR;
495                 }
496         }
497 #endif
498
499         switch(type) {
500                 case DB_UTIL_COL_UCA :
501                         ret = __db_util_collation_create(db_handle, NULL, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
502                         break;
503                 case DB_UTIL_COL_LS_AS_CI :
504                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
505                         break;
506                 case DB_UTIL_COL_LS_AI_CI :
507                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
508                         break;
509                 case DB_UTIL_COL_LS_AI_CI_LC :
510                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
511                         break;
512 #ifdef ENABLE_COL_KO_IC
513                 case DB_UTIL_COL_KO_IC :
514                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
515                         break;
516                 case DB_UTIL_COL_KO_IC_LC :
517                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
518                         break;
519 #endif
520                 default :
521                         DB_UTIL_TRACE_WARNING("wrong collate input type");
522         }
523
524         if(ret != DB_UTIL_OK)
525                 ret = DB_UTIL_ERROR;
526
527         return ret;
528 }
529
530 #ifdef DB_UTIL_ENABLE_DEVDEBUG
531
532 static char *strtoutf8(const UChar * unichars, int len)
533 {
534         DB_UTIL_TRACE_WARNING("strtoutf8 start");
535
536         int lenstr, lenutf8;
537         char *pstr = NULL;
538         UErrorCode status = U_ZERO_ERROR;
539
540         lenstr = lenutf8 = 0;
541         lenstr = sizeof(char) * 4 * (len + 1);
542         pstr = (char *)malloc(lenstr);
543         if (!pstr)return NULL;
544         u_strToUTF8(pstr, lenstr, &lenutf8, unichars, len, &status);
545         if (U_FAILURE(status)) {
546                 DB_UTIL_TRACE_WARNING("u_strToUTF8 failed in strtoutf8 :%s\n",
547                        u_errorName(status));
548                 return NULL;
549         }
550         DB_UTIL_TRACE_WARNING("strtoutf8 out : %s", pstr);
551         return pstr;
552 }
553
554 #endif
555