799db55d31b948cdf5536fe781fe04bf3ca9ec10
[platform/core/appfw/libslp-db-util.git] / collation.c
1 /*
2  * libslp-db-util
3  *
4  * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Contact: Hakjoo Ko <hakjoo.ko@samsung.com>
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */
21 #include <string.h>
22 #include <math.h>
23 #include <wchar.h>
24 #include <wctype.h>
25
26 #include <unistd.h>
27 #include <glib.h>
28
29 #include <dlfcn.h>
30
31 #include <unicode/utypes.h>
32 #include <unicode/ucol.h>
33 #include <unicode/uiter.h>
34 #include <unicode/ustring.h>
35
36 #include "collation.h"
37
38 #include "db-util-debug.h"
39
/*
 * Guard macros.
 *
 * When `cond` evaluates to true, a warning naming the enclosing function is
 * logged (wrapped in the ANSI escape sequences ESC[33m ... ESC[0m) and the
 * enclosing function returns immediately:
 *   DB_UTIL_RETV_IF(cond, ret) - returns `ret`
 *   DB_UTIL_RET_IF(cond)       - plain `return;`, for void functions
 *
 * The do { ... } while(0) wrapper lets each macro be used as one statement.
 */
#define DB_UTIL_RETV_IF(cond,ret) \
	do { \
		if (cond) { \
			DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__); \
			return ret; \
		} \
	} while (0)

#define DB_UTIL_RET_IF(cond) \
	do { \
		if (cond) { \
			DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m",__FUNCTION__); \
			return; \
		} \
	} while (0)

/* Value the collation callbacks in this file return when they cannot compare. */
#define DB_UTIL_ERR_COL_FUNC_RET DB_UTIL_ERROR

/* Shared object handed to dlopen() to obtain the ICU entry points. */
#define ICUI18N_LIBPATH "/usr/lib/libicui18n.so"
58
/* Internal error codes used by the helpers in this file (-10 .. -6). */
enum {
	DB_UTIL_ERR_DLOPEN = -10,	/* the ICU shared object could not be opened */
	DB_UTIL_ERR_DLSYM  = -9,	/* an ICU entry point could not be resolved  */
	DB_UTIL_ERR_ENV    = -8,	/* locale / environment problem              */
	DB_UTIL_ERR_ICU    = -7,	/* an ICU call reported failure              */
	DB_UTIL_ERR_PARAM  = -6		/* invalid argument                          */
};

/* Comparison strategy chosen when a collation is registered. */
enum {
	/* Whole-string comparison (selects the non-"_lc" callbacks). */
	DB_UTIL_CMP_SC = 0,
	/* Loop comparison (selects the "_lc" callbacks). */
	DB_UTIL_CMP_LC = 1
};
72
73 #define ICU_FUNC_CNT 8
74
75 typedef UCollator* (*ICU_UCOL_OPEN)(const char *, UErrorCode *);
76 typedef void (*ICU_UCOL_CLOSE)(UCollator *);
77 typedef void (*ICU_UCOL_SETSTRENGTH)(UCollator *, UCollationStrength);
78 typedef UCollationResult (*ICU_UCOL_STRCOLL)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t);
79 typedef UCollationResult (*ICU_UCOL_STRCOLLITER)(const UCollator *, UCharIterator *, UCharIterator *, UErrorCode *);
80 typedef void (*ICU_UITER_SETUTF8)(UCharIterator *, const char *, int32_t);
81 typedef void (*ICU_ULOC_SETDEFAULT)(const char* localeID, UErrorCode* status);
82 typedef const char* (*ICU_ULOC_GETDEFAULT)(void);
83
84 typedef struct {
85         ICU_UCOL_OPEN icu_ucol_open;
86         ICU_UCOL_CLOSE icu_ucol_close;
87         ICU_UCOL_STRCOLL icu_ucol_strcoll;
88         ICU_UCOL_STRCOLLITER icu_ucol_strcollIter;
89         ICU_UCOL_SETSTRENGTH icu_ucol_setStrength;
90         ICU_UITER_SETUTF8 icu_uiter_setUTF8;
91         ICU_ULOC_SETDEFAULT icu_uloc_setDefault;
92         ICU_ULOC_GETDEFAULT icu_uloc_getDefault;
93 } db_util_icu_func_t;
94
95 db_util_icu_func_t icu_symbol;
96
97 void *g_dl_icu_handle = NULL;
98
99 #ifdef DB_UTIL_ENABLE_DEVDEBUG
100 static char *strtoutf8(const UChar * unichars, int len);
101 #endif
102
103 static int __db_util_dl_load_icu()
104 {
105         void *handle = NULL;
106         void *icu_handle[ICU_FUNC_CNT] = { 0 };
107         char *dl_error;
108         int i = 0;
109
110         const char *ICU_API[] = {
111                 "ucol_open",
112                 "ucol_close",
113                 "ucol_strcoll",
114                 "ucol_strcollIter",
115                 "ucol_setStrength",
116                 "uiter_setUTF8",
117                 "uloc_setDefault",
118                 "uloc_getDefault"
119         };
120
121         if(g_dl_icu_handle == NULL) {
122                 g_dl_icu_handle = dlopen(ICUI18N_LIBPATH, RTLD_LAZY | RTLD_GLOBAL);
123                 if(g_dl_icu_handle == NULL) {
124                         DB_UTIL_TRACE_WARNING("dlopen icu so fail");
125                         return DB_UTIL_ERR_DLOPEN;
126                 }
127         }
128
129         for (i = 0; i < ICU_FUNC_CNT; i++) {
130                 handle = dlsym(g_dl_icu_handle, ICU_API[i]);
131                 if ((dl_error = dlerror()) != NULL)  {
132                         DB_UTIL_TRACE_WARNING("dlsym(%s) is failed for %s",
133                                                                 dl_error, ICU_API[i]);
134                         return DB_UTIL_ERR_DLSYM;
135                 }
136                 icu_handle[i] = handle;
137         }
138
139         memcpy((void*)&icu_symbol, (const void*)icu_handle, sizeof(icu_handle));
140
141         return DB_UTIL_OK;
142 }
143
144 /* The collating function must return an integer that is negative, zero or positive */
145 static int __db_util_collate_icu_16(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
146 {
147 #ifdef DB_UTIL_ENABLE_DEVDEBUG
148         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16 func start \n");
149
150         UChar* tmp_v1 = (UChar *)str1;
151         UChar* tmp_v2 = (UChar *)str2;
152         char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
153         char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
154
155         DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
156         DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
157 #endif
158
159         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
160
161         UCollationResult result = icu_symbol.icu_ucol_strcoll(
162                                                                 (UCollator *) ucol,
163                                                                 (const UChar *) str1, str1_len,
164                                                                 (const UChar *) str2, str2_len);
165
166 #ifdef DB_UTIL_ENABLE_DEVDEBUG
167         if(utf8_v1)
168                 free(utf8_v1);
169         if(utf8_v2)
170                 free(utf8_v2);
171
172         if (result == UCOL_LESS) {
173                 DB_UTIL_TRACE_DEBUG("less \n");
174         } else if (result == UCOL_GREATER) {
175                 DB_UTIL_TRACE_DEBUG("greater \n");
176         } else {
177                 DB_UTIL_TRACE_DEBUG("equal \n");
178         }
179 #endif
180
181         return result;
182 }
183
184 /* The collating function must return an integer that is negative, zero or positive */
185 static int __db_util_collate_icu_16_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
186 {
187 #ifdef DB_UTIL_ENABLE_DEVDEBUG
188         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16_lc func start \n");
189
190         UChar* tmp_v1 = (UChar *)str1;
191         UChar* tmp_v2 = (UChar *)str2;
192
193         char* utf8_v1 = strtoutf8(tmp_v1, str1_len);
194         char* utf8_v2 = strtoutf8(tmp_v2, str2_len);
195
196         DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1);
197         DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2);
198
199         if(utf8_v1)
200                 free(utf8_v1);
201         if(utf8_v2)
202                 free(utf8_v2);
203 #endif
204
205         UCollationResult result = 0;
206
207         UChar* str_to = (UChar *)str1;
208         UChar* str_from = (UChar *)str1;
209
210         int i;
211
212         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET);
213
214         if(str1_len> str2_len) {
215                 for(i=0;i<str2_len;i=i+2) {
216                         str_to++;
217                 }
218
219                 while((int)(str_to-(UChar*)str1) <= str1_len) {
220                         result = icu_symbol.icu_ucol_strcoll(
221                                                 (UCollator *) ucol,
222                                                 (UChar *) str_from, str_to-str_from,
223                                                 (const UChar *) str2, str2_len);
224
225                         if (result == UCOL_EQUAL)
226                         {
227 #ifdef DB_UTIL_ENABLE_DEVDEBUG
228                                 DB_UTIL_TRACE_DEBUG("equal \n");
229 #endif
230                                 return UCOL_EQUAL;
231                         }
232
233                         str_to++;
234                         str_from++;
235                 }
236         } else {
237                 result = icu_symbol.icu_ucol_strcoll(
238                                         (UCollator *) ucol,
239                                         (const UChar *) str1, str1_len,
240                                         (const UChar *) str2, str2_len);
241         }
242
243 #ifdef DB_UTIL_ENABLE_DEVDEBUG
244         if (result == UCOL_LESS) {
245                 DB_UTIL_TRACE_DEBUG("less \n");
246         } else if (result == UCOL_GREATER) {
247                 DB_UTIL_TRACE_DEBUG("greater \n");
248         } else if (result == UCOL_EQUAL) {
249                 DB_UTIL_TRACE_DEBUG("equal \n");
250         } else {
251                 DB_UTIL_TRACE_DEBUG("compare error : %d \n", result);
252         }
253 #endif
254
255         return result;
256 }
257
258 /* The collating function must return an integer that is negative, zero or positive */
259 static int __db_util_collate_icu_8(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
260 {
261         UCharIterator uiter1, uiter2;
262         UErrorCode error = U_ZERO_ERROR;
263
264 #ifdef DB_UTIL_ENABLE_DEVDEBUG
265         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8 func start \n");
266         DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
267 #endif
268
269         DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
270         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
271
272         icu_symbol.icu_uiter_setUTF8(&uiter1, (const char *) str1, str1_len);
273         icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
274
275         UCollationResult result = icu_symbol.icu_ucol_strcollIter(
276                                                                 (UCollator *) ucol,
277                                                                 &uiter1,
278                                                                 &uiter2,
279                                                                 &error);
280         if(U_FAILURE(error)) {
281                 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8 ucol_strcollIter error: %d\n", error);
282                 return DB_UTIL_ERR_COL_FUNC_RET;
283         }
284
285 #ifdef DB_UTIL_ENABLE_DEVDEBUG
286         if (result == UCOL_LESS) {
287                 DB_UTIL_TRACE_DEBUG("less \n");
288         } else if (result == UCOL_GREATER) {
289                 DB_UTIL_TRACE_DEBUG("greater \n");
290         } else {
291                 DB_UTIL_TRACE_DEBUG("equal \n");
292         }
293 #endif
294
295         return result;
296 }
297
298 static int __db_util_collate_icu_8_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2)
299 {
300         UCharIterator uiter1, uiter2;
301         UErrorCode error = U_ZERO_ERROR;
302         UCollationResult result = 0;
303         char* str_from = (char*)str1;
304         char* str_to = (char*)str1;
305         glong v1_char_len, v2_char_len;
306         int i;
307
308 #ifdef DB_UTIL_ENABLE_DEVDEBUG
309         DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8_lc func start \n");
310         DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2);
311 #endif
312
313         DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET);
314         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET);
315
316         icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len);
317
318         v1_char_len = g_utf8_strlen((gchar *)str1,-1);
319         v2_char_len = g_utf8_strlen((gchar *)str2,-1);
320
321         if(v1_char_len > v2_char_len) {
322                 for(i=0;i<v2_char_len;i++) {
323                         str_to = g_utf8_next_char(str_to);
324                 }
325
326                 while((int)(str_to-(char*)str1) <= str1_len) {
327                         icu_symbol.icu_uiter_setUTF8(
328                                                 &uiter1,
329                                                 (const char *) str_from,
330                                                 str_to - str_from);
331
332                         result = icu_symbol.icu_ucol_strcollIter(
333                                                 (UCollator *) ucol,
334                                                 &uiter1,
335                                                 &uiter2,
336                                                 &error);
337                         if (U_FAILURE(error)) {
338                                 DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
339                                 return DB_UTIL_ERR_COL_FUNC_RET;
340                         }
341
342                         if(result == UCOL_EQUAL)
343 #ifdef DB_UTIL_ENABLE_DEVDEBUG
344                         {
345                                 DB_UTIL_TRACE_DEBUG("equal \n");
346                                 return UCOL_EQUAL;
347                         }
348 #else
349                                 return UCOL_EQUAL;
350 #endif
351
352                         str_to = g_utf8_next_char(str_to);
353                         str_from = g_utf8_next_char(str_from);
354                 }
355         }
356         else
357         {
358                 icu_symbol.icu_uiter_setUTF8(
359                                         &uiter1,
360                                         (const char *) str1,
361                                         str1_len);
362
363                 result = icu_symbol.icu_ucol_strcollIter(
364                                         (UCollator *) ucol,
365                                         &uiter1,
366                                         &uiter2,
367                                         &error);
368                 if (U_FAILURE(error)) {
369                         DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error);
370                         return DB_UTIL_ERR_COL_FUNC_RET;
371                 }
372         }
373
374 #ifdef DB_UTIL_ENABLE_DEVDEBUG
375         if (result == UCOL_LESS)
376         {
377                 DB_UTIL_TRACE_DEBUG("less \n");
378         }
379         else if (result == UCOL_GREATER)
380         {
381                 DB_UTIL_TRACE_DEBUG("greater \n");
382         }
383         else if(result == UCOL_EQUAL)
384         {
385                 DB_UTIL_TRACE_DEBUG("equal \n");
386         }
387         else
388         {
389                 DB_UTIL_TRACE_DEBUG("compare error : %d\n", result);
390         }
391 #endif
392
393         return result;
394 }
395
396 static void __db_util_collate_icu_close(void* ucol)
397 {
398         DB_UTIL_TRACE_DEBUG("close icu collator\n");
399
400         DB_UTIL_RET_IF(icu_symbol.icu_ucol_close == NULL);
401
402     icu_symbol.icu_ucol_close((UCollator *) ucol);
403 }
404
405 static int __db_util_collation_create(sqlite3* db_handle, char* locale, char* collator_name, UCollationStrength ucol_strength_value, int utf_type, int cmp_type)
406 {
407         int err;
408         UErrorCode status = U_ZERO_ERROR;
409
410         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_open == NULL, DB_UTIL_ERR_DLSYM);
411         DB_UTIL_RETV_IF(icu_symbol.icu_ucol_setStrength == NULL, DB_UTIL_ERR_DLSYM);
412
413         UCollator* ucol = icu_symbol.icu_ucol_open(locale, &status);
414         if(status == U_USING_DEFAULT_WARNING) {
415                 DB_UTIL_TRACE_ERROR("ucol_open success with default collate option\n");
416         } else if (U_FAILURE(status)) {
417                 DB_UTIL_TRACE_ERROR("ucol_open fail : %d \n", status);
418                 return DB_UTIL_ERR_ICU;
419         }
420 #ifdef DB_UTIL_ENABLE_DEVDEBUG
421         else
422         {
423                 DB_UTIL_TRACE_DEBUG("ucol_open success : %d \n", status);
424         }
425 #endif
426
427         icu_symbol.icu_ucol_setStrength(ucol, ucol_strength_value);
428         if (U_FAILURE(status)) {
429                 DB_UTIL_TRACE_ERROR("ucol_setStrength fail : %d \n", status);
430                 return DB_UTIL_ERR_ICU;
431         } else {
432                 DB_UTIL_TRACE_DEBUG("ucol_setStrength success \n");
433         }
434
435         if(utf_type == DB_UTIL_COL_UTF8) {
436                 if(cmp_type == DB_UTIL_CMP_LC) {
437                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
438                 __db_util_collate_icu_8_lc, (void(*)(void*))__db_util_collate_icu_close);
439                 } else {
440                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol,
441                 __db_util_collate_icu_8, (void(*)(void*))__db_util_collate_icu_close);
442                 }
443         } else if(utf_type == DB_UTIL_COL_UTF16) {
444                 if(cmp_type == DB_UTIL_CMP_LC) {
445                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
446                 __db_util_collate_icu_16_lc, (void(*)(void*))__db_util_collate_icu_close);
447                 } else {
448                         err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol,
449                 __db_util_collate_icu_16, (void(*)(void*))__db_util_collate_icu_close);
450                 }
451         } else {
452                 DB_UTIL_TRACE_ERROR("wrong utf_type param value : %d\n", utf_type);
453                 return DB_UTIL_ERR_PARAM;
454         }
455
456         if (err != SQLITE_OK) {
457                 DB_UTIL_TRACE_ERROR("sqlite3_create_collation_v2 fail : %d \n", err);
458                 __db_util_collate_icu_close((void*)ucol);
459                 return err;
460         } else {
461                 DB_UTIL_TRACE_DEBUG("sqlite3_create_collation_v2 success \n");
462                 return DB_UTIL_OK;
463         }
464 }
465
466 int db_util_create_collation(
467         PARAM_IN sqlite3 *db_handle,
468         PARAM_IN db_util_collate_type type,
469         PARAM_IN db_util_collate_textrep text_type,
470         PARAM_IN char* col_name)
471 {
472         int ret = DB_UTIL_OK;
473         UErrorCode status = U_ZERO_ERROR;
474         const char* locale = NULL;
475
476         if ( (!db_handle) || (!col_name) ) {
477                 DB_UTIL_TRACE_ERROR("wrong input param");
478                 return DB_UTIL_ERROR;
479         }
480
481         DB_UTIL_TRACE_DEBUG("db_util_create_collation start");
482
483         ret = __db_util_dl_load_icu();
484         DB_UTIL_RETV_IF(ret != DB_UTIL_OK, DB_UTIL_ERROR);
485
486         /* get current locale */
487         icu_symbol.icu_uloc_setDefault((const char*)getenv("LANG"), &status);
488         locale = icu_symbol.icu_uloc_getDefault();
489         if(locale == NULL) {
490                 DB_UTIL_TRACE_WARNING("Fail to get current locale : %d", DB_UTIL_ERR_ENV);
491                 return DB_UTIL_ERROR;
492         }
493         DB_UTIL_TRACE_DEBUG("locale : %s", locale);
494
495 #ifdef ENABLE_COL_KO_IC
496         if((db_util_collate_type == DB_UTIL_COL_KO_IC) ||
497                 (db_util_collate_type == DB_UTIL_COL_KO_IC_LC)) {
498                 if(strncmp(locale, "ko", 2) != 0) {
499                         DB_UTIL_TRACE_WARNING("collate type is not match with current locale : %d", DB_UTIL_ERR_ENV);
500                         return DB_UTIL_ERROR;
501                 }
502         }
503 #endif
504
505         switch(type) {
506                 case DB_UTIL_COL_UCA :
507                         ret = __db_util_collation_create(db_handle, NULL, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
508                         break;
509                 case DB_UTIL_COL_LS_AS_CI :
510                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC);
511                         break;
512                 case DB_UTIL_COL_LS_AI_CI :
513                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
514                         break;
515                 case DB_UTIL_COL_LS_AI_CI_LC :
516                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
517                         break;
518 #ifdef ENABLE_COL_KO_IC
519                 case DB_UTIL_COL_KO_IC :
520                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC);
521                         break;
522                 case DB_UTIL_COL_KO_IC_LC :
523                         ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC);
524                         break;
525 #endif
526                 default :
527                         DB_UTIL_TRACE_WARNING("wrong collate input type");
528                         ret = DB_UTIL_ERROR;
529         }
530
531         if(ret != DB_UTIL_OK)
532                 ret = DB_UTIL_ERROR;
533
534         return ret;
535 }
536
537 #ifdef DB_UTIL_ENABLE_DEVDEBUG
538
539 static char *strtoutf8(const UChar * unichars, int len)
540 {
541         DB_UTIL_TRACE_WARNING("strtoutf8 start");
542
543         int lenstr, lenutf8;
544         char *pstr = NULL;
545         UErrorCode status = U_ZERO_ERROR;
546
547         lenstr = lenutf8 = 0;
548         lenstr = sizeof(char) * 4 * (len + 1);
549         pstr = (char *)malloc(lenstr);
550         if (!pstr)return NULL;
551         u_strToUTF8(pstr, lenstr, &lenutf8, unichars, len, &status);
552         if (U_FAILURE(status)) {
553                 DB_UTIL_TRACE_WARNING("u_strToUTF8 failed in strtoutf8 :%s\n",
554                        u_errorName(status));
555                 return NULL;
556         }
557         DB_UTIL_TRACE_WARNING("strtoutf8 out : %s", pstr);
558         return pstr;
559 }
560
561 #endif
562