From 65c328908e611a4564cdf6f770cd48aaa94b1684 Mon Sep 17 00:00:00 2001
From: Gukhwan Cho
Date: Thu, 15 Oct 2015 20:17:47 +0900
Subject: [PATCH] use Hangul Compatibility Jamo for korean index

Add ctsvc_extra_index_normalize(), which is used instead of
ctsvc_extra_normalize() when __ctsvc_normalize_str() is called with the
new is_index flag.  It folds characters in the 0xFF00-0xFF60 range to
their halfwidth equivalents and passes every character accepted by
ctsvc_is_hangul() to the new ctsvc_hangul_jamo2compatibility().

ctsvc_hangul_jamo2compatibility() rewrites halfwidth Hangul and Hangul
Jamo (0x1100-0x11FF) code units to Hangul Compatibility Jamo (0x31xx)
by looking the low byte up in the hangul_jamo_* tables with strchr().
To make that lookup possible the jamo tables become plain char, the
0x00 choseong entry (paired with compatibility 0x31) moves to the end
of its table where strchr() finds it as the terminator, and 0xDD is
added to the jongseong table.

Bytes read back from the hangul_compatibility_* tables are cast to
unsigned char: those tables are plain char and hold values >= 0x80,
which would otherwise be sign-extended where char is signed and
overwrite the 0x31 high byte of the result.

ctsvc_normalize_index() passes is_index = true; ctsvc_normalize_str()
passes false and keeps its previous behaviour.

Change-Id: Ice10d16f9d804ecdc8f27f9edd4ec7e921f8d347
Signed-off-by: Gukhwan Cho
---
 server/ctsvc_localize.c     | 18 +++++++++++++
 server/ctsvc_localize.h     |  3 ++-
 server/ctsvc_localize_kor.c | 65 +++++++++++++++++++++++++++++++++++++++------
 server/ctsvc_localize_kor.h |  1 +
 server/ctsvc_normalize.c    | 12 +++++----
 5 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/server/ctsvc_localize.c b/server/ctsvc_localize.c
index 72f5905..e1a1342 100644
--- a/server/ctsvc_localize.c
+++ b/server/ctsvc_localize.c
@@ -141,6 +141,24 @@ void ctsvc_extra_normalize(UChar *word, int32_t word_size)
 	}
 }
 
+void ctsvc_extra_index_normalize(UChar *word, int32_t word_size)
+{
+	int i;
+	for (i=0;i<word_size;i++) {
+		// fullwidth -> halfwidth
+		if (CTSVC_COMPARE_BETWEEN((UChar)0xFF00, word[i], (UChar)0xFF60)) {
+			int unicode_value1 = 0;
+			int unicode_value2 = 0;
+			unicode_value1 = 0x0;
+			unicode_value2 = (0xFF & word[i]) + 0x20;
+			word[i] = unicode_value1 << 8 | unicode_value2;
+		}
+		else if (ctsvc_is_hangul(word[i])) {
+			ctsvc_hangul_jamo2compatibility(&word[i]);
+		}
+	}
+}
+
 const char *ctsvc_get_language_locale(int lang)
 {
 	char *langset = ctsvc_get_langset();
diff --git a/server/ctsvc_localize.h b/server/ctsvc_localize.h
index d5e2186..eb5be83 100644
--- a/server/ctsvc_localize.h
+++ b/server/ctsvc_localize.h
@@ -65,5 +65,6 @@ int ctsvc_get_sort_type_from_language(int language);
 int ctsvc_get_language_type(const char *system_lang);
 const char *ctsvc_get_language_locale(int lang);
 void ctsvc_extra_normalize(UChar *word, int32_t word_size);
+void ctsvc_extra_index_normalize(UChar *word, int32_t word_size);
 
-#endif /* __CTSVC_LOCALIZE_H__ */
\ No newline at end of file
+#endif /* __CTSVC_LOCALIZE_H__ */
diff --git a/server/ctsvc_localize_kor.c b/server/ctsvc_localize_kor.c
index c087d68..92f10c2 100644
--- a/server/ctsvc_localize_kor.c
+++ b/server/ctsvc_localize_kor.c
@@ -30,6 +30,9 @@
 
 #include "ctsvc_localize_kor.h"
 
+/* korean -Hangul Jamo */
+#define CTSVC_HAN_J_START (UChar)0x1100
+#define CTSVC_HAN_J_END (UChar)0x11FF
 
 /* korean -Hangul Jamo extended A*/
 #define CTSVC_JAMO_A_START (UChar)0xA960
@@ -53,19 +56,19 @@
 
 
 static const char hangul_compatibility_choseong[] = {
-	0x31, 0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41,
+	0x32, 0x34, 0x37, 0x38, 0x39, 0x40, 0x41,
 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
 	0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x65, 0x66, 0x6E,
 	0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 	0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80,
-	0x81, 0x84, 0x85, 0x86, 0x00};
+	0x81, 0x84, 0x85, 0x86, 0x31, 0x00};
 
-static const unsigned char hangul_jamo_choseong[] = {
-	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07,	/* to choseong 0x1100~0x115F */
+static const char hangul_jamo_choseong[] = {
+	0x01, 0x02, 0x03, 0x04, 0x05, 0x1A, 0x06, 0x07,	/* to choseong 0x1100~0x115F */
 	0x08, 0x21, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
 	0x10, 0x11, 0x12, 0x14, 0x15, 0x1C, 0x1D, 0x1E, 0x20,
 	0x22, 0x23, 0x27, 0x29, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
-	0x32, 0x36, 0x40, 0x47, 0x4C, 0x57, 0x58, 0x59, 0x00};
+	0x32, 0x36, 0x40, 0x47, 0x4C, 0x57, 0x58, 0x59, 0x00, 0x00};
 
 static const char hangul_compatibility_jungseong[] = {
 	0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
@@ -73,7 +76,7 @@ static const char hangul_compatibility_jungseong[] = {
 	0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x87, 0x88,
 	0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x00};
 
-static const unsigned char hangul_jamo_jungseong[] = {
+static const char hangul_jamo_jungseong[] = {
 	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,	/* to jungseong 0x1160~0x11A7 */
 	0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72,
 	0x73, 0x74, 0x75, 0x60, 0x84, 0x85, 0x88, 0x91, 0x92,
@@ -84,9 +87,9 @@ static const char hangul_compatibility_jongseong[] = {
 	0x3F, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
 	0x6F, 0x70, 0x82, 0x83, 0x00};
 
-static const unsigned char hangul_jamo_jongseong[] = {
+static const char hangul_jamo_jongseong[] = {
 	0xAA, 0xAC, 0xAD, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5,	/* to jongseong 0x11A8~0x11FF */
-	0xC7, 0xC8, 0xCC, 0xCE, 0xD3, 0xD7, 0xD9, 0xDF, 0xF1, 0xF2, 0x00};
+	0xC7, 0xC8, 0xCC, 0xCE, 0xD3, 0xD7, 0xD9, 0xDD, 0xDF, 0xF1, 0xF2, 0x00};
 
 static inline bool is_chosung(UChar src)
 {
@@ -171,6 +174,52 @@ void ctsvc_hangul_compatibility2jamo(UChar *src)
 	}
 }
 
+void ctsvc_hangul_jamo2compatibility(UChar *src)
+{
+	int unicode_value1 = 0;
+	int unicode_value2 = 0;
+
+	unicode_value1 = (0xFF00 & (*src)) >> 8;
+	unicode_value2 = (0xFF & (*src));
+
+	/* korean -Hangul Jamo halfwidth*/
+	if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_HALF_START, *src, CTSVC_HAN_HALF_END)) {
+		unicode_value1 = 0x31;
+
+		if (unicode_value2 < 0xBF)
+			unicode_value2 -= 0x70;
+		else if (unicode_value2 < 0xC8)
+			unicode_value2 -= 0x73;
+		else if (unicode_value2 < 0xD0)
+			unicode_value2 -= 0x75;
+		else if (unicode_value2 < 0xD8)
+			unicode_value2 -= 0x77;
+		else
+			unicode_value2 -= 0x79;
+
+		(*src) = unicode_value1 << 8 | unicode_value2;
+	}
+
+	if (CTSVC_COMPARE_BETWEEN(CTSVC_HAN_J_START, *src, CTSVC_HAN_J_END)) {
+		char *pos;
+		if ((pos = strchr(hangul_jamo_choseong, unicode_value2))) {
+			unicode_value1 = 0x31;
+			unicode_value2 = (unsigned char)hangul_compatibility_choseong[pos - hangul_jamo_choseong];
+			(*src) = unicode_value1 << 8 | unicode_value2;
+		}
+		else if ((pos = strchr(hangul_jamo_jungseong, unicode_value2))) {
+			unicode_value1 = 0x31;
+			unicode_value2 = (unsigned char)hangul_compatibility_jungseong[pos - hangul_jamo_jungseong];
+			(*src) = unicode_value1 << 8 | unicode_value2;
+		}
+		else if ((pos = strchr(hangul_jamo_jongseong, unicode_value2))) {
+			unicode_value1 = 0x31;
+			unicode_value2 = (unsigned char)hangul_compatibility_jongseong[pos - hangul_jamo_jongseong];
+			(*src) = unicode_value1 << 8 | unicode_value2;
+		}
+	}
+}
+
 int ctsvc_get_chosung(const char *src, char *dest, int dest_size)
 {
 	int32_t size;
diff --git a/server/ctsvc_localize_kor.h b/server/ctsvc_localize_kor.h
index 5c8c922..da2b983 100644
--- a/server/ctsvc_localize_kor.h
+++ b/server/ctsvc_localize_kor.h
@@ -27,5 +27,6 @@ bool ctsvc_has_chosung(const char *src);
 bool ctsvc_is_chosung(const char *src);
 bool ctsvc_is_hangul(UChar src);
 void ctsvc_hangul_compatibility2jamo(UChar *src);
+void ctsvc_hangul_jamo2compatibility(UChar *src);
 
 #endif /* __CTSVC_LOCALIZE_KOR_H__ */
\ No newline at end of file
diff --git a/server/ctsvc_normalize.c b/server/ctsvc_normalize.c
index 268d9a3..86a742a 100644
--- a/server/ctsvc_normalize.c
+++ b/server/ctsvc_normalize.c
@@ -156,7 +156,7 @@ int ctsvc_collation_str(char *src, char **dest)
 	return __ctsvc_collation_str(temp, dest);
 }
 
-static int __ctsvc_normalize_str(const char *src, char **dest)
+static int __ctsvc_normalize_str(const char *src, char **dest, bool is_index)
 {
 	int32_t tmp_size = 100;
 	int32_t upper_size;
@@ -266,7 +266,10 @@ static int __ctsvc_normalize_str(const char *src, char **dest)
 	}
 
 	ctsvc_check_language(result);
-	ctsvc_extra_normalize(result, size);
+	if (is_index)
+		ctsvc_extra_index_normalize(result, size);
+	else
+		ctsvc_extra_normalize(result, size);
 
 	/* remove diacritical : U+3000 ~ U+034F */
 	int i, j;
@@ -429,7 +432,7 @@ int ctsvc_normalize_str(const char *src, char **dest)
 	ret = __ctsvc_remove_special_char(src, temp, strlen(src) + 1);
 	RETVM_IF(ret < CONTACTS_ERROR_NONE, ret, "__ctsvc_remove_special_char() Fail(%d)", ret);
 
-	ret = __ctsvc_normalize_str(temp, dest);
+	ret = __ctsvc_normalize_str(temp, dest, false);
 	return ret;
 }
 
@@ -483,8 +486,7 @@ int ctsvc_normalize_index(const char *src, char **dest)
 			return CONTACTS_ERROR_INVALID_PARAMETER;
 		}
 	}
-	ret = __ctsvc_normalize_str(first_str, dest);
-
+	ret = __ctsvc_normalize_str(first_str, dest, true);
 	RETVM_IF(dest == NULL, ret, "__ctsvc_normalize_str() Fail");
 
 	if ((*dest)[0] != '\0') {
-- 
2.7.4