From: Hyunjee Kim Date: Fri, 6 Mar 2020 04:44:36 +0000 (+0900) Subject: [ACR-1518][Ucnvsel] Module implementation X-Git-Tag: accepted/tizen/unified/20200428.125949~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f087e67890a5e7a549bd93a53c03a27f63f49b41;p=platform%2Fcore%2Fapi%2Fbase-utils.git [ACR-1518][Ucnvsel] Module implementation Change-Id: I85f2566b28fe3e80e13234ac87c02ba350ff3faf Signed-off-by: Hyunjee Kim --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b0751f7..279e847 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,6 +45,7 @@ SET(BASEUTILS_SRCS utils_i18n_uscript.c utils_i18n_uidna.c utils_i18n_ucnv.c + utils_i18n_ucnvsel.c utils_i18n_plural_rules.cpp utils_i18n_plural_format.cpp utils_i18n_immutable_idx.cpp @@ -111,5 +112,6 @@ INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_utext.h DESTINAT INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_uscript.h DESTINATION ${INCLUDE_INSTALL_DIR}/base) INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_uidna.h DESTINATION ${INCLUDE_INSTALL_DIR}/base) INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_ucnv.h DESTINATION ${INCLUDE_INSTALL_DIR}/base) +INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_ucnvsel.h DESTINATION ${INCLUDE_INSTALL_DIR}/base) INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n.h DESTINATION ${INCLUDE_INSTALL_DIR}/base) INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/${pc_name}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) diff --git a/src/include/utils_i18n.h b/src/include/utils_i18n.h index bfe2e6e..c3cada7 100644 --- a/src/include/utils_i18n.h +++ b/src/include/utils_i18n.h @@ -57,6 +57,7 @@ #include #include #include +#include /** * @file utils_i18n.h @@ -107,6 +108,7 @@ extern "C" { * - unicode Script Information * - IDNA * - Character conversion + * - Converter selector * * This module provides flexible generation of number or date format patterns and helps you format and 
parse dates/number for any locale. * The i18n module provides various features based on data from ICU. The following table shows the version of ICU used in each Tizen platform. @@ -289,6 +291,10 @@ extern "C" { * @ref CAPI_BASE_UTILS_I18N_UCNV_MODULE * Character conversion * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * A converter selector is built with a set of encoding/charset names and given an input string returns the set of names of the corresponding converters which can convert the string. + * * * * @section CAPI_BASE_UTILS_I18N_MODULE_MAPPING_TABLE Mapping Table @@ -4843,6 +4849,36 @@ extern "C" { * #i18n_ucnv_is_fixed_width * ucnv_isFixedWidth * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_create + * ucnvsel_open + * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_destroy + * ucnvsel_close + * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_create_from_serialized + * ucnvsel_openFromSerialized + * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_serialize + * ucnvsel_serialize + * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_select_for_string + * ucnvsel_selectForString + * + * + * @ref CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * #i18n_ucnvsel_select_for_utf8 + * ucnvsel_selectForUTF8 + * * */ diff --git a/src/include/utils_i18n_types.h b/src/include/utils_i18n_types.h index f79c504..e06958c 100644 --- a/src/include/utils_i18n_types.h +++ b/src/include/utils_i18n_types.h @@ -4788,6 +4788,21 @@ typedef enum { * @} */ +/** + * @addtogroup CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * @{ + */ + +/** + * @brief An i18n_uconverter_selector_h handle. 
+ * @since_tizen 6.0 + */ +typedef void *i18n_uconverter_selector_h; + +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/src/include/utils_i18n_ucnvsel.h b/src/include/utils_i18n_ucnvsel.h new file mode 100644 index 0000000..8264621 --- /dev/null +++ b/src/include/utils_i18n_ucnvsel.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTILS_I18N_UCNVSEL_H__ +#define __UTILS_I18N_UCNVSEL_H__ + +#include <utils_i18n_types.h> + +/** + * @file utils_i18n_ucnvsel.h + * @version 0.1 + * @brief utils_i18n_ucnvsel + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @ingroup CAPI_BASE_UTILS_I18N_MODULE + * @defgroup CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE Ucnvsel + * @brief C API: Converter selector API. + * @section CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE_HEADER Required Header + * \#include <utils_i18n.h> + * + * @section CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE_OVERVIEW Overview + * @details A converter selector is built with a set of encoding/charset names + * and given an input string returns the set of names of the corresponding converters which can convert the string. + * + * A converter selector can be serialized into a buffer and created from the serialized form. + * + */ + +/** + * @addtogroup CAPI_BASE_UTILS_I18N_UCONVERTER_SELECTOR_MODULE + * @{ + */ + +/** + * @brief Creates an #i18n_uconverter_selector_h. 
+ * @details If converter_list_size is 0, build for all available converters. If excluded_code_points is NULL, don't exclude any code points. + * @since_tizen 6.0 + * @remarks The @a sel should be released using #i18n_ucnvsel_destroy(). + * @param[in] converter_list A pointer to the names of the encodings to be included. Can be NULL if @a converter_list_size is 0. + * The list and the names will be cloned, and the caller retains ownership of the original. + * @param[in] converter_list_size Number of encodings in above list. If 0, builds a selector for all available converters. + * @param[in] excluded_code_points A set of code points to be excluded from consideration. + * That is, excluded code points in a string do not change the selection result. (They might be handled by a callback.) + * Use NULL to exclude nothing. + * @param[in] which_set What converter set to use? Use this to determine whether to consider only roundtrip mappings or also fallbacks. + * @param[out] sel The new selector. + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * + */ +int i18n_ucnvsel_create(const char *const *converter_list, int32_t converter_list_size, const i18n_uset_h excluded_code_points, const i18n_ucnv_unicode_set_e which_set, i18n_uconverter_selector_h *sel); + +/** + * @brief Destroys an #i18n_uconverter_selector_h. + * @details If any enumerations were returned by i18n_ucnvsel*, they become invalid. + * They can be destroyed before or after calling #i18n_ucnvsel_destroy, but should never be used after the selector is destroyed. + * @since_tizen 6.0 + * @param[in] sel The selector to destroy. 
+ * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * + */ +int i18n_ucnvsel_destroy(i18n_uconverter_selector_h sel); + +/** + * @brief Creates an #i18n_uconverter_selector_h from its serialized form. + * @details The buffer must remain valid and unchanged for the lifetime of the selector. + * This is much faster than creating a selector from scratch. Using a serialized form from a different machine (endianness/charset) is supported. + * @since_tizen 6.0 + * @remarks The @a sel should be released using #i18n_ucnvsel_destroy(). + * @param[in] buffer Pointer to the serialized form of a converter selector; must be 32-bit-aligned + * @param[in] length The capacity of this buffer (can be equal to or larger than the actual data length) + * @param[out] sel The new selector. + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * + */ +int i18n_ucnvsel_create_from_serialized(const void *buffer, int32_t length, i18n_uconverter_selector_h *sel); + +/** + * @brief Serializes a selector into a linear buffer. + * @details The serialized form is portable to different machines. 
+ * @since_tizen 6.0 + * @param[in] sel Selector to consider + * @param[out] buffer Pointer to 32-bit-aligned memory to be filled with the serialized form of this converter selector + * @param[in] buffer_capacity The capacity of this buffer + * @param[out] capacity The required buffer capacity to hold the serialized data + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * + */ +int i18n_ucnvsel_serialize(const i18n_uconverter_selector_h sel, void *buffer, int32_t buffer_capacity, int32_t *capacity); + +/** + * @brief Selects converters that can map all characters in a UTF-16 string, ignoring the excluded code points. + * @since_tizen 6.0 + * @remarks The @a enumeration is valid until @a sel is released. + * @param[in] sel An #i18n_uconverter_selector_h + * @param[in] string UTF-16 string + * @param[in] length Length of the string, or -1 if NUL-terminated + * @param[out] enumeration An enumeration containing encoding names. The returned encoding names and their order will be the same as supplied when building the selector. + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * + */ +int i18n_ucnvsel_select_for_string(const i18n_uconverter_selector_h sel, const i18n_uchar *string, int32_t length, i18n_uenumeration_h *enumeration); + +/** + * @brief Selects converters that can map all characters in a UTF-8 string, ignoring the excluded code points. + * @since_tizen 6.0 + * @remarks The @a enumeration is valid until @a sel is released. + * @param[in] sel An #i18n_uconverter_selector_h + * @param[in] string UTF-8 string + * @param[in] length Length of the string, or -1 if NUL-terminated + * @param[out] enumeration An enumeration containing encoding names. 
The returned encoding names and their order will be the same as supplied when building the selector.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #I18N_ERROR_NONE Successful
+ * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
+ *
+ */
+int i18n_ucnvsel_select_for_utf8(const i18n_uconverter_selector_h sel, const char *string, int32_t length, i18n_uenumeration_h *enumeration);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @}
+ * @}
+ */
+#endif /* __UTILS_I18N_UCNVSEL_H__*/
diff --git a/src/utils_i18n_ucnvsel.c b/src/utils_i18n_ucnvsel.c
new file mode 100644
index 0000000..3960296
--- /dev/null
+++ b/src/utils_i18n_ucnvsel.c
@@ -0,0 +1,150 @@
+/*
+* Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <unicode/ucnvsel.h>
+
+#include <utils_i18n_ucnvsel.h>
+#include <utils_i18n_private.h>
+
+/*
+ * Creates a converter selector with ucnvsel_open() and stores it in @a sel.
+ *
+ * A NULL @a sel, a negative @a converter_list_size, or a NULL @a converter_list
+ * with a non-zero size is rejected with I18N_ERROR_INVALID_PARAMETER before ICU
+ * is called. Otherwise the ICU status, translated by ERR_MAPPING, is returned.
+ */
+int i18n_ucnvsel_create(const char *const *converter_list, int32_t converter_list_size, const i18n_uset_h excluded_code_points, const i18n_ucnv_unicode_set_e which_set, i18n_uconverter_selector_h *sel)
+{
+	/* The new selector is written through sel, so it must not be NULL. */
+	retv_if(sel == NULL, I18N_ERROR_INVALID_PARAMETER);
+	retv_if(converter_list_size < 0 || (converter_list == NULL && converter_list_size != 0), I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error = I18N_ERROR_NONE;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*sel = ucnvsel_open(converter_list, converter_list_size, excluded_code_points, (UConverterUnicodeSet)which_set, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Closes the selector with ucnvsel_close().
+ *
+ * Returns I18N_ERROR_INVALID_PARAMETER for a NULL @a sel, I18N_ERROR_NONE otherwise.
+ */
+int i18n_ucnvsel_destroy(i18n_uconverter_selector_h sel)
+{
+	retv_if(sel == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	ucnvsel_close(sel);
+
+	return I18N_ERROR_NONE;
+}
+
+/*
+ * Re-creates a selector from its serialized form with ucnvsel_openFromSerialized()
+ * and stores it in @a sel.
+ *
+ * A non-positive @a length yields I18N_ERROR_INVALID_PARAMETER and a @a length
+ * below 32 yields I18N_ERROR_OUT_OF_MEMORY; a NULL @a sel yields
+ * I18N_ERROR_INVALID_PARAMETER. All three are decided before ICU is called.
+ * NOTE(review): 32 is presumably the smallest serialized form ICU accepts - confirm.
+ */
+int i18n_ucnvsel_create_from_serialized(const void *buffer, int32_t length, i18n_uconverter_selector_h *sel)
+{
+	retv_if(length <= 0, I18N_ERROR_INVALID_PARAMETER);
+	retv_if(length < 32, I18N_ERROR_OUT_OF_MEMORY);
+	/* Tested after the length checks so their return codes stay as they were. */
+	retv_if(sel == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error = I18N_ERROR_NONE;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*sel = ucnvsel_openFromSerialized(buffer, length, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Serializes @a sel into @a buffer with ucnvsel_serialize() and stores the value
+ * returned by ICU in @a capacity.
+ *
+ * A NULL @a sel or @a capacity, or a negative @a buffer_capacity, is rejected
+ * with I18N_ERROR_INVALID_PARAMETER before ICU is called.
+ */
+int i18n_ucnvsel_serialize(const i18n_uconverter_selector_h sel, void *buffer, int32_t buffer_capacity, int32_t *capacity)
+{
+	/* sel is handed to ICU and the result is written through capacity. */
+	retv_if(sel == NULL || capacity == NULL, I18N_ERROR_INVALID_PARAMETER);
+	retv_if(buffer_capacity < 0, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error = I18N_ERROR_NONE;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*capacity = ucnvsel_serialize(sel, buffer, buffer_capacity, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Runs ucnvsel_selectForString() on the UTF-16 @a string with @a sel and stores
+ * the returned enumeration in @a enumeration.
+ *
+ * A NULL @a sel or @a enumeration, or a NULL @a string with a non-zero @a length,
+ * is rejected with I18N_ERROR_INVALID_PARAMETER before ICU is called.
+ */
+int i18n_ucnvsel_select_for_string(const i18n_uconverter_selector_h sel, const i18n_uchar *string, int32_t length, i18n_uenumeration_h *enumeration)
+{
+	retv_if(sel == NULL || enumeration == NULL, I18N_ERROR_INVALID_PARAMETER);
+	retv_if(string == NULL && length != 0, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error = I18N_ERROR_NONE;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*enumeration = ucnvsel_selectForString(sel, string, length, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Runs ucnvsel_selectForUTF8() on the UTF-8 @a string with @a sel and stores
+ * the returned enumeration in @a enumeration.
+ *
+ * A NULL @a sel or @a enumeration, or a NULL @a string with a non-zero @a length,
+ * is rejected with I18N_ERROR_INVALID_PARAMETER before ICU is called.
+ */
+int i18n_ucnvsel_select_for_utf8(const i18n_uconverter_selector_h sel, const char *string, int32_t length, i18n_uenumeration_h *enumeration)
+{
+	retv_if(sel == NULL || enumeration == NULL, I18N_ERROR_INVALID_PARAMETER);
+	retv_if(string == NULL && length != 0, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error = I18N_ERROR_NONE;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*enumeration = ucnvsel_selectForUTF8(sel, string, length, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}