From: Hyunjee Kim Date: Fri, 17 Jan 2020 07:04:44 +0000 (+0900) Subject: [ACR-1493][UNorm2] Module implementation X-Git-Tag: accepted/tizen/unified/20200210.131746^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fchanges%2F82%2F222682%2F8;p=platform%2Fcore%2Fapi%2Fbase-utils.git [ACR-1493][UNorm2] Module implementation Change-Id: Ic9fceaea123ebb99774bfff2764be7a988f01884 Signed-off-by: Hyunjee Kim --- diff --git a/src/include/utils_i18n.h b/src/include/utils_i18n.h index 956055b..70de9a0 100644 --- a/src/include/utils_i18n.h +++ b/src/include/utils_i18n.h @@ -254,6 +254,8 @@ extern "C" { * * @ref CAPI_BASE_UTILS_I18N_SIMPLE_DATE_FORMAT_MODULE * The Simple Date Format module provides API for formatting and parsing dates in language-independent manner. + * + * * @ref CAPI_BASE_UTILS_I18N_LOCALE_DISPLAY_NAMES_MODULE * The locale display names module returns display names of Locales and components of Locales. * @@ -1648,6 +1650,101 @@ extern "C" { * unorm2_normalize * * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_nfc_instance + * unorm2_getNFCInstance + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_nfd_instance + * unorm2_getNFDInstance + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_nfkc_instance + * unorm2_getNFKCInstance + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_nfkd_instance + * unorm2_getNFKDInstance + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_nfkc_casefold_instance + * unorm2_getNFKCCasefoldInstance + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_create_filtered + * unorm2_openFiltered + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_destroy + * unorm2_close + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * 
#i18n_unormalization_normalize_second_and_append + * unorm2_normalizeSecondAndAppend + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_append + * unorm2_append + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_decomposition + * unorm2_getDecomposition + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_raw_decomposition + * unorm2_getRawDecomposition + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_compose_pair + * unorm2_composePair + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_get_combining_class + * unorm2_getCombiningClass + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_is_normalized + * unorm2_isNormalized + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_quick_check + * unorm2_quickCheck + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_span_quick_check_yes + * unorm2_spanQuickCheckYes + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalizer_has_boundary_before + * unorm2_hasBoundaryBefore + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalizer_has_boundary_after + * unorm2_hasBoundaryAfter + * + * + * @ref CAPI_BASE_UTILS_I18N_UNORMALIZATION_MODULE + * #i18n_unormalization_is_inert + * unorm2_isInert + * + * * @ref CAPI_BASE_UTILS_I18N_UNUMBER_MODULE * #i18n_unumber_create * unum_open diff --git a/src/include/utils_i18n_unormalization.h b/src/include/utils_i18n_unormalization.h index f83660e..ee34458 100644 --- a/src/include/utils_i18n_unormalization.h +++ b/src/include/utils_i18n_unormalization.h @@ -73,11 +73,11 @@ extern "C" { * @brief Gets a i18n_unormalizer_h which uses the specified data file and composes or decomposes text according to the specified mode. 
* @since_tizen @if MOBILE 2.3 @elseif WEARABLE 2.3.1 @endif * - * @param[in] package_name @c NULL for ICU built-in data, otherwise application data package name. - * @param[in] name "nfc" or "nfkc" or "nfkc_cf" or the name of the custom data file. - * @param[in] mode The normalization mode (compose or decompose). - * @param[out] normalizer The requested normalizer on success. - * + * @param[in] package_name @c NULL for ICU built-in data, otherwise application data package name. + * @param[in] name "nfc" or "nfkc" or "nfkc_cf" or the name of the custom data file. + * @param[in] mode The normalization mode (compose or decompose). + * @param[out] normalizer The requested normalizer on success. + * @return @c 0 on success, otherwise a negative error value * @retval #I18N_ERROR_NONE Successful * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter */ @@ -89,19 +89,339 @@ int i18n_unormalization_get_instance(const char *package_name, const char *name, * @details The source and destination strings must be different buffers. * @since_tizen @if MOBILE 2.3 @elseif WEARABLE 2.3.1 @endif * - * @param[in] normalizer i18n normalizer handle. - * @param[in] src The source string. - * @param[in] len The length of the source string, otherwise @c -1 if NULL-terminated. - * @param[out] dest The destination string\n - * Its contents are replaced with normalized @a src. - * @param[in] capacity The number of string_uchar that can be written to @a dest - * @param[out] len_deststr The length of the destination string - * + * @param[in] normalizer An #i18n_unormalizer_h handle. + * @param[in] src The source string. + * @param[in] len The length of the source string, otherwise @c -1 if NULL-terminated. + * @param[out] dest The destination string\n + * Its contents are replaced with normalized @a src. 
+ * @param[in] capacity The number of string_uchar that can be written to @a dest + * @param[out] len_deststr The length of the destination string + * @return @c 0 on success, otherwise a negative error value * @retval #I18N_ERROR_NONE Successful * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter */ int i18n_unormalization_normalize(i18n_unormalizer_h normalizer, const i18n_uchar *src, int32_t len, i18n_uchar *dest, int32_t capacity, int32_t *len_deststr); +/** + * @brief Returns an #i18n_unormalizer_h instance for Unicode NFC normalization. + * @details Same as #i18n_unormalization_get_instance(NULL, "nfc", I18N_UNORM2_COMPOSE, n2). + * Returns an unmodifiable singleton instance. Do not delete it. + * @since_tizen 6.0 + * @param[out] normalizer The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_nfc_instance(i18n_unormalizer_h* normalizer); + +/** + * @brief Returns an #i18n_unormalizer_h instance for Unicode NFD normalization. + * @details Same as #i18n_unormalization_get_instance(NULL, "nfc", I18N_UNORM2_DECOMPOSE, n2). + * Returns an unmodifiable singleton instance. Do not delete it. + * @since_tizen 6.0 + * @param[out] normalizer The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_nfd_instance(i18n_unormalizer_h* normalizer); + +/** + * @brief Returns an #i18n_unormalizer_h instance for Unicode NFKC normalization. + * @details Same as #i18n_unormalization_get_instance(NULL, "nfkc", I18N_UNORM2_COMPOSE, n2). 
+ * Returns an unmodifiable singleton instance. Do not delete it. + * @since_tizen 6.0 + * @param[out] normalizer The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_nfkc_instance(i18n_unormalizer_h* normalizer); + +/** + * @brief Returns an #i18n_unormalizer_h instance for Unicode NFKD normalization. + * @details Same as #i18n_unormalization_get_instance(NULL, "nfkc", I18N_UNORM2_DECOMPOSE, n2). + * Returns an unmodifiable singleton instance. Do not delete it. + * @since_tizen 6.0 + * @param[out] normalizer The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_nfkd_instance(i18n_unormalizer_h* normalizer); + +/** + * @brief Returns an #i18n_unormalizer_h instance for Unicode NFKC_Casefold normalization. + * @details Same as #i18n_unormalization_get_instance(NULL, "nfkc_cf", I18N_UNORM2_COMPOSE, n2). + * Returns an unmodifiable singleton instance. Do not delete it. + * @since_tizen 6.0 + * @param[out] normalizer The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_nfkc_casefold_instance(i18n_unormalizer_h* normalizer); + + +/** + * @brief Constructs a filtered normalizer wrapping any #i18n_unormalizer_h and a filter set. 
+ * @details Both are aliased and @a normalizer must not be modified or deleted while this object is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @since_tizen 6.0 + * @param[in] normalizer Wrapped #i18n_unormalizer_h + * @param[in] filter_set An #i18n_uset_h which determines the characters to be normalized + * @param[out] normalizer2 The requested #i18n_unormalizer_h, if successful + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_create_filtered(i18n_unormalizer_h normalizer, const i18n_uset_h *filter_set, i18n_unormalizer_h *normalizer2); + +/** + * @brief Closes an #i18n_unormalizer_h created by #i18n_unormalization_create_filtered(). + * @details Do not destroy instances from #i18n_unormalization_get_instance() or from the singleton getters (e.g. #i18n_unormalization_get_nfc_instance()). + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h to be closed + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_destroy(i18n_unormalizer_h normalizer); + +/** + * @brief Appends the normalized form of the second string to the first string (merging them at the boundary) + * and returns the length of the first string. + * @details The result is normalized if the first string was normalized. The first and second strings must be different buffers. 
+ * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in,out] first A normalized string + * @param[in] first_length Length of the first string, or -1 if NUL-terminated + * @param[in] first_capacity Number of #i18n_uchar that can be written to @a first + * @param[in] second String, will be normalized + * @param[in] second_length Length of the @a second string, or -1 if NUL-terminated + * @param[out] len The length of the @a first string after merging + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_normalize_second_and_append(i18n_unormalizer_h normalizer, i18n_uchar *first, int32_t first_length, int32_t first_capacity, const i18n_uchar *second, int32_t second_length, int32_t *len); + +/** + * @brief Appends the second string to the first string (merging them at the boundary) and returns the length of the first string. + * @details The result is normalized if both the strings were normalized. The first and second strings must be different buffers. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in,out] first A normalized string. 
+ * @param[in] first_length Length of the first string, or -1 if NUL-terminated + * @param[in] first_capacity Number of #i18n_uchar that can be written to @a first + * @param[in] second String, should be normalized + * @param[in] second_length Length of the @a second string, or -1 if NUL-terminated + * @param[out] len The length of the @a first string after merging + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_append(i18n_unormalizer_h normalizer, i18n_uchar *first, int32_t first_length, int32_t first_capacity, const i18n_uchar *second, int32_t second_length, int32_t *len); + +/** + * @brief Gets the decomposition mapping of @a code_point. + * @details Roughly equivalent to normalizing the String form of @a code_point on an #I18N_UNORM2_DECOMPOSE #i18n_unormalizer_h, + * but much faster, and except that this function returns a negative value and does not write a string + * if @a code_point does not have a decomposition mapping in this instance's data. + * This function is independent of the mode of the UNormalizer2. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] code_point Code point + * @param[out] decomposition String buffer which will be set to @a code_point's decomposition mapping, if there is one. 
+ * @param[in] capacity Number of #i18n_uchar that can be written to @a decomposition + * @param[out] len The non-negative length of @a code_point's decomposition, if there is one; otherwise a negative value + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_decomposition(i18n_unormalizer_h normalizer, i18n_uchar32 code_point, i18n_uchar *decomposition, int32_t capacity, int32_t *len); + +/** + * @brief Gets the raw decomposition mapping of @a code_point. + * @details This is similar to the #i18n_unormalization_get_decomposition() function + * but returns the raw decomposition mapping as specified in UnicodeData.txt + * or (for custom data) in the mapping files processed by the gennorm2 tool. + * By contrast, #i18n_unormalization_get_decomposition() returns the processed, recursively-decomposed version of this mapping. + * + * When used on a standard NFKC Normalizer2 instance, #i18n_unormalization_get_raw_decomposition() returns the Unicode Decomposition_Mapping (dm) property. + * + * When used on a standard NFC Normalizer2 instance, it returns the Decomposition_Mapping + * only if the Decomposition_Type (dt) is Canonical (Can); in this case, the result contains either one or two code points (=1..4 #i18n_uchar). + * + * This function is independent of the mode of the UNormalizer2. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] code_point Code point. + * @param[out] decomposition String buffer which will be set to @a code_point's raw decomposition mapping, if there is one. 
+ * @param[in] capacity Number of #i18n_uchar that can be written to decomposition + * @param[out] len The non-negative length of @a code_point's raw decomposition, if there is one; otherwise a negative value + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_get_raw_decomposition(i18n_unormalizer_h normalizer, i18n_uchar32 code_point, i18n_uchar *decomposition, int32_t capacity, int32_t *len); + +/** + * @brief Performs pairwise composition of two code points and returns the composite if there is one. + * @details Returns a composite code point c only if c has a two-way mapping to a+b. + * In standard Unicode normalization, this means that c has a canonical decomposition + * to a+b and c does not have the Full_Composition_Exclusion property. + * + * This function is independent of the mode of the UNormalizer2. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] a A (normalization starter) code point. + * @param[in] b Another code point. + * @param[out] c The non-negative composite code point if there is one; otherwise a negative value. + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_compose_pair(i18n_unormalizer_h normalizer, i18n_uchar32 a, i18n_uchar32 b, i18n_uchar32 *c); + +/** + * @brief Gets the combining class of @a code_point. + * @details The default implementation returns 0 but all standard implementations return the Unicode Canonical_Combining_Class value. 
+ * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] code_point Code point + * @return @a code_point's combining class + + * + */ +uint8_t i18n_unormalization_get_combining_class(i18n_unormalizer_h normalizer, i18n_uchar32 code_point); + +/** + * @brief Tests if the string is normalized. + * @details Internally, in cases where the #i18n_unormalization_quick_check() method would return "maybe" + * (which is only possible for the two COMPOSE modes) + * this method resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] s Input string + * @param[in] length Length of the string, or -1 if NUL-terminated + * @param[out] ret TRUE if s is normalized + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_is_normalized(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, i18n_ubool *ret); + +/** + * @brief Tests if the string is normalized. + * @details For the two COMPOSE modes, the result could be "maybe" in cases that would take a little more work to resolve definitively. + * Use #i18n_unormalization_span_quick_check_yes() and #i18n_unormalization_normalize_second_and_append() + * for a faster combination of quick check + normalization, to avoid re-checking the "yes" prefix. 
+ * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] s Input string + * @param[in] length Length of the string, or -1 if NUL-terminated + * @param[out] result Normalization check result + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_quick_check(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, i18n_unormalization_check_result_e *result); + +/** + * @brief Gets the end of the normalized substring of the input string. + * @details The returned end index is usually one or more characters before the "no" or "maybe" character: + * The end index is at a normalization boundary. (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected to be normalized already, + * then call this method, and if it returns a prefix shorter than the input string, + * copy that prefix and use #i18n_unormalization_normalize_second_and_append() for the remainder. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] s Input string. + * @param[in] length Length of the string, or -1 if NUL-terminated. + * @param[out] result End index of the normalized beginning of the @a s string. + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_span_quick_check_yes(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, int32_t *result); + +/** + * @brief Tests if the character always has a normalization boundary before it, regardless of context. 
+ * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] c Character to test + * @param[out] result TRUE if c has a normalization boundary before it + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalizer_has_boundary_before(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result); + +/** + * @brief Tests if the character always has a normalization boundary after it, regardless of context. + * @details For details see the Normalizer2 base class documentation. + * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] c Character to test + * @param[out] result TRUE if c has a normalization boundary after it + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalizer_has_boundary_after(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result); + +/** + * @brief Tests if the character is normalization-inert. + * @details For details see the Normalizer2 base class documentation. 
+ * @since_tizen 6.0 + * @param[in] normalizer An #i18n_unormalizer_h + * @param[in] c Character to test + * @param[out] result TRUE if c is normalization-inert + * @return @c 0 on success, otherwise a negative error value + * @retval #I18N_ERROR_NONE Successful + * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter + * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory + * + */ +int i18n_unormalization_is_inert(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result); + #ifdef __cplusplus } #endif diff --git a/src/utils_i18n_unormalization.c b/src/utils_i18n_unormalization.c index fe66246..16df764 100644 --- a/src/utils_i18n_unormalization.c +++ b/src/utils_i18n_unormalization.c @@ -43,3 +43,223 @@ int i18n_unormalization_normalize(i18n_unormalizer_h normalizer, const i18n_ucha return result; } + +int i18n_unormalization_get_nfc_instance(i18n_unormalizer_h* normalizer) +{ /* Stores the handle returned by unorm2_getNFCInstance() in *normalizer and returns the ICU status after ERR_MAPPING. NOTE(review): normalizer is dereferenced without a NULL check, and the cast names a pointer-to-handle type although a handle is assigned -- confirm both. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer = (i18n_unormalizer_h *)unorm2_getNFCInstance(&icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_nfd_instance(i18n_unormalizer_h* normalizer) +{ /* Same pattern as the NFC getter, using unorm2_getNFDInstance(); normalizer is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer = (i18n_unormalizer_h *)unorm2_getNFDInstance(&icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_nfkc_instance(i18n_unormalizer_h* normalizer) +{ /* Same pattern as the NFC getter, using unorm2_getNFKCInstance(); normalizer is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer = (i18n_unormalizer_h *)unorm2_getNFKCInstance(&icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_nfkd_instance(i18n_unormalizer_h* normalizer) +{ /* Same pattern as the NFC getter, using unorm2_getNFKDInstance(); normalizer is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer = (i18n_unormalizer_h *)unorm2_getNFKDInstance(&icu_error); + 
ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_nfkc_casefold_instance(i18n_unormalizer_h* normalizer) +{ /* Same pattern as the NFC getter, using unorm2_getNFKCCasefoldInstance(); normalizer is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer = (i18n_unormalizer_h *)unorm2_getNFKCCasefoldInstance(&icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + + +int i18n_unormalization_create_filtered(i18n_unormalizer_h normalizer, const i18n_uset_h *filter_set, i18n_unormalizer_h *normalizer2) +{ /* Stores the result of unorm2_openFiltered() in *normalizer2 and returns the mapped ICU status. NOTE(review): filter_set is a pointer to an i18n_uset_h yet is forwarded uncast -- presumably ICU expects the set itself; confirm the intended indirection. normalizer2 is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *normalizer2 = (i18n_unormalizer_h *)unorm2_openFiltered((UNormalizer2 *) normalizer, filter_set, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_destroy(i18n_unormalizer_h normalizer) +{ /* Guards against a NULL handle via retv_if (presumably returning I18N_ERROR_INVALID_PARAMETER), then closes the handle with unorm2_close() and reports I18N_ERROR_NONE. The handle is passed without the UNormalizer2 cast used by the other wrappers in this file. */ + retv_if(normalizer == NULL, I18N_ERROR_INVALID_PARAMETER); + + unorm2_close(normalizer); + + return I18N_ERROR_NONE; +} + +int i18n_unormalization_normalize_second_and_append(i18n_unormalizer_h normalizer, i18n_uchar *first, int32_t first_length, int32_t first_capacity, const i18n_uchar *second, int32_t second_length, int32_t *len) +{ /* Forwards all arguments to unorm2_normalizeSecondAndAppend(); its return value is written to *len unconditionally, before the ICU status is mapped. len is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *len = unorm2_normalizeSecondAndAppend((UNormalizer2 *) normalizer, first, first_length, first_capacity, second, second_length, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} +int i18n_unormalization_append(i18n_unormalizer_h normalizer, i18n_uchar *first, int32_t first_length, int32_t first_capacity, const i18n_uchar *second, int32_t second_length, int32_t *len) +{ /* Forwards all arguments to unorm2_append(); *len receives its return value regardless of the resulting status. len is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *len = unorm2_append((UNormalizer2 *) normalizer, first, first_length, first_capacity, second, second_length, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + 
I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_decomposition(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_uchar *decomposition, int32_t capacity, int32_t *len) +{ /* Forwards to unorm2_getDecomposition() and stores its return value in *len. The second parameter is named c here but code_point in the header declaration. len is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *len = unorm2_getDecomposition((UNormalizer2 *) normalizer, c, decomposition, capacity, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_get_raw_decomposition(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_uchar *decomposition, int32_t capacity, int32_t *len) +{ /* Forwards to unorm2_getRawDecomposition() and stores its return value in *len. The second parameter is named c here but code_point in the header declaration. len is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *len = unorm2_getRawDecomposition((UNormalizer2 *) normalizer, c, decomposition, capacity, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_compose_pair(i18n_unormalizer_h normalizer, i18n_uchar32 a, i18n_uchar32 b, i18n_uchar32 *c) +{ /* unorm2_composePair() is called without a UErrorCode argument, so icu_error still holds U_ZERO_ERROR when it is mapped below. Output parameter c is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *c = unorm2_composePair((UNormalizer2 *) normalizer, a, b); + + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +uint8_t i18n_unormalization_get_combining_class(i18n_unormalizer_h normalizer, i18n_uchar32 code_point) +{ /* Returns the unorm2_getCombiningClass() value directly; no error code is produced on this path. */ + return unorm2_getCombiningClass((UNormalizer2 *) normalizer, code_point); +} + +int i18n_unormalization_is_normalized(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, i18n_ubool *ret) +{ /* Stores the unorm2_isNormalized() result in *ret and returns the mapped ICU status; ret is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *ret = unorm2_isNormalized((UNormalizer2 *) normalizer, s, length, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_quick_check(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, i18n_unormalization_check_result_e *result) +{ /* Stores the unorm2_quickCheck() result in *result (assigned to the i18n enum type without an explicit cast) and returns the mapped ICU status; result is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode 
icu_error = U_ZERO_ERROR; + + *result = unorm2_quickCheck((UNormalizer2 *) normalizer, s, length, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_span_quick_check_yes(i18n_unormalizer_h normalizer, const i18n_uchar *s, int32_t length, int32_t *result) +{ /* Stores the value returned by unorm2_spanQuickCheckYes() in *result and returns the mapped ICU status; result is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *result = unorm2_spanQuickCheckYes((UNormalizer2 *) normalizer, s, length, &icu_error); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalizer_has_boundary_before(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result) +{ /* unorm2_hasBoundaryBefore() is called without a UErrorCode argument, so the status mapped below is always the initial U_ZERO_ERROR. NOTE(review): the i18n_unormalizer_ prefix differs from the i18n_unormalization_ prefix of the sibling functions; result is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *result = unorm2_hasBoundaryBefore((UNormalizer2 *) normalizer, c); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalizer_has_boundary_after(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result) +{ /* unorm2_hasBoundaryAfter() is called without a UErrorCode argument, so the status mapped below is always the initial U_ZERO_ERROR. NOTE(review): the i18n_unormalizer_ prefix differs from the i18n_unormalization_ prefix of the sibling functions; result is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *result = unorm2_hasBoundaryAfter((UNormalizer2 *) normalizer, c); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} + +int i18n_unormalization_is_inert(i18n_unormalizer_h normalizer, i18n_uchar32 c, i18n_ubool *result) +{ /* unorm2_isInert() is called without a UErrorCode argument, so the status mapped below is always the initial U_ZERO_ERROR; result is not NULL-checked. */ + i18n_error_code_e i18n_error; + UErrorCode icu_error = U_ZERO_ERROR; + + *result = unorm2_isInert((UNormalizer2 *) normalizer, c); + ERR_MAPPING(icu_error, i18n_error); + I18N_ERR(i18n_error); + + return i18n_error; +} +