utils_i18n_uchar_iter.c
utils_i18n_unumsys.c
utils_i18n_utext.c
+ utils_i18n_uscript.c
utils_i18n_plural_rules.cpp
utils_i18n_plural_format.cpp
utils_i18n_immutable_idx.cpp
INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_loc_disp_names.h DESTINATION ${INCLUDE_INSTALL_DIR}/base)
INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_unumsys.h DESTINATION ${INCLUDE_INSTALL_DIR}/base)
INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_utext.h DESTINATION ${INCLUDE_INSTALL_DIR}/base)
+INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n_uscript.h DESTINATION ${INCLUDE_INSTALL_DIR}/base)
INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${INC_DIR}/utils_i18n.h DESTINATION ${INCLUDE_INSTALL_DIR}/base)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/${pc_name}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
#include <utils_i18n_loc_disp_names.h>
#include <utils_i18n_unumsys.h>
#include <utils_i18n_utext.h>
+#include <utils_i18n_uscript.h>
/**
* @file utils_i18n.h
* - locale display names
* - numbering system
* - utext
+ * - unicode Script Information
*
* This module provides flexible generation of number or date format patterns and helps you format and parse dates/number for any locale.
* The i18n module provides various features based on data from ICU. The following table shows the version of ICU used in each Tizen platform.
* <td>@ref CAPI_BASE_UTILS_I18N_UTEXT_MODULE</td>
* <td>Abstract Unicode Text API.</td>
* </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>Unicode Script Information</td>
+ * </tr>
* </table>
*
* @section CAPI_BASE_UTILS_I18N_MODULE_MAPPING_TABLE Mapping Table
* <td>#i18n_utext_freeze</td>
* <td>utext_freeze</td>
* </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_codes</td>
+ * <td>uscript_getCode</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_name</td>
+ * <td>uscript_getName</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_short_name</td>
+ * <td>uscript_getShortName</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_script</td>
+ * <td>uscript_getScript</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_has_script</td>
+ * <td>uscript_hasScript</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_script_extensions</td>
+ * <td>uscript_getScriptExtensions</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_sample_string</td>
+ * <td>uscript_getSampleString</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_get_usage</td>
+ * <td>uscript_getUsage</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_is_right_to_left</td>
+ * <td>uscript_isRightToLeft</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_breaks_between_letters</td>
+ * <td>uscript_breaksBetweenLetters</td>
+ * </tr>
+ * <tr>
+ * <td>@ref CAPI_BASE_UTILS_I18N_USCRIPT_MODULE</td>
+ * <td>#i18n_uscript_is_cased</td>
+ * <td>uscript_isCased</td>
+ * </tr>
* </table>
*/
/**
- * @addtogroup CAPI_BASE_UTILS_I18N_UCHAR_MODULE
+ * @addtogroup CAPI_BASE_UTILS_I18N_USCRIPT_MODULE
* @{
*/
I18N_USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/**< Hluw */
I18N_USCRIPT_KHOJKI = 157,/**< Khoj */
I18N_USCRIPT_TIRHUTA = 158,/**< Tirh */
- I18N_USCRIPT_CODE_LIMIT = 159 /**< Count of i18n_uscript_code_e enumerators*/
+ I18N_USCRIPT_CAUCASIAN_ALBANIAN = 159,/**< Aghb (Since 6.0)*/
+ I18N_USCRIPT_MAHAJANI = 160,/**< Mahj (Since 6.0)*/
+ I18N_USCRIPT_AHOM = 161,/**< Ahom (Since 6.0)*/
+ I18N_USCRIPT_HATRAN = 162,/**< Hatr (Since 6.0)*/
+ I18N_USCRIPT_MODI = 163,/**< Modi (Since 6.0)*/
+ I18N_USCRIPT_MULTANI = 164,/**< Mult (Since 6.0)*/
+ I18N_USCRIPT_PAU_CIN_HAU = 165,/**< Pauc (Since 6.0)*/
+ I18N_USCRIPT_SIDDHAM = 166,/**< Sidd (Since 6.0)*/
+ I18N_USCRIPT_ADLAM = 167,/**< Adlm (Since 6.0)*/
+ I18N_USCRIPT_BHAIKSUKI = 168,/**< Bhks (Since 6.0)*/
+ I18N_USCRIPT_MARCHEN = 169,/**< Marc (Since 6.0)*/
+ I18N_USCRIPT_NEWA = 170,/**< Newa (Since 6.0)*/
+ I18N_USCRIPT_OSAGE = 171,/**< Osge (Since 6.0)*/
+ I18N_USCRIPT_HAN_WITH_BOPOMOFO = 172,/**< Hanb (Since 6.0)*/
+ I18N_USCRIPT_JAMO = 173,/**< Jamo (Since 6.0)*/
+ I18N_USCRIPT_SYMBOLS_EMOJI = 174,/**< Zsye (Since 6.0)*/
+ I18N_USCRIPT_MASARAM_GONDI = 175,/**< Gonm (Since 6.0)*/
+ I18N_USCRIPT_SOYOMBO = 176,/**< Soyo (Since 6.0)*/
+ I18N_USCRIPT_ZANABAZAR_SQUARE = 177,/**< Zanb (Since 6.0)*/
+ I18N_USCRIPT_DOGRA = 178,/**< Dogr (Since 6.0)*/
+ I18N_USCRIPT_GUNJALA_GONDI = 179,/**< Gong (Since 6.0)*/
+ I18N_USCRIPT_MAKASAR = 180,/**< Maka (Since 6.0)*/
+ I18N_USCRIPT_MEDEFAIDRIN = 181,/**< Medf (Since 6.0)*/
+ I18N_USCRIPT_HANIFI_ROHINGYA = 182,/**< Rohg (Since 6.0)*/
+ I18N_USCRIPT_SOGDIAN = 183,/**< Sogd (Since 6.0)*/
+ I18N_USCRIPT_OLD_SOGDIAN = 184,/**< Sogo (Since 6.0)*/
+ I18N_USCRIPT_ELYMAIC = 185,/**< Elym (Since 6.0)*/
+ I18N_USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/**< Hmnp (Since 6.0)*/
+ I18N_USCRIPT_NANDINAGARI = 187,/**< Nand (Since 6.0)*/
+ I18N_USCRIPT_WANCHO = 188,/**< Wcho (Since 6.0)*/
+ I18N_USCRIPT_CODE_LIMIT = 189 /**< Count of i18n_uscript_code_e enumerators*/
} i18n_uscript_code_e;
/**
+ * @brief Script usage constants.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ * @since_tizen 6.0
+ * @remarks i18n_uscript_get_usage() returns the result of ICU's uscript_getUsage() without translating it,
+ *          so these enumerators presumably have to keep the same order and values as ICU's UScriptUsage
+ *          (NOTE(review): confirm against unicode/uscript.h before reordering or inserting values).
+ */
+typedef enum {
+ I18N_USCRIPT_USAGE_NOT_ENCODED, /**< Not encoded in Unicode.*/
+ I18N_USCRIPT_USAGE_UNKNOWN, /**< Unknown script usage.*/
+ I18N_USCRIPT_USAGE_EXCLUDED, /**< Candidate for Exclusion from Identifiers.*/
+ I18N_USCRIPT_USAGE_LIMITED_USE, /**< Limited Use script.*/
+ I18N_USCRIPT_USAGE_ASPIRATIONAL, /**< Aspirational Use script.*/
+ I18N_USCRIPT_USAGE_RECOMMENDED /**< Recommended script.*/
+} i18n_uscript_usage_e;
+
+
+/**
* @}
*/
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTILS_I18N_USCRIPT_H__
+#define __UTILS_I18N_USCRIPT_H__
+
+#include <utils_i18n_types.h>
+
+/**
+ * @file utils_i18n_uscript.h
+ * @version 0.1
+ * @brief utils_i18n_uscript
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @ingroup CAPI_BASE_UTILS_I18N_MODULE
+ * @defgroup CAPI_BASE_UTILS_I18N_USCRIPT_MODULE Uscript
+ * @brief Unicode Script Information.
+ * @section CAPI_BASE_UTILS_I18N_USCRIPT_MODULE_HEADER Required Header
+ * \#include <utils_i18n.h>
+ *
+ * @section CAPI_BASE_UTILS_I18N_USCRIPT_MODULE_OVERVIEW Overview
+ * @details Unicode Script Information.
+ *
+ */
+
+/**
+ * @addtogroup CAPI_BASE_UTILS_I18N_USCRIPT_MODULE
+ * @{
+ */
+
+/**
+ * @brief Gets the script codes associated with the specified language.
+ * @details The language is described using either locale, ISO 15924 name or ISO 15924 abbreviation.
+ * Example: If "Malayalam" or "Mlym" is given, the expected result is #I18N_USCRIPT_MALAYALAM.
+ * Note: To search by short or long script alias only,
+ * use #i18n_uchar_get_property_value_enum() instead.
+ * That does a fast lookup with no access of the locale data.
+ * @since_tizen 6.0
+ * @remarks @a codes must be allocated before calling the function.
+ * If the required capacity is greater than the capacity of @a codes, then the @a length is set to the required capacity and #I18N_ERROR_BUFFER_OVERFLOW is returned.
+ * @param[in] language The language, for which the script codes are to be retrieved
+ * @param[in,out] codes The array of codes associated with the specified language
+ * @param[in] capacity Capacity of the @a codes array
+ * @param[out] length The number of items written to the array, or the required capacity if the array's capacity is insufficient
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #I18N_ERROR_NONE Successful
+ * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
+ * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #I18N_ERROR_BUFFER_OVERFLOW The supplied array @a codes is of insufficient capacity
+ *
+ */
+int i18n_uscript_get_codes(const char* language, i18n_uscript_code_e* codes, int32_t capacity, int32_t *length);
+
+/**
+ * @brief Gets the script name for the specified script code.
+ * @details Returns the long Unicode script name, if there is one. Otherwise returns the 4-letter ISO 15924 script code.
+ * Example: If #I18N_USCRIPT_MALAYALAM is given, the expected output is "Malayalam".
+ * @since_tizen 6.0
+ * @param[in] script_code Uscript code enum
+ * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if @a script_code is invalid
+ *
+ */
+const char* i18n_uscript_get_name(i18n_uscript_code_e script_code);
+
+/**
+ * @brief Gets the short script name for the specified script code.
+ * @details Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
+ * Example: If #I18N_USCRIPT_MALAYALAM is given, the expected output is "Mlym".
+ * @since_tizen 6.0
+ * @param[in] script_code Uscript code enum
+ * @return short script name (4-letter code), or NULL if @a script_code is invalid
+ *
+ */
+const char* i18n_uscript_get_short_name(i18n_uscript_code_e script_code);
+
+/**
+ * @brief Gets the script code associated with the given @a codepoint.
+ * @details If the specified @a codepoint is invalid, the script code returned is equal to 0.
+ * Example: If 0x0D02 is given, the expected output is #I18N_USCRIPT_MALAYALAM.
+ * @since_tizen 6.0
+ * @param[in] codepoint #i18n_uchar32 @a codepoint
+ * @param[out] script_code The code of the script, that the specified @a codepoint belongs to.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #I18N_ERROR_NONE Successful
+ * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
+ * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ */
+int i18n_uscript_get_script(i18n_uchar32 codepoint, i18n_uscript_code_e *script_code);
+
+/**
+ * @brief Gets a value indicating whether the Script Extensions of the specified @a codepoint contain the specified script.
+ * @details If @a codepoint does not have explicit Script Extensions, then this tests whether @a codepoint has the Script property value @a script_code.
+ * Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ * @since_tizen 6.0
+ * @param[in] codepoint Code point
+ * @param[in] script_code Script code
+ * @return TRUE if @a script_code is in Script Extensions (@a codepoint)
+ *
+ */
+i18n_ubool i18n_uscript_has_script(i18n_uchar32 codepoint, i18n_uscript_code_e script_code);
+
+/**
+ * @brief Gets the Script Extensions for the specified @a codepoint.
+ * @details
+ * - If @a codepoint does have Script Extensions, then the Script property value
+ * (normally Common or Inherited) is not included.
+ * - If @a codepoint does not have Script Extensions, then the one Script code is written to the output array.
+ * - If @a codepoint is not a valid code point, then the one #I18N_USCRIPT_UNKNOWN code is written.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ *
+ * @since_tizen 6.0
+ * @remarks @a scripts must be allocated before calling the function.
+ * If the required capacity is greater than the capacity of @a scripts, then the @a length is set to the required capacity and #I18N_ERROR_BUFFER_OVERFLOW is returned.
+ * @param[in] codepoint Code point
+ * @param[in,out] scripts The array of Script Extensions for the specified @a codepoint
+ * @param[in] capacity Capacity of the @a scripts array
+ * @param[out] length The number of items written to the array, or the required capacity if the array's capacity is insufficient
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #I18N_ERROR_NONE Successful
+ * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
+ * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #I18N_ERROR_BUFFER_OVERFLOW The supplied array @a scripts is of insufficient capacity
+ *
+ */
+int i18n_uscript_get_script_extensions(i18n_uchar32 codepoint, i18n_uscript_code_e *scripts, int32_t capacity, int32_t *length);
+
+/**
+ * @brief Gets the script sample character string.
+ * @details This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.
+ * @since_tizen 6.0
+ * @remarks @a sample must be allocated before calling the function.
+ * If the required capacity is greater than the capacity of @a sample, then the @a length is set to the required capacity and #I18N_ERROR_BUFFER_OVERFLOW is returned.
+ * @param[in] script Script code
+ * @param[in,out] sample The sample string for the specified @a script
+ * @param[in] capacity The number of #i18n_uchar characters that @a sample can hold
+ * @param[out] length The number of #i18n_uchar characters written to the string, or the required capacity if the string's capacity is insufficient
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #I18N_ERROR_NONE Successful
+ * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
+ * @retval #I18N_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #I18N_ERROR_BUFFER_OVERFLOW The supplied array @a sample is of insufficient capacity
+ *
+ */
+int i18n_uscript_get_sample_string(i18n_uscript_code_e script, i18n_uchar *sample, int32_t capacity, int32_t *length);
+
+
+/**
+ * @brief Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * @details Returns #I18N_USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ * @since_tizen 6.0
+ * @param[in] script Script code
+ * @return script usage
+ *
+ */
+i18n_uscript_usage_e i18n_uscript_get_usage(i18n_uscript_code_e script);
+
+/**
+ * @brief Gets a value indicating whether the script is written right-to-left.
+ * @details For example, Arab and Hebr.
+ * @since_tizen 6.0
+ * @param[in] script Script code
+ * @return TRUE if the script is right-to-left
+ *
+ */
+i18n_ubool i18n_uscript_is_right_to_left(i18n_uscript_code_e script);
+
+/**
+ * @brief Gets a value indicating whether the script allows line breaks between letters (excluding hyphenation).
+ * @details Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.
+ * @since_tizen 6.0
+ * @param[in] script Script code
+ * @return TRUE if the script allows line breaks between letters
+ *
+ */
+i18n_ubool i18n_uscript_breaks_between_letters(i18n_uscript_code_e script);
+
+/**
+ * @brief Gets a value indicating whether the script case distinctions in modern usage are customary.
+ * @details For example, Latn and Cyrl.
+ * @since_tizen 6.0
+ * @param[in] script Script code
+ * @return TRUE if the script is cased
+ *
+ */
+i18n_ubool i18n_uscript_is_cased(i18n_uscript_code_e script);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @}
+ */
+#endif /* __UTILS_I18N_USCRIPT_H__*/
--- /dev/null
+/*
+* Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <unicode/uscript.h>
+
+#include <utils_i18n_uscript.h>
+#include <utils_i18n_private.h>
+
+/*
+ * Retrieves the script codes associated with @language by delegating to
+ * ICU's uscript_getCode().
+ *
+ * language: name, abbreviation or locale passed through to ICU unchanged.
+ * codes:    caller-allocated output array, handed to ICU as UScriptCode*.
+ * capacity: number of elements @codes can hold.
+ * length:   receives the value returned by uscript_getCode(); must not be NULL.
+ *
+ * Returns I18N_ERROR_INVALID_PARAMETER when @length is NULL, otherwise the
+ * i18n error code that ERR_MAPPING derives from the ICU status.
+ */
+int i18n_uscript_get_codes(const char* language, i18n_uscript_code_e* codes, int32_t capacity, int32_t *length)
+{
+	/* length is dereferenced unconditionally below, so reject NULL up front. */
+	retv_if(length == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*length = uscript_getCode(language, (UScriptCode*)codes, capacity, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Thin wrapper: forwards @script_code to ICU's uscript_getName() and hands
+ * back the pointer ICU returns, unchanged.
+ */
+const char *i18n_uscript_get_name(i18n_uscript_code_e script_code)
+{
+	const char *long_name = uscript_getName((UScriptCode)script_code);
+
+	return long_name;
+}
+
+/*
+ * Thin wrapper: forwards @script_code to ICU's uscript_getShortName() and
+ * hands back the pointer ICU returns, unchanged.
+ */
+const char *i18n_uscript_get_short_name(i18n_uscript_code_e script_code)
+{
+	const char *short_name = uscript_getShortName((UScriptCode)script_code);
+
+	return short_name;
+}
+
+/*
+ * Looks up the script of @codepoint through ICU's uscript_getScript().
+ *
+ * codepoint:   code point passed through to ICU unchanged.
+ * script_code: receives the value returned by ICU; must not be NULL.
+ *
+ * Returns I18N_ERROR_INVALID_PARAMETER when @script_code is NULL, otherwise
+ * the i18n error code that ERR_MAPPING derives from the ICU status.
+ */
+int i18n_uscript_get_script(i18n_uchar32 codepoint, i18n_uscript_code_e *script_code)
+{
+	/* script_code is written unconditionally below, so reject NULL up front. */
+	retv_if(script_code == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	*script_code = (i18n_uscript_code_e)uscript_getScript(codepoint, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Thin wrapper: asks ICU's uscript_hasScript() about @codepoint and
+ * @script_code and returns ICU's answer as-is.
+ */
+i18n_ubool i18n_uscript_has_script(i18n_uchar32 codepoint, i18n_uscript_code_e script_code)
+{
+	i18n_ubool in_extensions = uscript_hasScript(codepoint, (UScriptCode)script_code);
+
+	return in_extensions;
+}
+
+/*
+ * Retrieves the Script Extensions of @codepoint by delegating to ICU's
+ * uscript_getScriptExtensions().
+ *
+ * codepoint: code point passed through to ICU unchanged.
+ * scripts:   caller-allocated output array, handed to ICU as UScriptCode*.
+ * capacity:  number of elements @scripts can hold.
+ * length:    receives the value returned by ICU; must not be NULL.
+ *
+ * Returns I18N_ERROR_INVALID_PARAMETER when @length is NULL, otherwise the
+ * i18n error code that ERR_MAPPING derives from the ICU status.
+ */
+int i18n_uscript_get_script_extensions(i18n_uchar32 codepoint, i18n_uscript_code_e *scripts, int32_t capacity, int32_t *length)
+{
+	/* length is dereferenced unconditionally below, so reject NULL up front. */
+	retv_if(length == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	/* Store through the pointer: assigning to the parameter itself would never reach the caller. */
+	*length = uscript_getScriptExtensions(codepoint, (UScriptCode*)scripts, capacity, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Retrieves the sample string of @script by delegating to ICU's
+ * uscript_getSampleString().
+ *
+ * script:   script code; must lie strictly between I18N_USCRIPT_INVALID_CODE
+ *           and I18N_USCRIPT_CODE_LIMIT.
+ * sample:   caller-allocated output buffer passed through to ICU.
+ * capacity: number of i18n_uchar units @sample can hold.
+ * length:   receives the value returned by ICU; must not be NULL.
+ *
+ * Returns I18N_ERROR_INVALID_PARAMETER when @script is out of range or
+ * @length is NULL, otherwise the i18n error code that ERR_MAPPING derives
+ * from the ICU status.
+ */
+int i18n_uscript_get_sample_string(i18n_uscript_code_e script, i18n_uchar *sample, int32_t capacity, int32_t *length)
+{
+	/* Report a proper error code here; I18N_USCRIPT_INVALID_CODE is a script code, not an i18n error. */
+	retv_if(script >= I18N_USCRIPT_CODE_LIMIT || script <= I18N_USCRIPT_INVALID_CODE, I18N_ERROR_INVALID_PARAMETER);
+	/* length is dereferenced unconditionally below, so reject NULL up front. */
+	retv_if(length == NULL, I18N_ERROR_INVALID_PARAMETER);
+
+	i18n_error_code_e i18n_error;
+	UErrorCode icu_error = U_ZERO_ERROR;
+
+	/* Store through the pointer: assigning to the parameter itself would never reach the caller. */
+	*length = uscript_getSampleString((UScriptCode)script, sample, capacity, &icu_error);
+	ERR_MAPPING(icu_error, i18n_error);
+	I18N_ERR(i18n_error);
+
+	return i18n_error;
+}
+
+/*
+ * Thin wrapper: returns whatever ICU's uscript_getUsage() reports for
+ * @script, converted to i18n_uscript_usage_e without any remapping.
+ */
+i18n_uscript_usage_e i18n_uscript_get_usage(i18n_uscript_code_e script)
+{
+	i18n_uscript_usage_e usage = uscript_getUsage((UScriptCode)script);
+
+	return usage;
+}
+
+/*
+ * Thin wrapper: returns the result of ICU's uscript_isRightToLeft() for
+ * @script as-is.
+ */
+i18n_ubool i18n_uscript_is_right_to_left(i18n_uscript_code_e script)
+{
+	i18n_ubool is_rtl = uscript_isRightToLeft((UScriptCode)script);
+
+	return is_rtl;
+}
+
+/*
+ * Thin wrapper: returns the result of ICU's uscript_breaksBetweenLetters()
+ * for @script as-is.
+ */
+i18n_ubool i18n_uscript_breaks_between_letters(i18n_uscript_code_e script)
+{
+	i18n_ubool breaks_between = uscript_breaksBetweenLetters((UScriptCode)script);
+
+	return breaks_between;
+}
+
+/*
+ * Thin wrapper: returns the result of ICU's uscript_isCased() for @script
+ * as-is.
+ */
+i18n_ubool i18n_uscript_is_cased(i18n_uscript_code_e script)
+{
+	i18n_ubool is_cased = uscript_isCased((UScriptCode)script);
+
+	return is_cased;
+}
+