X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgunicode.h;h=7ec6862ce7a59a8bf9088054b4cb003ad9ad5911;hb=ecf1359191b2f796a7d63288652dd1a93525417d;hp=f89c07c1987dbde21226ae0ab5273f734884b765;hpb=3ac7c35656649b1d1fcf2ccaa670b854809d4cd8;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gunicode.h b/glib/gunicode.h index f89c07c..7ec6862 100644 --- a/glib/gunicode.h +++ b/glib/gunicode.h @@ -15,17 +15,16 @@ * * You should have received a copy of the GNU Lesser General Public * License along with the Gnome Library; see the file COPYING.LIB. If not, - * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * see . */ +#ifndef __G_UNICODE_H__ +#define __G_UNICODE_H__ + #if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) #error "Only can be included directly." #endif -#ifndef __G_UNICODE_H__ -#define __G_UNICODE_H__ - #include #include @@ -199,13 +198,14 @@ typedef enum * @G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: Hangul LV Syllable (H2) * @G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: Hangul LVT Syllable (H3) * @G_UNICODE_BREAK_CLOSE_PARANTHESIS: Closing Parenthesis (CP). Since 2.28 + * @G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER: Conditional Japanese Starter (CJ). Since: 2.32 + * @G_UNICODE_BREAK_HEBREW_LETTER: Hebrew Letter (HL). Since: 2.32 + * @G_UNICODE_BREAK_REGIONAL_INDICATOR: Regional Indicator (RI). Since: 2.36 * * These are the possible line break classifications. * - * The five Hangul types were added in Unicode 4.1, so, has been - * introduced in GLib 2.10. Note that new types may be added in the future. - * Applications should be ready to handle unknown values. - * They may be regarded as %G_UNICODE_BREAK_UNKNOWN. + * Since new unicode versions may add new types here, applications should be ready + * to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN. * * See http://www.unicode.org/unicode/reports/tr14/. */ @@ -247,7 +247,10 @@ typedef enum G_UNICODE_BREAK_HANGUL_T_JAMO, G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, - G_UNICODE_BREAK_CLOSE_PARANTHESIS + G_UNICODE_BREAK_CLOSE_PARANTHESIS, + G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, + G_UNICODE_BREAK_HEBREW_LETTER, + G_UNICODE_BREAK_REGIONAL_INDICATOR } GUnicodeBreakType; /** @@ -361,6 +364,36 @@ typedef enum * @G_UNICODE_SCRIPT_BATAK: Batak. Since 2.28 * @G_UNICODE_SCRIPT_BRAHMI: Brahmi. Since 2.28 * @G_UNICODE_SCRIPT_MANDAIC: Mandaic. Since 2.28 + * @G_UNICODE_SCRIPT_CHAKMA: Chakma. Since: 2.32 + * @G_UNICODE_SCRIPT_MEROITIC_CURSIVE: Meroitic Cursive. Since: 2.32 + * @G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS: Meroitic Hieroglyphs. Since: 2.32 + * @G_UNICODE_SCRIPT_MIAO: Miao. Since: 2.32 + * @G_UNICODE_SCRIPT_SHARADA: Sharada. Since: 2.32 + * @G_UNICODE_SCRIPT_SORA_SOMPENG: Sora Sompeng. Since: 2.32 + * @G_UNICODE_SCRIPT_TAKRI: Takri. Since: 2.32 + * @G_UNICODE_SCRIPT_BASSA_VAH: Bassa. Since: 2.42 + * @G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN: Caucasian Albanian. Since: 2.42 + * @G_UNICODE_SCRIPT_DUPLOYAN: Duployan. Since: 2.42 + * @G_UNICODE_SCRIPT_ELBASAN: Elbasan. Since: 2.42 + * @G_UNICODE_SCRIPT_GRANTHA: Grantha. Since: 2.42 + * @G_UNICODE_SCRIPT_KHOJKI: Kjohki. Since: 2.42 + * @G_UNICODE_SCRIPT_KHUDAWADI: Khudawadi, Sindhi. Since: 2.42 + * @G_UNICODE_SCRIPT_LINEAR_A: Linear A. Since: 2.42 + * @G_UNICODE_SCRIPT_MAHAJANI: Mahajani. Since: 2.42 + * @G_UNICODE_SCRIPT_MANICHAEAN: Manichaean. Since: 2.42 + * @G_UNICODE_SCRIPT_MENDE_KIKAKUI: Mende Kikakui. Since: 2.42 + * @G_UNICODE_SCRIPT_MODI: Modi. Since: 2.42 + * @G_UNICODE_SCRIPT_MRO: Mro. Since: 2.42 + * @G_UNICODE_SCRIPT_NABATAEAN: Nabataean. Since: 2.42 + * @G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN: Old North Arabian. Since: 2.42 + * @G_UNICODE_SCRIPT_OLD_PERMIC: Old Permic. Since: 2.42 + * @G_UNICODE_SCRIPT_PAHAWH_HMONG: Pahawh Hmong. Since: 2.42 + * @G_UNICODE_SCRIPT_PALMYRENE: Palmyrene. Since: 2.42 + * @G_UNICODE_SCRIPT_PAU_CIN_HAU: Pau Cin Hau. Since: 2.42 + * @G_UNICODE_SCRIPT_PSALTER_PAHLAVI: Psalter Pahlavi. Since: 2.42 + * @G_UNICODE_SCRIPT_SIDDHAM: Siddham. Since: 2.42 + * @G_UNICODE_SCRIPT_TIRHUTA: Tirhuta. Since: 2.42 + * @G_UNICODE_SCRIPT_WARANG_CITI Warang Citi. Since: 2.42 * * The #GUnicodeScript enumeration identifies different writing * systems. The values correspond to the names as defined in the @@ -377,13 +410,13 @@ typedef enum { /* ISO 15924 code */ G_UNICODE_SCRIPT_INVALID_CODE = -1, G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */ - G_UNICODE_SCRIPT_INHERITED, /* Qaai */ + G_UNICODE_SCRIPT_INHERITED, /* Zinh (Qaai) */ G_UNICODE_SCRIPT_ARABIC, /* Arab */ G_UNICODE_SCRIPT_ARMENIAN, /* Armn */ G_UNICODE_SCRIPT_BENGALI, /* Beng */ G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */ G_UNICODE_SCRIPT_CHEROKEE, /* Cher */ - G_UNICODE_SCRIPT_COPTIC, /* Qaac */ + G_UNICODE_SCRIPT_COPTIC, /* Copt (Qaac) */ G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ G_UNICODE_SCRIPT_DESERET, /* Dsrt */ G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */ @@ -483,94 +516,162 @@ typedef enum /* Unicode-6.0 additions */ G_UNICODE_SCRIPT_BATAK, /* Batk */ G_UNICODE_SCRIPT_BRAHMI, /* Brah */ - G_UNICODE_SCRIPT_MANDAIC /* Mand */ + G_UNICODE_SCRIPT_MANDAIC, /* Mand */ + + /* Unicode-6.1 additions */ + G_UNICODE_SCRIPT_CHAKMA, /* Cakm */ + G_UNICODE_SCRIPT_MEROITIC_CURSIVE, /* Merc */ + G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, /* Mero */ + G_UNICODE_SCRIPT_MIAO, /* Plrd */ + G_UNICODE_SCRIPT_SHARADA, /* Shrd */ + G_UNICODE_SCRIPT_SORA_SOMPENG, /* Sora */ + G_UNICODE_SCRIPT_TAKRI, /* Takr */ + + /* Unicode 7.0 additions */ + G_UNICODE_SCRIPT_BASSA_VAH, /* Bass */ + G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, /* Aghb */ + G_UNICODE_SCRIPT_DUPLOYAN, /* Dupl */ + G_UNICODE_SCRIPT_ELBASAN, /* Elba */ + G_UNICODE_SCRIPT_GRANTHA, /* Gran */ + G_UNICODE_SCRIPT_KHOJKI, /* Khoj */ + G_UNICODE_SCRIPT_KHUDAWADI, /* Sind */ + G_UNICODE_SCRIPT_LINEAR_A, /* Lina */ + G_UNICODE_SCRIPT_MAHAJANI, /* Mahj */ + G_UNICODE_SCRIPT_MANICHAEAN, /* Manu */ + G_UNICODE_SCRIPT_MENDE_KIKAKUI, /* Mend */ + G_UNICODE_SCRIPT_MODI, /* Modi */ + G_UNICODE_SCRIPT_MRO, /* Mroo */ + G_UNICODE_SCRIPT_NABATAEAN, /* Nbat */ + G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, /* Narb */ + G_UNICODE_SCRIPT_OLD_PERMIC, /* Perm */ + G_UNICODE_SCRIPT_PAHAWH_HMONG, /* Hmng */ + G_UNICODE_SCRIPT_PALMYRENE, /* Palm */ + G_UNICODE_SCRIPT_PAU_CIN_HAU, /* Pauc */ + G_UNICODE_SCRIPT_PSALTER_PAHLAVI, /* Phlp */ + G_UNICODE_SCRIPT_SIDDHAM, /* Sidd */ + G_UNICODE_SCRIPT_TIRHUTA, /* Tirh */ + G_UNICODE_SCRIPT_WARANG_CITI /* Wara */ } GUnicodeScript; +GLIB_AVAILABLE_IN_ALL guint32 g_unicode_script_to_iso15924 (GUnicodeScript script); +GLIB_AVAILABLE_IN_ALL GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924); /* These are all analogs of the functions. */ +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_islower (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_iswide_cjk(gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_iszerowidth(gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_ismark (gunichar c) G_GNUC_CONST; /* More functions. These convert between the three cases. * See the Unicode book to understand title case. */ +GLIB_AVAILABLE_IN_ALL gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST; -/* If C is a digit (according to `g_unichar_isdigit'), then return its +/* If C is a digit (according to 'g_unichar_isdigit'), then return its numeric value. Otherwise return -1. */ +GLIB_AVAILABLE_IN_ALL gint g_unichar_digit_value (gunichar c) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST; /* Return the Unicode character type of a given character. */ +GLIB_AVAILABLE_IN_ALL GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST; /* Return the line break property for a given character */ +GLIB_AVAILABLE_IN_ALL GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST; /* Returns the combining class for a given character */ +GLIB_AVAILABLE_IN_ALL gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST; +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch); +GLIB_AVAILABLE_IN_ALL GUnicodeScript g_unichar_get_script (gunichar ch) G_GNUC_CONST; /* Validate a Unicode character */ +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_validate (gunichar ch) G_GNUC_CONST; /* Pairwise canonical compose/decompose */ +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_compose (gunichar a, gunichar b, gunichar *ch); +GLIB_AVAILABLE_IN_ALL gboolean g_unichar_decompose (gunichar ch, gunichar *a, gunichar *b); +GLIB_AVAILABLE_IN_ALL gsize g_unichar_fully_decompose (gunichar ch, gboolean compat, gunichar *result, gsize result_len); /** - * G_UNICHAR_MAX_COMPAT_DECOMPOSITION_LEN: + * G_UNICHAR_MAX_DECOMPOSITION_LENGTH: * * The maximum length (in codepoints) of a compatibility or canonical * decomposition of a single Unicode character. * * This is as defined by Unicode 6.1. * - * Since: 2.31.3 + * Since: 2.32 */ -#define G_UNICHAR_MAX_DECOMPOSITION_LEN 18 /* codepoints */ +#define G_UNICHAR_MAX_DECOMPOSITION_LENGTH 18 /* codepoints */ /* Compute canonical ordering of a string in-place. This rearranges decomposed characters in the string according to their combining classes. See the Unicode manual for more information. */ +GLIB_AVAILABLE_IN_ALL void g_unicode_canonical_ordering (gunichar *string, gsize len); -GLIB_DEPRECATED +GLIB_DEPRECATED_IN_2_30 gunichar *g_unicode_canonical_decomposition (gunichar ch, gsize *result_len) G_GNUC_MALLOC; @@ -591,87 +692,112 @@ GLIB_VAR const gchar * const g_utf8_skip; */ #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const guchar *)(p)]) +GLIB_AVAILABLE_IN_ALL gunichar g_utf8_get_char (const gchar *p) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_prev_char (const gchar *p) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_find_next_char (const gchar *p, const gchar *end) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL glong g_utf8_strlen (const gchar *p, gssize max) G_GNUC_PURE; +GLIB_AVAILABLE_IN_2_30 gchar *g_utf8_substring (const gchar *str, glong start_pos, glong end_pos) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_strncpy (gchar *dest, const gchar *src, gsize n); /* Find the UTF-8 character corresponding to ch, in string p. These functions are equivalants to strchr and strrchr */ +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c); +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c); +GLIB_AVAILABLE_IN_ALL gchar* g_utf8_strreverse (const gchar *str, gssize len); +GLIB_AVAILABLE_IN_ALL gunichar2 *g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gunichar * g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gunichar * g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gunichar * g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gint g_unichar_to_utf8 (gunichar c, gchar *outbuf); +GLIB_AVAILABLE_IN_ALL gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end); +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_strup (const gchar *str, gssize len) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_strdown (const gchar *str, gssize len) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_casefold (const gchar *str, gssize len) G_GNUC_MALLOC; @@ -710,20 +836,23 @@ typedef enum { G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE } GNormalizeMode; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gint g_utf8_collate (const gchar *str1, const gchar *str2) G_GNUC_PURE; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_collate_key (const gchar *str, gssize len) G_GNUC_MALLOC; +GLIB_AVAILABLE_IN_ALL gchar *g_utf8_collate_key_for_filename (const gchar *str, gssize len) G_GNUC_MALLOC; /* private */ - gchar *_g_utf8_make_valid (const gchar *name); G_END_DECLS