*
* You should have received a copy of the GNU Lesser General Public
* License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * see <http://www.gnu.org/licenses/>.
*/
+#ifndef __G_UNICODE_H__
+#define __G_UNICODE_H__
+
#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
#error "Only <glib.h> can be included directly."
#endif
-#ifndef __G_UNICODE_H__
-#define __G_UNICODE_H__
-
#include <glib/gerror.h>
#include <glib/gtypes.h>
* @G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: Hangul LV Syllable (H2)
* @G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: Hangul LVT Syllable (H3)
* @G_UNICODE_BREAK_CLOSE_PARANTHESIS: Closing Parenthesis (CP). Since 2.28
+ * @G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER: Conditional Japanese Starter (CJ). Since: 2.32
+ * @G_UNICODE_BREAK_HEBREW_LETTER: Hebrew Letter (HL). Since: 2.32
+ * @G_UNICODE_BREAK_REGIONAL_INDICATOR: Regional Indicator (RI). Since: 2.36
*
* These are the possible line break classifications.
*
- * The five Hangul types were added in Unicode 4.1, so, has been
- * introduced in GLib 2.10. Note that new types may be added in the future.
- * Applications should be ready to handle unknown values.
- * They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
+ * Since new unicode versions may add new types here, applications should be ready
+ * to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
*
* See <ulink url="http://www.unicode.org/unicode/reports/tr14/">http://www.unicode.org/unicode/reports/tr14/</ulink>.
*/
G_UNICODE_BREAK_HANGUL_T_JAMO,
G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE,
- G_UNICODE_BREAK_CLOSE_PARANTHESIS
+ G_UNICODE_BREAK_CLOSE_PARANTHESIS,
+ G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER,
+ G_UNICODE_BREAK_HEBREW_LETTER,
+ G_UNICODE_BREAK_REGIONAL_INDICATOR
} GUnicodeBreakType;
/**
* @G_UNICODE_SCRIPT_MANDAIC: Mandaic. Since 2.28
* @G_UNICODE_SCRIPT_CHAKMA: Chakma. Since: 2.32
* @G_UNICODE_SCRIPT_MEROITIC_CURSIVE: Meroitic Cursive. Since: 2.32
- * @G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, Meroitic Hieroglyphs. Since: 2.32
+ * @G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS: Meroitic Hieroglyphs. Since: 2.32
* @G_UNICODE_SCRIPT_MIAO: Miao. Since: 2.32
* @G_UNICODE_SCRIPT_SHARADA: Sharada. Since: 2.32
* @G_UNICODE_SCRIPT_SORA_SOMPENG: Sora Sompeng. Since: 2.32
- * @G_UNICODE_SCRIPT_TAKRI Takri. Since: 2.32
+ * @G_UNICODE_SCRIPT_TAKRI: Takri. Since: 2.32
+ * @G_UNICODE_SCRIPT_BASSA_VAH: Bassa. Since: 2.42
+ * @G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN: Caucasian Albanian. Since: 2.42
+ * @G_UNICODE_SCRIPT_DUPLOYAN: Duployan. Since: 2.42
+ * @G_UNICODE_SCRIPT_ELBASAN: Elbasan. Since: 2.42
+ * @G_UNICODE_SCRIPT_GRANTHA: Grantha. Since: 2.42
+ * @G_UNICODE_SCRIPT_KHOJKI: Kjohki. Since: 2.42
+ * @G_UNICODE_SCRIPT_KHUDAWADI: Khudawadi, Sindhi. Since: 2.42
+ * @G_UNICODE_SCRIPT_LINEAR_A: Linear A. Since: 2.42
+ * @G_UNICODE_SCRIPT_MAHAJANI: Mahajani. Since: 2.42
+ * @G_UNICODE_SCRIPT_MANICHAEAN: Manichaean. Since: 2.42
+ * @G_UNICODE_SCRIPT_MENDE_KIKAKUI: Mende Kikakui. Since: 2.42
+ * @G_UNICODE_SCRIPT_MODI: Modi. Since: 2.42
+ * @G_UNICODE_SCRIPT_MRO: Mro. Since: 2.42
+ * @G_UNICODE_SCRIPT_NABATAEAN: Nabataean. Since: 2.42
+ * @G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN: Old North Arabian. Since: 2.42
+ * @G_UNICODE_SCRIPT_OLD_PERMIC: Old Permic. Since: 2.42
+ * @G_UNICODE_SCRIPT_PAHAWH_HMONG: Pahawh Hmong. Since: 2.42
+ * @G_UNICODE_SCRIPT_PALMYRENE: Palmyrene. Since: 2.42
+ * @G_UNICODE_SCRIPT_PAU_CIN_HAU: Pau Cin Hau. Since: 2.42
+ * @G_UNICODE_SCRIPT_PSALTER_PAHLAVI: Psalter Pahlavi. Since: 2.42
+ * @G_UNICODE_SCRIPT_SIDDHAM: Siddham. Since: 2.42
+ * @G_UNICODE_SCRIPT_TIRHUTA: Tirhuta. Since: 2.42
+ * @G_UNICODE_SCRIPT_WARANG_CITI Warang Citi. Since: 2.42
*
* The #GUnicodeScript enumeration identifies different writing
* systems. The values correspond to the names as defined in the
G_UNICODE_SCRIPT_MIAO, /* Plrd */
G_UNICODE_SCRIPT_SHARADA, /* Shrd */
G_UNICODE_SCRIPT_SORA_SOMPENG, /* Sora */
- G_UNICODE_SCRIPT_TAKRI /* Takr */
+ G_UNICODE_SCRIPT_TAKRI, /* Takr */
+
+ /* Unicode 7.0 additions */
+ G_UNICODE_SCRIPT_BASSA_VAH, /* Bass */
+ G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, /* Aghb */
+ G_UNICODE_SCRIPT_DUPLOYAN, /* Dupl */
+ G_UNICODE_SCRIPT_ELBASAN, /* Elba */
+ G_UNICODE_SCRIPT_GRANTHA, /* Gran */
+ G_UNICODE_SCRIPT_KHOJKI, /* Khoj */
+ G_UNICODE_SCRIPT_KHUDAWADI, /* Sind */
+ G_UNICODE_SCRIPT_LINEAR_A, /* Lina */
+ G_UNICODE_SCRIPT_MAHAJANI, /* Mahj */
+ G_UNICODE_SCRIPT_MANICHAEAN, /* Manu */
+ G_UNICODE_SCRIPT_MENDE_KIKAKUI, /* Mend */
+ G_UNICODE_SCRIPT_MODI, /* Modi */
+ G_UNICODE_SCRIPT_MRO, /* Mroo */
+ G_UNICODE_SCRIPT_NABATAEAN, /* Nbat */
+ G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, /* Narb */
+ G_UNICODE_SCRIPT_OLD_PERMIC, /* Perm */
+ G_UNICODE_SCRIPT_PAHAWH_HMONG, /* Hmng */
+ G_UNICODE_SCRIPT_PALMYRENE, /* Palm */
+ G_UNICODE_SCRIPT_PAU_CIN_HAU, /* Pauc */
+ G_UNICODE_SCRIPT_PSALTER_PAHLAVI, /* Phlp */
+ G_UNICODE_SCRIPT_SIDDHAM, /* Sidd */
+ G_UNICODE_SCRIPT_TIRHUTA, /* Tirh */
+ G_UNICODE_SCRIPT_WARANG_CITI /* Wara */
} GUnicodeScript;
+GLIB_AVAILABLE_IN_ALL
guint32 g_unicode_script_to_iso15924 (GUnicodeScript script);
+GLIB_AVAILABLE_IN_ALL
GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924);
/* These are all analogs of the <ctype.h> functions.
*/
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_islower (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iswide_cjk(gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iszerowidth(gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_ismark (gunichar c) G_GNUC_CONST;
/* More <ctype.h> functions. These convert between the three cases.
* See the Unicode book to understand title case. */
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
-/* If C is a digit (according to `g_unichar_isdigit'), then return its
+/* If C is a digit (according to 'g_unichar_isdigit'), then return its
numeric value. Otherwise return -1. */
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
/* Return the Unicode character type of a given character. */
+GLIB_AVAILABLE_IN_ALL
GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
/* Return the line break property for a given character */
+GLIB_AVAILABLE_IN_ALL
GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
/* Returns the combining class for a given character */
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_get_mirror_char (gunichar ch,
gunichar *mirrored_ch);
+GLIB_AVAILABLE_IN_ALL
GUnicodeScript g_unichar_get_script (gunichar ch) G_GNUC_CONST;
/* Validate a Unicode character */
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_validate (gunichar ch) G_GNUC_CONST;
/* Pairwise canonical compose/decompose */
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_compose (gunichar a,
gunichar b,
gunichar *ch);
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_decompose (gunichar ch,
gunichar *a,
gunichar *b);
+GLIB_AVAILABLE_IN_ALL
gsize g_unichar_fully_decompose (gunichar ch,
gboolean compat,
gunichar *result,
/* Compute canonical ordering of a string in-place. This rearranges
decomposed characters in the string according to their combining
classes. See the Unicode manual for more information. */
+GLIB_AVAILABLE_IN_ALL
void g_unicode_canonical_ordering (gunichar *string,
gsize len);
-GLIB_DEPRECATED
+GLIB_DEPRECATED_IN_2_30
gunichar *g_unicode_canonical_decomposition (gunichar ch,
gsize *result_len) G_GNUC_MALLOC;
*/
#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const guchar *)(p)])
+GLIB_AVAILABLE_IN_ALL
gunichar g_utf8_get_char (const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gunichar g_utf8_get_char_validated (const gchar *p,
gssize max_len) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_offset_to_pointer (const gchar *str,
glong offset) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
glong g_utf8_pointer_to_offset (const gchar *str,
const gchar *pos) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_prev_char (const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_find_next_char (const gchar *p,
const gchar *end) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_find_prev_char (const gchar *str,
const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
glong g_utf8_strlen (const gchar *p,
gssize max) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_2_30
gchar *g_utf8_substring (const gchar *str,
glong start_pos,
glong end_pos) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strncpy (gchar *dest,
const gchar *src,
gsize n);
/* Find the UTF-8 character corresponding to ch, in string p. These
functions are equivalants to strchr and strrchr */
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strchr (const gchar *p,
gssize len,
gunichar c);
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strrchr (const gchar *p,
gssize len,
gunichar c);
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strreverse (const gchar *str,
gssize len);
+GLIB_AVAILABLE_IN_ALL
gunichar2 *g_utf8_to_utf16 (const gchar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf8_to_ucs4 (const gchar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf8_to_ucs4_fast (const gchar *str,
glong len,
glong *items_written) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf16_to_utf8 (const gunichar2 *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar2 *g_ucs4_to_utf16 (const gunichar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar* g_ucs4_to_utf8 (const gunichar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_to_utf8 (gunichar c,
gchar *outbuf);
+GLIB_AVAILABLE_IN_ALL
gboolean g_utf8_validate (const gchar *str,
gssize max_len,
const gchar **end);
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strup (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strdown (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_casefold (const gchar *str,
gssize len) G_GNUC_MALLOC;
G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
} GNormalizeMode;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_normalize (const gchar *str,
gssize len,
GNormalizeMode mode) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gint g_utf8_collate (const gchar *str1,
const gchar *str2) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_collate_key (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_collate_key_for_filename (const gchar *str,
gssize len) G_GNUC_MALLOC;
/* private */
-
gchar *_g_utf8_make_valid (const gchar *name);
G_END_DECLS