*
* You should have received a copy of the GNU Lesser General Public
* License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * see <http://www.gnu.org/licenses/>.
*/
+#ifndef __G_UNICODE_H__
+#define __G_UNICODE_H__
+
#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
#error "Only <glib.h> can be included directly."
#endif
-#ifndef __G_UNICODE_H__
-#define __G_UNICODE_H__
-
#include <glib/gerror.h>
#include <glib/gtypes.h>
* @G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: Hangul LV Syllable (H2)
* @G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: Hangul LVT Syllable (H3)
* @G_UNICODE_BREAK_CLOSE_PARANTHESIS: Closing Parenthesis (CP). Since 2.28
+ * @G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER: Conditional Japanese Starter (CJ). Since: 2.32
+ * @G_UNICODE_BREAK_HEBREW_LETTER: Hebrew Letter (HL). Since: 2.32
+ * @G_UNICODE_BREAK_REGIONAL_INDICATOR: Regional Indicator (RI). Since: 2.36
*
* These are the possible line break classifications.
*
- * The five Hangul types were added in Unicode 4.1, so, has been
- * introduced in GLib 2.10. Note that new types may be added in the future.
- * Applications should be ready to handle unknown values.
- * They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
+ * Since new Unicode versions may add new types here, applications should be ready
+ * to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
*
* See <ulink url="http://www.unicode.org/unicode/reports/tr14/">http://www.unicode.org/unicode/reports/tr14/</ulink>.
*/
G_UNICODE_BREAK_HANGUL_T_JAMO,
G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE,
- G_UNICODE_BREAK_CLOSE_PARANTHESIS
+ G_UNICODE_BREAK_CLOSE_PARANTHESIS,
+ G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER,
+ G_UNICODE_BREAK_HEBREW_LETTER,
+ G_UNICODE_BREAK_REGIONAL_INDICATOR
} GUnicodeBreakType;
/**
* @G_UNICODE_SCRIPT_BATAK: Batak. Since 2.28
* @G_UNICODE_SCRIPT_BRAHMI: Brahmi. Since 2.28
* @G_UNICODE_SCRIPT_MANDAIC: Mandaic. Since 2.28
+ * @G_UNICODE_SCRIPT_CHAKMA: Chakma. Since: 2.32
+ * @G_UNICODE_SCRIPT_MEROITIC_CURSIVE: Meroitic Cursive. Since: 2.32
+ * @G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS: Meroitic Hieroglyphs. Since: 2.32
+ * @G_UNICODE_SCRIPT_MIAO: Miao. Since: 2.32
+ * @G_UNICODE_SCRIPT_SHARADA: Sharada. Since: 2.32
+ * @G_UNICODE_SCRIPT_SORA_SOMPENG: Sora Sompeng. Since: 2.32
+ * @G_UNICODE_SCRIPT_TAKRI: Takri. Since: 2.32
*
* The #GUnicodeScript enumeration identifies different writing
* systems. The values correspond to the names as defined in the
{ /* ISO 15924 code */
G_UNICODE_SCRIPT_INVALID_CODE = -1,
G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */
- G_UNICODE_SCRIPT_INHERITED, /* Qaai */
+ G_UNICODE_SCRIPT_INHERITED, /* Zinh (Qaai) */
G_UNICODE_SCRIPT_ARABIC, /* Arab */
G_UNICODE_SCRIPT_ARMENIAN, /* Armn */
G_UNICODE_SCRIPT_BENGALI, /* Beng */
G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */
G_UNICODE_SCRIPT_CHEROKEE, /* Cher */
- G_UNICODE_SCRIPT_COPTIC, /* Qaac */
+ G_UNICODE_SCRIPT_COPTIC, /* Copt (Qaac) */
G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */
G_UNICODE_SCRIPT_DESERET, /* Dsrt */
G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */
/* Unicode-6.0 additions */
G_UNICODE_SCRIPT_BATAK, /* Batk */
G_UNICODE_SCRIPT_BRAHMI, /* Brah */
- G_UNICODE_SCRIPT_MANDAIC /* Mand */
+ G_UNICODE_SCRIPT_MANDAIC, /* Mand */
+
+ /* Unicode-6.1 additions */
+ G_UNICODE_SCRIPT_CHAKMA, /* Cakm */
+ G_UNICODE_SCRIPT_MEROITIC_CURSIVE, /* Merc */
+ G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, /* Mero */
+ G_UNICODE_SCRIPT_MIAO, /* Plrd */
+ G_UNICODE_SCRIPT_SHARADA, /* Shrd */
+ G_UNICODE_SCRIPT_SORA_SOMPENG, /* Sora */
+ G_UNICODE_SCRIPT_TAKRI /* Takr */
} GUnicodeScript;
+GLIB_AVAILABLE_IN_ALL
guint32 g_unicode_script_to_iso15924 (GUnicodeScript script);
+GLIB_AVAILABLE_IN_ALL
GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924);
/* These are all analogs of the <ctype.h> functions.
*/
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_islower (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iswide_cjk(gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_iszerowidth(gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_ismark (gunichar c) G_GNUC_CONST;
/* More <ctype.h> functions. These convert between the three cases.
* See the Unicode book to understand title case. */
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
-/* If C is a digit (according to `g_unichar_isdigit'), then return its
+/* If C is a digit (according to 'g_unichar_isdigit'), then return its
numeric value. Otherwise return -1. */
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
/* Return the Unicode character type of a given character. */
+GLIB_AVAILABLE_IN_ALL
GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
/* Return the line break property for a given character */
+GLIB_AVAILABLE_IN_ALL
GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
/* Returns the combining class for a given character */
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_get_mirror_char (gunichar ch,
gunichar *mirrored_ch);
+GLIB_AVAILABLE_IN_ALL
GUnicodeScript g_unichar_get_script (gunichar ch) G_GNUC_CONST;
/* Validate a Unicode character */
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_validate (gunichar ch) G_GNUC_CONST;
/* Pairwise canonical compose/decompose */
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_compose (gunichar a,
gunichar b,
gunichar *ch);
+GLIB_AVAILABLE_IN_ALL
gboolean g_unichar_decompose (gunichar ch,
gunichar *a,
gunichar *b);
+GLIB_AVAILABLE_IN_ALL
gsize g_unichar_fully_decompose (gunichar ch,
gboolean compat,
gunichar *result,
gsize result_len);
+/**
+ * G_UNICHAR_MAX_DECOMPOSITION_LENGTH:
+ *
+ * The maximum length (in codepoints) of a compatibility or canonical
+ * decomposition of a single Unicode character.
+ *
+ * This is as defined by Unicode 6.1.
+ *
+ * Since: 2.32
+ */
+#define G_UNICHAR_MAX_DECOMPOSITION_LENGTH 18 /* codepoints */
+
/* Compute canonical ordering of a string in-place. This rearranges
decomposed characters in the string according to their combining
classes. See the Unicode manual for more information. */
+GLIB_AVAILABLE_IN_ALL
void g_unicode_canonical_ordering (gunichar *string,
gsize len);
-GLIB_DEPRECATED
+GLIB_DEPRECATED_IN_2_30
gunichar *g_unicode_canonical_decomposition (gunichar ch,
gsize *result_len) G_GNUC_MALLOC;
*/
#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const guchar *)(p)])
+GLIB_AVAILABLE_IN_ALL
gunichar g_utf8_get_char (const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gunichar g_utf8_get_char_validated (const gchar *p,
gssize max_len) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_offset_to_pointer (const gchar *str,
glong offset) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
glong g_utf8_pointer_to_offset (const gchar *str,
const gchar *pos) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_prev_char (const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_find_next_char (const gchar *p,
const gchar *end) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_find_prev_char (const gchar *str,
const gchar *p) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
glong g_utf8_strlen (const gchar *p,
gssize max) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_2_30
gchar *g_utf8_substring (const gchar *str,
glong start_pos,
glong end_pos) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strncpy (gchar *dest,
const gchar *src,
gsize n);
/* Find the UTF-8 character corresponding to ch, in string p. These
functions are equivalent to strchr and strrchr */
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strchr (const gchar *p,
gssize len,
gunichar c);
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strrchr (const gchar *p,
gssize len,
gunichar c);
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf8_strreverse (const gchar *str,
gssize len);
+GLIB_AVAILABLE_IN_ALL
gunichar2 *g_utf8_to_utf16 (const gchar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf8_to_ucs4 (const gchar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf8_to_ucs4_fast (const gchar *str,
glong len,
glong *items_written) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar* g_utf16_to_utf8 (const gunichar2 *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gunichar2 *g_ucs4_to_utf16 (const gunichar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar* g_ucs4_to_utf8 (const gunichar *str,
glong len,
glong *items_read,
glong *items_written,
GError **error) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gint g_unichar_to_utf8 (gunichar c,
gchar *outbuf);
+GLIB_AVAILABLE_IN_ALL
gboolean g_utf8_validate (const gchar *str,
gssize max_len,
const gchar **end);
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strup (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_strdown (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_casefold (const gchar *str,
gssize len) G_GNUC_MALLOC;
G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
} GNormalizeMode;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_normalize (const gchar *str,
gssize len,
GNormalizeMode mode) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gint g_utf8_collate (const gchar *str1,
const gchar *str2) G_GNUC_PURE;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_collate_key (const gchar *str,
gssize len) G_GNUC_MALLOC;
+GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_collate_key_for_filename (const gchar *str,
gssize len) G_GNUC_MALLOC;
/* private */
-
gchar *_g_utf8_make_valid (const gchar *name);
G_END_DECLS