docs/reference/glib/tmpl/unicode.sgml

   1 <!-- ##### SECTION Title ##### -->
   2 Unicode Manipulation
   3
   4 <!-- ##### SECTION Short_Description ##### -->
   5 functions operating on Unicode characters and UTF-8 strings
   6
   7 <!-- ##### SECTION Long_Description ##### -->
   8 <para>
   9 This section describes a number of functions for dealing with
  10 Unicode characters and strings.  There are analogues of the
  11 traditional <filename>ctype.h</filename> character classification
  12 and case conversion functions, UTF-8 analogues of some string utility
  13 functions, functions to perform normalization, case conversion and
  14 collation on UTF-8 strings and finally functions to convert between
  15 the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
  16 </para>
  17
  18 <para>
  19 The implementations of the Unicode functions in GLib are based
  20 on the Unicode Character Data tables, which are available from
  21 <ulink url="http://www.unicode.org">www.unicode.org</ulink>.
  22 GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
  23 GLib 2.12 supports Unicode 5.0, and GLib 2.16.3 supports Unicode 5.1.
  24 </para>
  25
  26 <!-- ##### SECTION See_Also ##### -->
  27 <para>
  28 <variablelist>
  29
  30 <varlistentry>
  31 <term>g_locale_to_utf8(), g_locale_from_utf8()</term>
  32 <listitem><para>
  33 Convenience functions for converting between UTF-8 and the locale encoding.
  34 </para></listitem>
  35 </varlistentry>
  36
  37 </variablelist>
  38 </para>
  39
  40 <!-- ##### SECTION Stability_Level ##### -->
  41
  42
  43 <!-- ##### TYPEDEF gunichar ##### -->
  44 <para>
  45 A type which can hold any UCS-4 character code.
  46 </para>
  47
  48
  49 <!-- ##### TYPEDEF gunichar2 ##### -->
  50 <para>
  51 A type which can hold any UTF-16 code
  52 point<footnote id="utf16_surrogate_pairs">UTF-16 also has so-called
  53 <firstterm>surrogate pairs</firstterm> to encode characters beyond the
  54 BMP as pairs of 16-bit numbers. Surrogate pairs cannot be stored in a
  55 single gunichar2 field, but all GLib functions accepting gunichar2 arrays
  56 will correctly interpret surrogate pairs.</footnote>.
  57 </para>
  58
  59
  60 <!-- ##### FUNCTION g_unichar_validate ##### -->
  61 <para>
  62
  63 </para>
  64
  65 @ch:
  66 @Returns:
  67
  68
  69 <!-- ##### FUNCTION g_unichar_isalnum ##### -->
  70 <para>
  71
  72 </para>
  73
  74 @c:
  75 @Returns:
  76
  77
  78 <!-- ##### FUNCTION g_unichar_isalpha ##### -->
  79 <para>
  80
  81 </para>
  82
  83 @c:
  84 @Returns:
  85
  86
  87 <!-- ##### FUNCTION g_unichar_iscntrl ##### -->
  88 <para>
  89
  90 </para>
  91
  92 @c:
  93 @Returns:
  94
  95
  96 <!-- ##### FUNCTION g_unichar_isdefined ##### -->
  97 <para>
  98
  99 </para>
 100
 101 @c:
 102 @Returns:
 103
 104
 105 <!-- ##### FUNCTION g_unichar_isdigit ##### -->
 106 <para>
 107
 108 </para>
 109
 110 @c:
 111 @Returns:
 112
 113
 114 <!-- ##### FUNCTION g_unichar_isgraph ##### -->
 115 <para>
 116
 117 </para>
 118
 119 @c:
 120 @Returns:
 121
 122
 123 <!-- ##### FUNCTION g_unichar_islower ##### -->
 124 <para>
 125
 126 </para>
 127
 128 @c:
 129 @Returns:
 130
 131
 132 <!-- ##### FUNCTION g_unichar_ismark ##### -->
 133 <para>
 134
 135 </para>
 136
 137 @c:
 138 @Returns:
 139
 140
 141 <!-- ##### FUNCTION g_unichar_isprint ##### -->
 142 <para>
 143
 144 </para>
 145
 146 @c:
 147 @Returns:
 148
 149
 150 <!-- ##### FUNCTION g_unichar_ispunct ##### -->
 151 <para>
 152
 153 </para>
 154
 155 @c:
 156 @Returns:
 157
 158
 159 <!-- ##### FUNCTION g_unichar_isspace ##### -->
 160 <para>
 161
 162 </para>
 163
 164 @c:
 165 @Returns:
 166
 167
 168 <!-- ##### FUNCTION g_unichar_istitle ##### -->
 169 <para>
 170
 171 </para>
 172
 173 @c:
 174 @Returns:
 175
 176
 177 <!-- ##### FUNCTION g_unichar_isupper ##### -->
 178 <para>
 179
 180 </para>
 181
 182 @c:
 183 @Returns:
 184
 185
 186 <!-- ##### FUNCTION g_unichar_isxdigit ##### -->
 187 <para>
 188
 189 </para>
 190
 191 @c:
 192 @Returns:
 193
 194
 195 <!-- ##### FUNCTION g_unichar_iswide ##### -->
 196 <para>
 197
 198 </para>
 199
 200 @c:
 201 @Returns:
 202
 203
 204 <!-- ##### FUNCTION g_unichar_iswide_cjk ##### -->
 205 <para>
 206
 207 </para>
 208
 209 @c:
 210 @Returns:
 211
 212
 213 <!-- ##### FUNCTION g_unichar_iszerowidth ##### -->
 214 <para>
 215
 216 </para>
 217
 218 @c:
 219 @Returns:
 220
 221
 222 <!-- ##### FUNCTION g_unichar_toupper ##### -->
 223 <para>
 224
 225 </para>
 226
 227 @c:
 228 @Returns:
 229
 230
 231 <!-- ##### FUNCTION g_unichar_tolower ##### -->
 232 <para>
 233
 234 </para>
 235
 236 @c:
 237 @Returns:
 238
 239
 240 <!-- ##### FUNCTION g_unichar_totitle ##### -->
 241 <para>
 242
 243 </para>
 244
 245 @c:
 246 @Returns:
 247
 248
 249 <!-- ##### FUNCTION g_unichar_digit_value ##### -->
 250 <para>
 251
 252 </para>
 253
 254 @c:
 255 @Returns:
 256
 257
 258 <!-- ##### FUNCTION g_unichar_xdigit_value ##### -->
 259 <para>
 260
 261 </para>
 262
 263 @c:
 264 @Returns:
 265
 266
 267 <!-- ##### ENUM GUnicodeType ##### -->
 268 <para>
 269 These are the possible character classifications from the
 270 Unicode specification.
 271 See <ulink url="http://www.unicode.org/Public/UNIDATA/UnicodeData.html"
 272 >http://www.unicode.org/Public/UNIDATA/UnicodeData.html</ulink>.
 273 </para>
 274
 275 @G_UNICODE_CONTROL: General category "Other, Control" (Cc)
 276 @G_UNICODE_FORMAT: General category "Other, Format" (Cf)
 277 @G_UNICODE_UNASSIGNED: General category "Other, Not Assigned" (Cn)
 278 @G_UNICODE_PRIVATE_USE: General category "Other, Private Use" (Co)
 279 @G_UNICODE_SURROGATE: General category "Other, Surrogate" (Cs)
 280 @G_UNICODE_LOWERCASE_LETTER: General category "Letter, Lowercase" (Ll)
 281 @G_UNICODE_MODIFIER_LETTER: General category "Letter, Modifier" (Lm)
 282 @G_UNICODE_OTHER_LETTER: General category "Letter, Other" (Lo)
 283 @G_UNICODE_TITLECASE_LETTER: General category "Letter, Titlecase" (Lt)
 284 @G_UNICODE_UPPERCASE_LETTER: General category "Letter, Uppercase" (Lu)
 285 @G_UNICODE_COMBINING_MARK: General category "Mark, Spacing Combining" (Mc)
 286 @G_UNICODE_ENCLOSING_MARK: General category "Mark, Enclosing" (Me)
 287 @G_UNICODE_NON_SPACING_MARK: General category "Mark, Nonspacing" (Mn)
 288 @G_UNICODE_DECIMAL_NUMBER: General category "Number, Decimal Digit" (Nd)
 289 @G_UNICODE_LETTER_NUMBER: General category "Number, Letter" (Nl)
 290 @G_UNICODE_OTHER_NUMBER: General category "Number, Other" (No)
 291 @G_UNICODE_CONNECT_PUNCTUATION: General category "Punctuation, Connector" (Pc)
 292 @G_UNICODE_DASH_PUNCTUATION: General category "Punctuation, Dash" (Pd)
 293 @G_UNICODE_CLOSE_PUNCTUATION: General category "Punctuation, Close" (Pe)
 294 @G_UNICODE_FINAL_PUNCTUATION: General category "Punctuation, Final quote" (Pf)
 295 @G_UNICODE_INITIAL_PUNCTUATION: General category "Punctuation, Initial quote" (Pi)
 296 @G_UNICODE_OTHER_PUNCTUATION: General category "Punctuation, Other" (Po)
 297 @G_UNICODE_OPEN_PUNCTUATION: General category "Punctuation, Open" (Ps)
 298 @G_UNICODE_CURRENCY_SYMBOL: General category "Symbol, Currency" (Sc)
 299 @G_UNICODE_MODIFIER_SYMBOL: General category "Symbol, Modifier" (Sk)
 300 @G_UNICODE_MATH_SYMBOL: General category "Symbol, Math" (Sm)
 301 @G_UNICODE_OTHER_SYMBOL: General category "Symbol, Other" (So)
 302 @G_UNICODE_LINE_SEPARATOR: General category "Separator, Line" (Zl)
 303 @G_UNICODE_PARAGRAPH_SEPARATOR: General category "Separator, Paragraph" (Zp)
 304 @G_UNICODE_SPACE_SEPARATOR: General category "Separator, Space" (Zs)
 305
 306 <!-- ##### FUNCTION g_unichar_type ##### -->
 307 <para>
 308
 309 </para>
 310
 311 @c:
 312 @Returns:
 313
 314
 315 <!-- ##### ENUM GUnicodeBreakType ##### -->
 316 <para>
 317 These are the possible line break classifications.
 318 The five Hangul types were added in Unicode 4.1, so they were
 319 introduced in GLib 2.10.  Note that new types may be added in the future.
 320 Applications should be ready to handle unknown values.
 321 They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
 322 See <ulink url="http://www.unicode.org/unicode/reports/tr14/"
 323 >http://www.unicode.org/unicode/reports/tr14/</ulink>.
 324 </para>
 325
 326 @G_UNICODE_BREAK_MANDATORY:
 327 @G_UNICODE_BREAK_CARRIAGE_RETURN:
 328 @G_UNICODE_BREAK_LINE_FEED:
 329 @G_UNICODE_BREAK_COMBINING_MARK:
 330 @G_UNICODE_BREAK_SURROGATE:
 331 @G_UNICODE_BREAK_ZERO_WIDTH_SPACE:
 332 @G_UNICODE_BREAK_INSEPARABLE:
 333 @G_UNICODE_BREAK_NON_BREAKING_GLUE:
 334 @G_UNICODE_BREAK_CONTINGENT:
 335 @G_UNICODE_BREAK_SPACE:
 336 @G_UNICODE_BREAK_AFTER:
 337 @G_UNICODE_BREAK_BEFORE:
 338 @G_UNICODE_BREAK_BEFORE_AND_AFTER:
 339 @G_UNICODE_BREAK_HYPHEN:
 340 @G_UNICODE_BREAK_NON_STARTER:
 341 @G_UNICODE_BREAK_OPEN_PUNCTUATION:
 342 @G_UNICODE_BREAK_CLOSE_PUNCTUATION:
 343 @G_UNICODE_BREAK_QUOTATION:
 344 @G_UNICODE_BREAK_EXCLAMATION:
 345 @G_UNICODE_BREAK_IDEOGRAPHIC:
 346 @G_UNICODE_BREAK_NUMERIC:
 347 @G_UNICODE_BREAK_INFIX_SEPARATOR:
 348 @G_UNICODE_BREAK_SYMBOL:
 349 @G_UNICODE_BREAK_ALPHABETIC:
 350 @G_UNICODE_BREAK_PREFIX:
 351 @G_UNICODE_BREAK_POSTFIX:
 352 @G_UNICODE_BREAK_COMPLEX_CONTEXT:
 353 @G_UNICODE_BREAK_AMBIGUOUS:
 354 @G_UNICODE_BREAK_UNKNOWN:
 355 @G_UNICODE_BREAK_NEXT_LINE:
 356 @G_UNICODE_BREAK_WORD_JOINER:
 357 @G_UNICODE_BREAK_HANGUL_L_JAMO:
 358 @G_UNICODE_BREAK_HANGUL_V_JAMO:
 359 @G_UNICODE_BREAK_HANGUL_T_JAMO:
 360 @G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
 361 @G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
 362
 363 <!-- ##### FUNCTION g_unichar_break_type ##### -->
 364 <para>
 365
 366 </para>
 367
 368 @c:
 369 @Returns:
 370
 371
 372 <!-- ##### FUNCTION g_unichar_combining_class ##### -->
 373 <para>
 374
 375 </para>
 376
 377 @uc:
 378 @Returns:
 379
 380
 381 <!-- ##### FUNCTION g_unicode_canonical_ordering ##### -->
 382 <para>
 383
 384 </para>
 385
 386 @string:
 387 @len:
 388
 389
 390 <!-- ##### FUNCTION g_unicode_canonical_decomposition ##### -->
 391 <para>
 392
 393 </para>
 394
 395 @ch:
 396 @result_len:
 397 @Returns:
 398
 399
 400 <!-- ##### FUNCTION g_unichar_get_mirror_char ##### -->
 401 <para>
 402
 403 </para>
 404
 405 @ch:
 406 @mirrored_ch:
 407 @Returns:
 408
 409
 410 <!-- ##### ENUM GUnicodeScript ##### -->
 411 <para>
 412 The #GUnicodeScript enumeration identifies different writing
 413 systems. The values correspond to the names as defined in the
 414 Unicode standard. The enumeration was added in GLib 2.14,
 415 and is interchangeable with #PangoScript.
 416 Note that new types may be added in the future. Applications
 417 should be ready to handle unknown values.
 418 See <ulink
 419 url="http://www.unicode.org/reports/tr24/">Unicode Standard Annex
 420 #24: Script names</ulink>.
 421 </para>
 422
 423 @G_UNICODE_SCRIPT_INVALID_CODE: a value never returned from g_unichar_get_script()
 424 @G_UNICODE_SCRIPT_COMMON:     a character used by multiple different scripts
 425 @G_UNICODE_SCRIPT_INHERITED:  a mark glyph that takes its script from the
 426                               base glyph to which it is attached
 427 @G_UNICODE_SCRIPT_ARABIC:     Arabic
 428 @G_UNICODE_SCRIPT_ARMENIAN:   Armenian
 429 @G_UNICODE_SCRIPT_BENGALI:    Bengali
 430 @G_UNICODE_SCRIPT_BOPOMOFO:   Bopomofo
 431 @G_UNICODE_SCRIPT_CHEROKEE:   Cherokee
 432 @G_UNICODE_SCRIPT_COPTIC:     Coptic
 433 @G_UNICODE_SCRIPT_CYRILLIC:   Cyrillic
 434 @G_UNICODE_SCRIPT_DESERET:    Deseret
 435 @G_UNICODE_SCRIPT_DEVANAGARI: Devanagari
 436 @G_UNICODE_SCRIPT_ETHIOPIC:   Ethiopic
 437 @G_UNICODE_SCRIPT_GEORGIAN:   Georgian
 438 @G_UNICODE_SCRIPT_GOTHIC:     Gothic
 439 @G_UNICODE_SCRIPT_GREEK:      Greek
 440 @G_UNICODE_SCRIPT_GUJARATI:   Gujarati
 441 @G_UNICODE_SCRIPT_GURMUKHI:   Gurmukhi
 442 @G_UNICODE_SCRIPT_HAN:        Han
 443 @G_UNICODE_SCRIPT_HANGUL:     Hangul
 444 @G_UNICODE_SCRIPT_HEBREW:     Hebrew
 445 @G_UNICODE_SCRIPT_HIRAGANA:   Hiragana
 446 @G_UNICODE_SCRIPT_KANNADA:    Kannada
 447 @G_UNICODE_SCRIPT_KATAKANA:   Katakana
 448 @G_UNICODE_SCRIPT_KHMER:      Khmer
 449 @G_UNICODE_SCRIPT_LAO:        Lao
 450 @G_UNICODE_SCRIPT_LATIN:      Latin
 451 @G_UNICODE_SCRIPT_MALAYALAM:  Malayalam
 452 @G_UNICODE_SCRIPT_MONGOLIAN:  Mongolian
 453 @G_UNICODE_SCRIPT_MYANMAR:    Myanmar
 454 @G_UNICODE_SCRIPT_OGHAM:      Ogham
 455 @G_UNICODE_SCRIPT_OLD_ITALIC: Old Italic
 456 @G_UNICODE_SCRIPT_ORIYA:      Oriya
 457 @G_UNICODE_SCRIPT_RUNIC:      Runic
 458 @G_UNICODE_SCRIPT_SINHALA:    Sinhala
 459 @G_UNICODE_SCRIPT_SYRIAC:     Syriac
 460 @G_UNICODE_SCRIPT_TAMIL:      Tamil
 461 @G_UNICODE_SCRIPT_TELUGU:     Telugu
 462 @G_UNICODE_SCRIPT_THAANA:     Thaana
 463 @G_UNICODE_SCRIPT_THAI:       Thai
 464 @G_UNICODE_SCRIPT_TIBETAN:    Tibetan
 465 @G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL:
 466                               Canadian Aboriginal
 467 @G_UNICODE_SCRIPT_YI:         Yi
 468 @G_UNICODE_SCRIPT_TAGALOG:    Tagalog
 469 @G_UNICODE_SCRIPT_HANUNOO:    Hanunoo
 470 @G_UNICODE_SCRIPT_BUHID:      Buhid
 471 @G_UNICODE_SCRIPT_TAGBANWA:   Tagbanwa
 472 @G_UNICODE_SCRIPT_BRAILLE:    Braille
 473 @G_UNICODE_SCRIPT_CYPRIOT:    Cypriot
 474 @G_UNICODE_SCRIPT_LIMBU:      Limbu
 475 @G_UNICODE_SCRIPT_OSMANYA:    Osmanya
 476 @G_UNICODE_SCRIPT_SHAVIAN:    Shavian
 477 @G_UNICODE_SCRIPT_LINEAR_B:   Linear B
 478 @G_UNICODE_SCRIPT_TAI_LE:     Tai Le
 479 @G_UNICODE_SCRIPT_UGARITIC:   Ugaritic
 480 @G_UNICODE_SCRIPT_NEW_TAI_LUE: New Tai Lue
 481 @G_UNICODE_SCRIPT_BUGINESE:   Buginese
 482 @G_UNICODE_SCRIPT_GLAGOLITIC: Glagolitic
 483 @G_UNICODE_SCRIPT_TIFINAGH:   Tifinagh
 484 @G_UNICODE_SCRIPT_SYLOTI_NAGRI: Syloti Nagri
 485 @G_UNICODE_SCRIPT_OLD_PERSIAN: Old Persian
 486 @G_UNICODE_SCRIPT_KHAROSHTHI: Kharoshthi
 487 @G_UNICODE_SCRIPT_UNKNOWN:    an unassigned code point
 488 @G_UNICODE_SCRIPT_BALINESE:   Balinese
 489 @G_UNICODE_SCRIPT_CUNEIFORM:  Cuneiform
 490 @G_UNICODE_SCRIPT_PHOENICIAN: Phoenician
 491 @G_UNICODE_SCRIPT_PHAGS_PA:   Phags-pa
 492 @G_UNICODE_SCRIPT_NKO:        N'Ko
 493 @G_UNICODE_SCRIPT_KAYAH_LI:   Kayah Li. Since 2.16.3
 494 @G_UNICODE_SCRIPT_LEPCHA:     Lepcha. Since 2.16.3
 495 @G_UNICODE_SCRIPT_REJANG:     Rejang. Since 2.16.3
 496 @G_UNICODE_SCRIPT_SUNDANESE:  Sundanese. Since 2.16.3
 497 @G_UNICODE_SCRIPT_SAURASHTRA: Saurashtra. Since 2.16.3
 498 @G_UNICODE_SCRIPT_CHAM:       Cham. Since 2.16.3
 499 @G_UNICODE_SCRIPT_OL_CHIKI:   Ol Chiki. Since 2.16.3
 500 @G_UNICODE_SCRIPT_VAI:        Vai. Since 2.16.3
 501 @G_UNICODE_SCRIPT_CARIAN:     Carian. Since 2.16.3
 502 @G_UNICODE_SCRIPT_LYCIAN:     Lycian. Since 2.16.3
 503 @G_UNICODE_SCRIPT_LYDIAN:     Lydian. Since 2.16.3
 504
 505 <!-- ##### FUNCTION g_unichar_get_script ##### -->
 506 <para>
 507
 508 </para>
 509
 510 @ch:
 511 @Returns:
 512
 513
 514 <!-- ##### MACRO g_utf8_next_char ##### -->
 515 <para>
 516 Skips to the next character in a UTF-8 string. The string must be
 517 valid; this macro is as fast as possible, and has no error-checking.
 518 You would use this macro to iterate over a string character by
 519 character. The macro returns the start of the next UTF-8 character.
 520 Before using this macro, use g_utf8_validate() to validate strings
 521 that may contain invalid UTF-8.
 522 </para>
 523
 524 @p: Pointer to the start of a valid UTF-8 character.
 525
 526
 527 <!-- ##### FUNCTION g_utf8_get_char ##### -->
 528 <para>
 529
 530 </para>
 531
 532 @p:
 533 @Returns:
 534
 535
 536 <!-- ##### FUNCTION g_utf8_get_char_validated ##### -->
 537 <para>
 538
 539 </para>
 540
 541 @p:
 542 @max_len:
 543 @Returns:
 544
 545
 546 <!-- ##### FUNCTION g_utf8_offset_to_pointer ##### -->
 547 <para>
 548
 549 </para>
 550
 551 @str:
 552 @offset:
 553 @Returns:
 554
 555
 556 <!-- ##### FUNCTION g_utf8_pointer_to_offset ##### -->
 557 <para>
 558
 559 </para>
 560
 561 @str:
 562 @pos:
 563 @Returns:
 564
 565
 566 <!-- ##### FUNCTION g_utf8_prev_char ##### -->
 567 <para>
 568
 569 </para>
 570
 571 @p:
 572 @Returns:
 573
 574
 575 <!-- ##### FUNCTION g_utf8_find_next_char ##### -->
 576 <para>
 577
 578 </para>
 579
 580 @p:
 581 @end:
 582 @Returns:
 583
 584
 585 <!-- ##### FUNCTION g_utf8_find_prev_char ##### -->
 586 <para>
 587
 588 </para>
 589
 590 @str:
 591 @p:
 592 @Returns:
 593
 594
 595 <!-- ##### FUNCTION g_utf8_strlen ##### -->
 596 <para>
 597
 598 </para>
 599
 600 @p:
 601 @max:
 602 @Returns:
 603
 604
 605 <!-- ##### FUNCTION g_utf8_strncpy ##### -->
 606 <para>
 607
 608 </para>
 609
 610 @dest:
 611 @src:
 612 @n:
 613 @Returns:
 614
 615
 616 <!-- ##### FUNCTION g_utf8_strchr ##### -->
 617 <para>
 618
 619 </para>
 620
 621 @p:
 622 @len:
 623 @c:
 624 @Returns:
 625
 626
 627 <!-- ##### FUNCTION g_utf8_strrchr ##### -->
 628 <para>
 629
 630 </para>
 631
 632 @p:
 633 @len:
 634 @c:
 635 @Returns:
 636
 637
 638 <!-- ##### FUNCTION g_utf8_strreverse ##### -->
 639 <para>
 640
 641 </para>
 642
 643 @str:
 644 @len:
 645 @Returns:
 646
 647
 648 <!-- ##### FUNCTION g_utf8_validate ##### -->
 649 <para>
 650
 651 </para>
 652
 653 @str:
 654 @max_len:
 655 @end:
 656 @Returns:
 657
 658
 659 <!-- ##### FUNCTION g_utf8_strup ##### -->
 660 <para>
 661
 662 </para>
 663
 664 @str:
 665 @len:
 666 @Returns:
 667
 668
 669 <!-- ##### FUNCTION g_utf8_strdown ##### -->
 670 <para>
 671
 672 </para>
 673
 674 @str:
 675 @len:
 676 @Returns:
 677
 678
 679 <!-- ##### FUNCTION g_utf8_casefold ##### -->
 680 <para>
 681
 682 </para>
 683
 684 @str:
 685 @len:
 686 @Returns:
 687
 688
 689 <!-- ##### FUNCTION g_utf8_normalize ##### -->
 690 <para>
 691
 692 </para>
 693
 694 @str:
 695 @len:
 696 @mode:
 697 @Returns:
 698
 699
 700 <!-- ##### ENUM GNormalizeMode ##### -->
 701 <para>
 702 Defines how a Unicode string is transformed into a canonical
 703 form, standardizing such issues as whether a character with an accent is
 704 represented as a base character and combining accent or as a single precomposed
 705 character. Unicode strings should generally be normalized before comparing them.
 706 </para>
 707
 708 @G_NORMALIZE_DEFAULT: standardize differences that do not affect the
 709   text content, such as the above-mentioned accent representation.
 710 @G_NORMALIZE_NFD: another name for %G_NORMALIZE_DEFAULT.
 711 @G_NORMALIZE_DEFAULT_COMPOSE: like %G_NORMALIZE_DEFAULT, but with composed
 712   forms rather than a maximally decomposed form.
 713 @G_NORMALIZE_NFC: another name for %G_NORMALIZE_DEFAULT_COMPOSE.
 714 @G_NORMALIZE_ALL: beyond %G_NORMALIZE_DEFAULT also standardize the
 715   "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the
 716   standard forms (in this case DIGIT THREE). Formatting information may be
 717   lost but for most text operations such characters should be considered the
 718   same.
 719 @G_NORMALIZE_NFKD: another name for %G_NORMALIZE_ALL.
 720 @G_NORMALIZE_ALL_COMPOSE: like %G_NORMALIZE_ALL, but with composed
 721   forms rather than a maximally decomposed form.
 722 @G_NORMALIZE_NFKC: another name for %G_NORMALIZE_ALL_COMPOSE.
 723
 724 <!-- ##### FUNCTION g_utf8_collate ##### -->
 725 <para>
 726
 727 </para>
 728
 729 @str1:
 730 @str2:
 731 @Returns:
 732
 733
 734 <!-- ##### FUNCTION g_utf8_collate_key ##### -->
 735 <para>
 736
 737 </para>
 738
 739 @str:
 740 @len:
 741 @Returns:
 742
 743
 744 <!-- ##### FUNCTION g_utf8_collate_key_for_filename ##### -->
 745 <para>
 746
 747 </para>
 748
 749 @str:
 750 @len:
 751 @Returns:
 752
 753
 754 <!-- ##### FUNCTION g_utf8_to_utf16 ##### -->
 755 <para>
 756
 757 </para>
 758
 759 @str:
 760 @len:
 761 @items_read:
 762 @items_written:
 763 @error:
 764 @Returns:
 765
 766
 767 <!-- ##### FUNCTION g_utf8_to_ucs4 ##### -->
 768 <para>
 769
 770 </para>
 771
 772 @str:
 773 @len:
 774 @items_read:
 775 @items_written:
 776 @error:
 777 @Returns:
 778
 779
 780 <!-- ##### FUNCTION g_utf8_to_ucs4_fast ##### -->
 781 <para>
 782
 783 </para>
 784
 785 @str:
 786 @len:
 787 @items_written:
 788 @Returns:
 789
 790
 791 <!-- ##### FUNCTION g_utf16_to_ucs4 ##### -->
 792 <para>
 793
 794 </para>
 795
 796 @str:
 797 @len:
 798 @items_read:
 799 @items_written:
 800 @error:
 801 @Returns:
 802
 803
 804 <!-- ##### FUNCTION g_utf16_to_utf8 ##### -->
 805 <para>
 806
 807 </para>
 808
 809 @str:
 810 @len:
 811 @items_read:
 812 @items_written:
 813 @error:
 814 @Returns:
 815
 816
 817 <!-- ##### FUNCTION g_ucs4_to_utf16 ##### -->
 818 <para>
 819
 820 </para>
 821
 822 @str:
 823 @len:
 824 @items_read:
 825 @items_written:
 826 @error:
 827 @Returns:
 828
 829
 830 <!-- ##### FUNCTION g_ucs4_to_utf8 ##### -->
 831 <para>
 832
 833 </para>
 834
 835 @str:
 836 @len:
 837 @items_read:
 838 @items_written:
 839 @error:
 840 @Returns:
 841
 842
 843 <!-- ##### FUNCTION g_unichar_to_utf8 ##### -->
 844 <para>
 845
 846 </para>
 847
 848 @c:
 849 @outbuf:
 850 @Returns:
 851
 852