docs/reference/glib/tmpl/unicode.sgml

   1 <!-- ##### SECTION Title ##### -->
   2 Unicode Manipulation
   3
   4 <!-- ##### SECTION Short_Description ##### -->
   5
   6
   7 <!-- ##### SECTION Long_Description ##### -->
   8 <para>
   9
  10 </para>
  11
  12 <!-- ##### SECTION See_Also ##### -->
  13 <para>
  14
  15 </para>
  16
  17 <!-- ##### TYPEDEF gunichar ##### -->
  18 <para>
  19 A type which can hold any UCS-4 character code.
  20 </para>
  21
  22
  23 <!-- ##### TYPEDEF gunichar2 ##### -->
  24 <para>
   25 A type which can hold any UTF-16 code unit.
  26 </para>
  27
  28
  29 <!-- ##### FUNCTION g_get_charset ##### -->
  30 <para>
  31
  32 </para>
  33
  34 @charset:
  35 @Returns:
  36
  37
  38 <!-- ##### FUNCTION g_unichar_validate ##### -->
  39 <para>
  40
  41 </para>
  42
  43 @ch:
  44 @Returns:
  45
  46
  47 <!-- ##### FUNCTION g_unichar_isalnum ##### -->
  48 <para>
  49
  50 </para>
  51
  52 @c:
  53 @Returns:
  54
  55
  56 <!-- ##### FUNCTION g_unichar_isalpha ##### -->
  57 <para>
  58
  59 </para>
  60
  61 @c:
  62 @Returns:
  63
  64
  65 <!-- ##### FUNCTION g_unichar_iscntrl ##### -->
  66 <para>
  67
  68 </para>
  69
  70 @c:
  71 @Returns:
  72
  73
  74 <!-- ##### FUNCTION g_unichar_isdigit ##### -->
  75 <para>
  76
  77 </para>
  78
  79 @c:
  80 @Returns:
  81
  82
  83 <!-- ##### FUNCTION g_unichar_isgraph ##### -->
  84 <para>
  85
  86 </para>
  87
  88 @c:
  89 @Returns:
  90
  91
  92 <!-- ##### FUNCTION g_unichar_islower ##### -->
  93 <para>
  94
  95 </para>
  96
  97 @c:
  98 @Returns:
  99
 100
 101 <!-- ##### FUNCTION g_unichar_isprint ##### -->
 102 <para>
 103
 104 </para>
 105
 106 @c:
 107 @Returns:
 108
 109
 110 <!-- ##### FUNCTION g_unichar_ispunct ##### -->
 111 <para>
 112
 113 </para>
 114
 115 @c:
 116 @Returns:
 117
 118
 119 <!-- ##### FUNCTION g_unichar_isspace ##### -->
 120 <para>
 121
 122 </para>
 123
 124 @c:
 125 @Returns:
 126
 127
 128 <!-- ##### FUNCTION g_unichar_isupper ##### -->
 129 <para>
 130
 131 </para>
 132
 133 @c:
 134 @Returns:
 135
 136
 137 <!-- ##### FUNCTION g_unichar_isxdigit ##### -->
 138 <para>
 139
 140 </para>
 141
 142 @c:
 143 @Returns:
 144
 145
 146 <!-- ##### FUNCTION g_unichar_istitle ##### -->
 147 <para>
 148
 149 </para>
 150
 151 @c:
 152 @Returns:
 153
 154
 155 <!-- ##### FUNCTION g_unichar_isdefined ##### -->
 156 <para>
 157
 158 </para>
 159
 160 @c:
 161 @Returns:
 162
 163
 164 <!-- ##### FUNCTION g_unichar_iswide ##### -->
 165 <para>
 166
 167 </para>
 168
 169 @c:
 170 @Returns:
 171
 172
 173 <!-- ##### FUNCTION g_unichar_toupper ##### -->
 174 <para>
 175
 176 </para>
 177
 178 @c:
 179 @Returns:
 180
 181
 182 <!-- ##### FUNCTION g_unichar_tolower ##### -->
 183 <para>
 184
 185 </para>
 186
 187 @c:
 188 @Returns:
 189
 190
 191 <!-- ##### FUNCTION g_unichar_totitle ##### -->
 192 <para>
 193
 194 </para>
 195
 196 @c:
 197 @Returns:
 198
 199
 200 <!-- ##### FUNCTION g_unichar_digit_value ##### -->
 201 <para>
 202
 203 </para>
 204
 205 @c:
 206 @Returns:
 207
 208
 209 <!-- ##### FUNCTION g_unichar_xdigit_value ##### -->
 210 <para>
 211
 212 </para>
 213
 214 @c:
 215 @Returns:
 216
 217
 218 <!-- ##### ENUM GUnicodeType ##### -->
 219 <para>
 220 These are the possible character classifications.
 221 See <ulink url="http://www.unicode.org/Public/UNIDATA/UnicodeData.html"
 222 >http://www.unicode.org/Public/UNIDATA/UnicodeData.html</ulink>.
 223 </para>
 224
 225 @G_UNICODE_CONTROL:
 226 @G_UNICODE_FORMAT:
 227 @G_UNICODE_UNASSIGNED:
 228 @G_UNICODE_PRIVATE_USE:
 229 @G_UNICODE_SURROGATE:
 230 @G_UNICODE_LOWERCASE_LETTER:
 231 @G_UNICODE_MODIFIER_LETTER:
 232 @G_UNICODE_OTHER_LETTER:
 233 @G_UNICODE_TITLECASE_LETTER:
 234 @G_UNICODE_UPPERCASE_LETTER:
 235 @G_UNICODE_COMBINING_MARK:
 236 @G_UNICODE_ENCLOSING_MARK:
 237 @G_UNICODE_NON_SPACING_MARK:
 238 @G_UNICODE_DECIMAL_NUMBER:
 239 @G_UNICODE_LETTER_NUMBER:
 240 @G_UNICODE_OTHER_NUMBER:
 241 @G_UNICODE_CONNECT_PUNCTUATION:
 242 @G_UNICODE_DASH_PUNCTUATION:
 243 @G_UNICODE_CLOSE_PUNCTUATION:
 244 @G_UNICODE_FINAL_PUNCTUATION:
 245 @G_UNICODE_INITIAL_PUNCTUATION:
 246 @G_UNICODE_OTHER_PUNCTUATION:
 247 @G_UNICODE_OPEN_PUNCTUATION:
 248 @G_UNICODE_CURRENCY_SYMBOL:
 249 @G_UNICODE_MODIFIER_SYMBOL:
 250 @G_UNICODE_MATH_SYMBOL:
 251 @G_UNICODE_OTHER_SYMBOL:
 252 @G_UNICODE_LINE_SEPARATOR:
 253 @G_UNICODE_PARAGRAPH_SEPARATOR:
 254 @G_UNICODE_SPACE_SEPARATOR:
 255
 256 <!-- ##### FUNCTION g_unichar_type ##### -->
 257 <para>
 258
 259 </para>
 260
 261 @c:
 262 @Returns:
 263
 264
 265 <!-- ##### ENUM GUnicodeBreakType ##### -->
 266 <para>
 267 These are the possible line break classifications.
 268 See <ulink url="http://www.unicode.org/unicode/reports/tr14/"
 269 >http://www.unicode.org/unicode/reports/tr14/</ulink>.
 270 </para>
 271
 272 @G_UNICODE_BREAK_MANDATORY:
 273 @G_UNICODE_BREAK_CARRIAGE_RETURN:
 274 @G_UNICODE_BREAK_LINE_FEED:
 275 @G_UNICODE_BREAK_COMBINING_MARK:
 276 @G_UNICODE_BREAK_SURROGATE:
 277 @G_UNICODE_BREAK_ZERO_WIDTH_SPACE:
 278 @G_UNICODE_BREAK_INSEPARABLE:
 279 @G_UNICODE_BREAK_NON_BREAKING_GLUE:
 280 @G_UNICODE_BREAK_CONTINGENT:
 281 @G_UNICODE_BREAK_SPACE:
 282 @G_UNICODE_BREAK_AFTER:
 283 @G_UNICODE_BREAK_BEFORE:
 284 @G_UNICODE_BREAK_BEFORE_AND_AFTER:
 285 @G_UNICODE_BREAK_HYPHEN:
 286 @G_UNICODE_BREAK_NON_STARTER:
 287 @G_UNICODE_BREAK_OPEN_PUNCTUATION:
 288 @G_UNICODE_BREAK_CLOSE_PUNCTUATION:
 289 @G_UNICODE_BREAK_QUOTATION:
 290 @G_UNICODE_BREAK_EXCLAMATION:
 291 @G_UNICODE_BREAK_IDEOGRAPHIC:
 292 @G_UNICODE_BREAK_NUMERIC:
 293 @G_UNICODE_BREAK_INFIX_SEPARATOR:
 294 @G_UNICODE_BREAK_SYMBOL:
 295 @G_UNICODE_BREAK_ALPHABETIC:
 296 @G_UNICODE_BREAK_PREFIX:
 297 @G_UNICODE_BREAK_POSTFIX:
 298 @G_UNICODE_BREAK_COMPLEX_CONTEXT:
 299 @G_UNICODE_BREAK_AMBIGUOUS:
 300 @G_UNICODE_BREAK_UNKNOWN:
 301
 302 <!-- ##### FUNCTION g_unichar_break_type ##### -->
 303 <para>
 304
 305 </para>
 306
 307 @c:
 308 @Returns:
 309
 310
 311 <!-- ##### FUNCTION g_unicode_canonical_ordering ##### -->
 312 <para>
 313
 314 </para>
 315
 316 @string:
 317 @len:
 318
 319
 320 <!-- ##### FUNCTION g_unicode_canonical_decomposition ##### -->
 321 <para>
 322
 323 </para>
 324
 325 @ch:
 326 @result_len:
 327 @Returns:
 328
 329
 330 <!-- ##### MACRO g_utf8_next_char ##### -->
 331 <para>
 332 Skips to the next character in a UTF-8 string. The string must be
 333 valid; this macro is as fast as possible, and has zero error-checking.
 334 You would use this macro to iterate over a string character by
 335 character. The macro returns the start of the next UTF-8 character.
 336 Before using this macro, use g_utf8_validate() to validate strings
 337 that may contain invalid UTF-8.
 338 </para>
 339
 340 @p: Pointer to the start of a valid UTF-8 character.
 341
 342
 343 <!-- ##### FUNCTION g_utf8_get_char ##### -->
 344 <para>
 345
 346 </para>
 347
 348 @p:
 349 @Returns:
 350
 351
 352 <!-- ##### FUNCTION g_utf8_get_char_validated ##### -->
 353 <para>
 354
 355 </para>
 356
 357 @p:
 358 @max_len:
 359 @Returns:
 360
 361
 362 <!-- ##### FUNCTION g_utf8_offset_to_pointer ##### -->
 363 <para>
 364
 365 </para>
 366
 367 @str:
 368 @offset:
 369 @Returns:
 370
 371
 372 <!-- ##### FUNCTION g_utf8_pointer_to_offset ##### -->
 373 <para>
 374
 375 </para>
 376
 377 @str:
 378 @pos:
 379 @Returns:
 380
 381
 382 <!-- ##### FUNCTION g_utf8_prev_char ##### -->
 383 <para>
 384
 385 </para>
 386
 387 @p:
 388 @Returns:
 389
 390
 391 <!-- ##### FUNCTION g_utf8_find_next_char ##### -->
 392 <para>
 393
 394 </para>
 395
 396 @p:
 397 @end:
 398 @Returns:
 399 <!-- # Unused Parameters # -->
 400 @bound:
 401
 402
 403 <!-- ##### FUNCTION g_utf8_find_prev_char ##### -->
 404 <para>
 405
 406 </para>
 407
 408 @str:
 409 @p:
 410 @Returns:
 411
 412
 413 <!-- ##### FUNCTION g_utf8_strlen ##### -->
 414 <para>
 415
 416 </para>
 417
 418 @p:
 419 @max:
 420 @Returns:
 421
 422
 423 <!-- ##### FUNCTION g_utf8_strncpy ##### -->
 424 <para>
 425
 426 </para>
 427
 428 @dest:
 429 @src:
 430 @n:
 431 @Returns:
 432
 433
 434 <!-- ##### FUNCTION g_utf8_strchr ##### -->
 435 <para>
 436
 437 </para>
 438
 439 @p:
 440 @len:
 441 @c:
 442 @Returns:
 443 <!-- # Unused Parameters # -->
 444 @ch:
 445
 446
 447 <!-- ##### FUNCTION g_utf8_strrchr ##### -->
 448 <para>
 449
 450 </para>
 451
 452 @p:
 453 @len:
 454 @c:
 455 @Returns:
 456 <!-- # Unused Parameters # -->
 457 @ch:
 458
 459
 460 <!-- ##### FUNCTION g_utf8_validate ##### -->
 461 <para>
 462
 463 </para>
 464
 465 @str:
 466 @max_len:
 467 @end:
 468 @Returns:
 469
 470
 471 <!-- ##### FUNCTION g_utf8_strup ##### -->
 472 <para>
 473
 474 </para>
 475
 476 @str:
 477 @len:
 478 @Returns:
 479
 480
 481 <!-- ##### FUNCTION g_utf8_strdown ##### -->
 482 <para>
 483
 484 </para>
 485
 486 @str:
 487 @len:
 488 @Returns:
 489
 490
 491 <!-- ##### FUNCTION g_utf8_casefold ##### -->
 492 <para>
 493
 494 </para>
 495
 496 @str:
 497 @len:
 498 @Returns:
 499
 500
 501 <!-- ##### FUNCTION g_utf8_normalize ##### -->
 502 <para>
 503
 504 </para>
 505
 506 @str:
 507 @len:
 508 @mode:
 509 @Returns:
 510
 511
 512 <!-- ##### ENUM GNormalizeMode ##### -->
 513 <para>
  514 A #GNormalizeMode defines how a Unicode string is transformed into a canonical
 515 form, standardizing such issues as whether a character with an accent is
 516 represented as a base character and combining accent or as a single precomposed
 517 character. Unicode strings should generally be normalized before comparing them.
 518 </para>
 519
 520 @G_NORMALIZE_DEFAULT: standardize differences that do not affect the
 521   text content, such as the above-mentioned accent representation.
 522 @G_NORMALIZE_NFD: another name for %G_NORMALIZE_DEFAULT.
 523 @G_NORMALIZE_DEFAULT_COMPOSE: like %G_NORMALIZE_DEFAULT, but with composed
 524   forms rather than a maximally decomposed form.
 525 @G_NORMALIZE_NFC: another name for %G_NORMALIZE_DEFAULT_COMPOSE.
  526 @G_NORMALIZE_ALL: beyond %G_NORMALIZE_DEFAULT, also standardize the
  527   "compatibility" characters in Unicode, such as SUPERSCRIPT THREE, to their
  528   standard forms (in this case DIGIT THREE). Formatting information may be
  529   lost, but for most text operations such characters should be considered the
  530   same.
 531 @G_NORMALIZE_NFKD: another name for %G_NORMALIZE_ALL.
 532 @G_NORMALIZE_ALL_COMPOSE: like %G_NORMALIZE_ALL, but with composed
 533   forms rather than a maximally decomposed form.
 534 @G_NORMALIZE_NFKC: another name for %G_NORMALIZE_ALL_COMPOSE.
 535
 536 <!-- ##### FUNCTION g_utf8_collate ##### -->
 537 <para>
 538
 539 </para>
 540
 541 @str1:
 542 @str2:
 543 @Returns:
 544
 545
 546 <!-- ##### FUNCTION g_utf8_collate_key ##### -->
 547 <para>
 548
 549 </para>
 550
 551 @str:
 552 @len:
 553 @Returns:
 554
 555
 556 <!-- ##### FUNCTION g_utf8_to_utf16 ##### -->
 557 <para>
 558
 559 </para>
 560
 561 @str:
 562 @len:
 563 @items_read:
 564 @items_written:
 565 @error:
 566 @Returns:
 567
 568
 569 <!-- ##### FUNCTION g_utf8_to_ucs4 ##### -->
 570 <para>
 571
 572 </para>
 573
 574 @str:
 575 @len:
 576 @items_read:
 577 @items_written:
 578 @error:
 579 @Returns:
 580
 581
 582 <!-- ##### FUNCTION g_utf8_to_ucs4_fast ##### -->
 583 <para>
 584
 585 </para>
 586
 587 @str:
 588 @len:
 589 @items_written:
 590 @Returns:
 591
 592
 593 <!-- ##### FUNCTION g_utf16_to_ucs4 ##### -->
 594 <para>
 595
 596 </para>
 597
 598 @str:
 599 @len:
 600 @items_read:
 601 @items_written:
 602 @error:
 603 @Returns:
 604
 605
 606 <!-- ##### FUNCTION g_utf16_to_utf8 ##### -->
 607 <para>
 608
 609 </para>
 610
 611 @str:
 612 @len:
 613 @items_read:
 614 @items_written:
 615 @error:
 616 @Returns:
 617
 618
 619 <!-- ##### FUNCTION g_ucs4_to_utf16 ##### -->
 620 <para>
 621
 622 </para>
 623
 624 @str:
 625 @len:
 626 @items_read:
 627 @items_written:
 628 @error:
 629 @Returns:
 630
 631
 632 <!-- ##### FUNCTION g_ucs4_to_utf8 ##### -->
 633 <para>
 634
 635 </para>
 636
 637 @str:
 638 @len:
 639 @items_read:
 640 @items_written:
 641 @error:
 642 @Returns:
 643
 644
 645 <!-- ##### FUNCTION g_unichar_to_utf8 ##### -->
 646 <para>
 647
 648 </para>
 649
 650 @c:
 651 @outbuf:
 652 @Returns:
 653
 654