glib/gstrfuncs.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 /*
  19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
  20  * file for a list of people on the GLib Team.  See the ChangeLog
  21  * files for a list of changes.  These files are distributed with
  22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
  23  */
  24
  25 /*
  26  * MT safe
  27  */
  28
  29 #include "config.h"
  30
  31 #include <stdarg.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <locale.h>
  35 #include <string.h>
  36 #include <locale.h>
  37 #include <errno.h>
  38 #include <garray.h>
  39 #include <ctype.h>              /* For tolower() */
  40
  41 #ifdef HAVE_XLOCALE_H
  42 /* Needed on BSD/OS X for e.g. strtod_l */
  43 #include <xlocale.h>
  44 #endif
  45
  46 #ifdef G_OS_WIN32
  47 #include <windows.h>
  48 #endif
  49
  50 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
  51
  52 #include "gstrfuncs.h"
  53
  54 #include "gprintf.h"
  55 #include "gprintfint.h"
  56 #include "glibintl.h"
  57
  58
  59 /**
  60  * SECTION:string_utils
  61  * @title: String Utility Functions
  62  * @short_description: various string-related functions
  63  *
  64  * This section describes a number of utility functions for creating,
  65  * duplicating, and manipulating strings.
  66  *
  67  * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
  68  * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
  69  * are declared in the header `gprintf.h` which is not included in `glib.h`
  70  * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
  71  * explicitly include `<glib/gprintf.h>` in order to use the GLib
  72  * printf() functions.
  73  *
  74  * ## String precision pitfalls # {#string-precision}
  75  *
  76  * While you may use the printf() functions to format UTF-8 strings,
  77  * notice that the precision of a \%Ns parameter is interpreted
  78  * as the number of bytes, not characters to print. On top of that,
  79  * the GNU libc implementation of the printf() functions has the
  80  * "feature" that it checks that the string given for the \%Ns
  81  * parameter consists of a whole number of characters in the current
  82  * encoding. So, unless you are sure you are always going to be in a
  83  * UTF-8 locale or you know your text is restricted to ASCII, avoid
  84  * using \%Ns. If your intention is to format strings for a
  85  * certain number of columns, then \%Ns is not a correct solution
  86  * anyway, since it fails to take wide characters (see g_unichar_iswide())
  87  * into account.
  88  *
  89  * Note also that there are various printf() parameters which are platform
  90  * dependent. GLib provides platform independent macros for these parameters
  91  * which should be used instead. A common example is %G_GUINT64_FORMAT, which
  92  * should be used instead of `%llu` or similar parameters for formatting
  93  * 64-bit integers. These macros are all named `G_*_FORMAT`; see
  94  * [Basic Types][glib-Basic-Types].
  95  */
  96
  97 /**
  98  * g_ascii_isalnum:
  99  * @c: any character
 100  *
 101  * Determines whether a character is alphanumeric.
 102  *
 103  * Unlike the standard C library isalnum() function, this only
 104  * recognizes standard ASCII letters and ignores the locale,
 105  * returning %FALSE for all non-ASCII characters. Also, unlike
 106  * the standard library function, this takes a char, not an int,
 107  * so don't call it on %EOF, but no need to cast to #guchar before
 108  * passing a possibly non-ASCII character in.
 109  *
 110  * Returns: %TRUE if @c is an ASCII alphanumeric character
 111  */
 112
 113 /**
 114  * g_ascii_isalpha:
 115  * @c: any character
 116  *
 117  * Determines whether a character is alphabetic (i.e. a letter).
 118  *
 119  * Unlike the standard C library isalpha() function, this only
 120  * recognizes standard ASCII letters and ignores the locale,
 121  * returning %FALSE for all non-ASCII characters. Also, unlike
 122  * the standard library function, this takes a char, not an int,
 123  * so don't call it on %EOF, but no need to cast to #guchar before
 124  * passing a possibly non-ASCII character in.
 125  *
 126  * Returns: %TRUE if @c is an ASCII alphabetic character
 127  */
 128
 129 /**
 130  * g_ascii_iscntrl:
 131  * @c: any character
 132  *
 133  * Determines whether a character is a control character.
 134  *
 135  * Unlike the standard C library iscntrl() function, this only
 136  * recognizes standard ASCII control characters and ignores the
 137  * locale, returning %FALSE for all non-ASCII characters. Also,
 138  * unlike the standard library function, this takes a char, not
 139  * an int, so don't call it on %EOF, but no need to cast to #guchar
 140  * before passing a possibly non-ASCII character in.
 141  *
 142  * Returns: %TRUE if @c is an ASCII control character.
 143  */
 144
 145 /**
 146  * g_ascii_isdigit:
 147  * @c: any character
 148  *
 149  * Determines whether a character is a digit (0-9).
 150  *
 151  * Unlike the standard C library isdigit() function, this takes
 152  * a char, not an int, so don't call it on %EOF, but no need to
 153  * cast to #guchar before passing a possibly non-ASCII character in.
 154  *
 155  * Returns: %TRUE if @c is an ASCII digit.
 156  */
 157
 158 /**
 159  * g_ascii_isgraph:
 160  * @c: any character
 161  *
 162  * Determines whether a character is a printing character and not a space.
 163  *
 164  * Unlike the standard C library isgraph() function, this only
 165  * recognizes standard ASCII characters and ignores the locale,
 166  * returning %FALSE for all non-ASCII characters. Also, unlike
 167  * the standard library function, this takes a char, not an int,
 168  * so don't call it on %EOF, but no need to cast to #guchar before
 169  * passing a possibly non-ASCII character in.
 170  *
 171  * Returns: %TRUE if @c is an ASCII printing character other than space.
 172  */
 173
 174 /**
 175  * g_ascii_islower:
 176  * @c: any character
 177  *
 178  * Determines whether a character is an ASCII lower case letter.
 179  *
 180  * Unlike the standard C library islower() function, this only
 181  * recognizes standard ASCII letters and ignores the locale,
 182  * returning %FALSE for all non-ASCII characters. Also, unlike
 183  * the standard library function, this takes a char, not an int,
 184  * so don't call it on %EOF, but no need to worry about casting
 185  * to #guchar before passing a possibly non-ASCII character in.
 186  *
 187  * Returns: %TRUE if @c is an ASCII lower case letter
 188  */
 189
 190 /**
 191  * g_ascii_isprint:
 192  * @c: any character
 193  *
 194  * Determines whether a character is a printing character.
 195  *
 196  * Unlike the standard C library isprint() function, this only
 197  * recognizes standard ASCII characters and ignores the locale,
 198  * returning %FALSE for all non-ASCII characters. Also, unlike
 199  * the standard library function, this takes a char, not an int,
 200  * so don't call it on %EOF, but no need to cast to #guchar before
 201  * passing a possibly non-ASCII character in.
 202  *
 203  * Returns: %TRUE if @c is an ASCII printing character.
 204  */
 205
 206 /**
 207  * g_ascii_ispunct:
 208  * @c: any character
 209  *
 210  * Determines whether a character is a punctuation character.
 211  *
 212  * Unlike the standard C library ispunct() function, this only
 213  * recognizes standard ASCII letters and ignores the locale,
 214  * returning %FALSE for all non-ASCII characters. Also, unlike
 215  * the standard library function, this takes a char, not an int,
 216  * so don't call it on %EOF, but no need to cast to #guchar before
 217  * passing a possibly non-ASCII character in.
 218  *
 219  * Returns: %TRUE if @c is an ASCII punctuation character.
 220  */
 221
 222 /**
 223  * g_ascii_isspace:
 224  * @c: any character
 225  *
 226  * Determines whether a character is a white-space character.
 227  *
 228  * Unlike the standard C library isspace() function, this only
 229  * recognizes standard ASCII white-space and ignores the locale,
 230  * returning %FALSE for all non-ASCII characters. Also, unlike
 231  * the standard library function, this takes a char, not an int,
 232  * so don't call it on %EOF, but no need to cast to #guchar before
 233  * passing a possibly non-ASCII character in.
 234  *
 235  * Returns: %TRUE if @c is an ASCII white-space character
 236  */
 237
 238 /**
 239  * g_ascii_isupper:
 240  * @c: any character
 241  *
 242  * Determines whether a character is an ASCII upper case letter.
 243  *
 244  * Unlike the standard C library isupper() function, this only
 245  * recognizes standard ASCII letters and ignores the locale,
 246  * returning %FALSE for all non-ASCII characters. Also, unlike
 247  * the standard library function, this takes a char, not an int,
 248  * so don't call it on %EOF, but no need to worry about casting
 249  * to #guchar before passing a possibly non-ASCII character in.
 250  *
 251  * Returns: %TRUE if @c is an ASCII upper case letter
 252  */
 253
 254 /**
 255  * g_ascii_isxdigit:
 256  * @c: any character
 257  *
 258  * Determines whether a character is a hexadecimal-digit character.
 259  *
 260  * Unlike the standard C library isxdigit() function, this takes
 261  * a char, not an int, so don't call it on %EOF, but no need to
 262  * cast to #guchar before passing a possibly non-ASCII character in.
 263  *
 264  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
 265  */
 266
 267 /**
 268  * G_ASCII_DTOSTR_BUF_SIZE:
 269  *
 270  * A good size for a buffer to be passed into g_ascii_dtostr().
 271  * It is guaranteed to be enough for all output of that function
 272  * on systems with 64bit IEEE-compatible doubles.
 273  *
 274  * The typical usage would be something like:
 275  * |[<!-- language="C" -->
 276  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
 277  *
 278  *   fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
 279  * ]|
 280  */
 281
 282 /**
 283  * g_strstrip:
 284  * @string: a string to remove the leading and trailing whitespace from
 285  *
 286  * Removes leading and trailing whitespace from a string.
 287  * See g_strchomp() and g_strchug().
 288  *
 289  * Returns: @string
 290  */
 291
 292 /**
 293  * G_STR_DELIMITERS:
 294  *
 295  * The standard delimiters, used in g_strdelimit().
 296  */
 297
 298 static const guint16 ascii_table_data[256] = {
 299   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 300   0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
 301   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 302   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 303   0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 304   0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 305   0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
 306   0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 307   0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
 308   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 309   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 310   0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 311   0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
 312   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 313   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 314   0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
 315   /* the upper 128 are all zeroes */
 316 };
 317
 318 const guint16 * const g_ascii_table = ascii_table_data;
 319
 320 #if defined (HAVE_NEWLOCALE) && \
 321     defined (HAVE_USELOCALE) && \
 322     defined (HAVE_STRTOD_L) && \
 323     defined (HAVE_STRTOULL_L) && \
 324     defined (HAVE_STRTOLL_L)
 325 #define USE_XLOCALE 1
 326 #endif
 327
 328 #ifdef USE_XLOCALE
 329 static locale_t
 330 get_C_locale (void)
 331 {
 332   static gsize initialized = FALSE;
 333   static locale_t C_locale = NULL;
 334
 335   if (g_once_init_enter (&initialized))
 336     {
 337       C_locale = newlocale (LC_ALL_MASK, "C", NULL);
 338       g_once_init_leave (&initialized, TRUE);
 339     }
 340
 341   return C_locale;
 342 }
 343 #endif
 344
 345 /**
 346  * g_strdup:
 347  * @str: (nullable): the string to duplicate
 348  *
 349  * Duplicates a string. If @str is %NULL it returns %NULL.
 350  * The returned string should be freed with g_free()
 351  * when no longer needed.
 352  *
 353  * Returns: a newly-allocated copy of @str
 354  */
 355 gchar*
 356 g_strdup (const gchar *str)
 357 {
 358   gchar *new_str;
 359   gsize length;
 360
 361   if (str)
 362     {
 363       length = strlen (str) + 1;
 364       new_str = g_new (char, length);
 365       memcpy (new_str, str, length);
 366     }
 367   else
 368     new_str = NULL;
 369
 370   return new_str;
 371 }
 372
 373 /**
 374  * g_memdup:
 375  * @mem: the memory to copy.
 376  * @byte_size: the number of bytes to copy.
 377  *
 378  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 379  * from @mem. If @mem is %NULL it returns %NULL.
 380  *
 381  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
 382  *  is %NULL.
 383  * Deprecated: 2.68: Use g_memdup2() instead, as it accepts a #gsize argument
 384  *     for @byte_size, avoiding the possibility of overflow in a #gsize → #guint
 385  *     conversion
 386  */
 387 gpointer
 388 g_memdup (gconstpointer mem,
 389           guint         byte_size)
 390 {
 391   gpointer new_mem;
 392
 393   if (mem && byte_size != 0)
 394     {
 395       new_mem = g_malloc (byte_size);
 396       memcpy (new_mem, mem, byte_size);
 397     }
 398   else
 399     new_mem = NULL;
 400
 401   return new_mem;
 402 }
 403
 404 /**
 405  * g_memdup2:
 406  * @mem: (nullable): the memory to copy.
 407  * @byte_size: the number of bytes to copy.
 408  *
 409  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 410  * from @mem. If @mem is %NULL it returns %NULL.
 411  *
 412  * This replaces g_memdup(), which was prone to integer overflows when
 413  * converting the argument from a #gsize to a #guint.
 414  *
 415  * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
 416  *    or %NULL if @mem is %NULL.
 417  * Since: 2.68
 418  */
 419 gpointer
 420 g_memdup2 (gconstpointer mem,
 421            gsize         byte_size)
 422 {
 423   gpointer new_mem;
 424
 425   if (mem && byte_size != 0)
 426     {
 427       new_mem = g_malloc (byte_size);
 428       memcpy (new_mem, mem, byte_size);
 429     }
 430   else
 431     new_mem = NULL;
 432
 433   return new_mem;
 434 }
 435
 436 /**
 437  * g_strndup:
 438  * @str: the string to duplicate
 439  * @n: the maximum number of bytes to copy from @str
 440  *
 441  * Duplicates the first @n bytes of a string, returning a newly-allocated
 442  * buffer @n + 1 bytes long which will always be nul-terminated. If @str
 443  * is less than @n bytes long the buffer is padded with nuls. If @str is
 444  * %NULL it returns %NULL. The returned value should be freed when no longer
 445  * needed.
 446  *
 447  * To copy a number of characters from a UTF-8 encoded string,
 448  * use g_utf8_strncpy() instead.
 449  *
 450  * Returns: a newly-allocated buffer containing the first @n bytes
 451  *     of @str, nul-terminated
 452  */
 453 gchar*
 454 g_strndup (const gchar *str,
 455            gsize        n)
 456 {
 457   gchar *new_str;
 458
 459   if (str)
 460     {
 461       new_str = g_new (gchar, n + 1);
 462       strncpy (new_str, str, n);
 463       new_str[n] = '\0';
 464     }
 465   else
 466     new_str = NULL;
 467
 468   return new_str;
 469 }
 470
 471 /**
 472  * g_strnfill:
 473  * @length: the length of the new string
 474  * @fill_char: the byte to fill the string with
 475  *
 476  * Creates a new string @length bytes long filled with @fill_char.
 477  * The returned string should be freed when no longer needed.
 478  *
 479  * Returns: a newly-allocated string filled the @fill_char
 480  */
 481 gchar*
 482 g_strnfill (gsize length,
 483             gchar fill_char)
 484 {
 485   gchar *str;
 486
 487   str = g_new (gchar, length + 1);
 488   memset (str, (guchar)fill_char, length);
 489   str[length] = '\0';
 490
 491   return str;
 492 }
 493
 494 /**
 495  * g_stpcpy:
 496  * @dest: destination buffer.
 497  * @src: source string.
 498  *
 499  * Copies a nul-terminated string into the dest buffer, include the
 500  * trailing nul, and return a pointer to the trailing nul byte.
 501  * This is useful for concatenating multiple strings together
 502  * without having to repeatedly scan for the end.
 503  *
 504  * Returns: a pointer to trailing nul byte.
 505  **/
 506 gchar *
 507 g_stpcpy (gchar       *dest,
 508           const gchar *src)
 509 {
 510 #ifdef HAVE_STPCPY
 511   g_return_val_if_fail (dest != NULL, NULL);
 512   g_return_val_if_fail (src != NULL, NULL);
 513   return stpcpy (dest, src);
 514 #else
 515   gchar *d = dest;
 516   const gchar *s = src;
 517
 518   g_return_val_if_fail (dest != NULL, NULL);
 519   g_return_val_if_fail (src != NULL, NULL);
 520   do
 521     *d++ = *s;
 522   while (*s++ != '\0');
 523
 524   return d - 1;
 525 #endif
 526 }
 527
 528 /**
 529  * g_strdup_vprintf:
 530  * @format: (not nullable): a standard printf() format string, but notice
 531  *     [string precision pitfalls][string-precision]
 532  * @args: the list of parameters to insert into the format string
 533  *
 534  * Similar to the standard C vsprintf() function but safer, since it
 535  * calculates the maximum space required and allocates memory to hold
 536  * the result. The returned string should be freed with g_free() when
 537  * no longer needed.
 538  *
 539  * The returned string is guaranteed to be non-NULL, unless @format
 540  * contains `%lc` or `%ls` conversions, which can fail if no multibyte
 541  * representation is available for the given character.
 542  *
 543  * See also g_vasprintf(), which offers the same functionality, but
 544  * additionally returns the length of the allocated string.
 545  *
 546  * Returns: a newly-allocated string holding the result
 547  */
 548 gchar*
 549 g_strdup_vprintf (const gchar *format,
 550                   va_list      args)
 551 {
 552   gchar *string = NULL;
 553
 554   g_vasprintf (&string, format, args);
 555
 556   return string;
 557 }
 558
 559 /**
 560  * g_strdup_printf:
 561  * @format: (not nullable): a standard printf() format string, but notice
 562  *     [string precision pitfalls][string-precision]
 563  * @...: the parameters to insert into the format string
 564  *
 565  * Similar to the standard C sprintf() function but safer, since it
 566  * calculates the maximum space required and allocates memory to hold
 567  * the result. The returned string should be freed with g_free() when no
 568  * longer needed.
 569  *
 570  * The returned string is guaranteed to be non-NULL, unless @format
 571  * contains `%lc` or `%ls` conversions, which can fail if no multibyte
 572  * representation is available for the given character.
 573  *
 574  * Returns: a newly-allocated string holding the result
 575  */
 576 gchar*
 577 g_strdup_printf (const gchar *format,
 578                  ...)
 579 {
 580   gchar *buffer;
 581   va_list args;
 582
 583   va_start (args, format);
 584   buffer = g_strdup_vprintf (format, args);
 585   va_end (args);
 586
 587   return buffer;
 588 }
 589
 590 /**
 591  * g_strconcat:
 592  * @string1: the first string to add, which must not be %NULL
 593  * @...: a %NULL-terminated list of strings to append to the string
 594  *
 595  * Concatenates all of the given strings into one long string. The
 596  * returned string should be freed with g_free() when no longer needed.
 597  *
 598  * The variable argument list must end with %NULL. If you forget the %NULL,
 599  * g_strconcat() will start appending random memory junk to your string.
 600  *
 601  * Note that this function is usually not the right function to use to
 602  * assemble a translated message from pieces, since proper translation
 603  * often requires the pieces to be reordered.
 604  *
 605  * Returns: a newly-allocated string containing all the string arguments
 606  */
 607 gchar*
 608 g_strconcat (const gchar *string1, ...)
 609 {
 610   gsize   l;
 611   va_list args;
 612   gchar   *s;
 613   gchar   *concat;
 614   gchar   *ptr;
 615
 616   if (!string1)
 617     return NULL;
 618
 619   l = 1 + strlen (string1);
 620   va_start (args, string1);
 621   s = va_arg (args, gchar*);
 622   while (s)
 623     {
 624       l += strlen (s);
 625       s = va_arg (args, gchar*);
 626     }
 627   va_end (args);
 628
 629   concat = g_new (gchar, l);
 630   ptr = concat;
 631
 632   ptr = g_stpcpy (ptr, string1);
 633   va_start (args, string1);
 634   s = va_arg (args, gchar*);
 635   while (s)
 636     {
 637       ptr = g_stpcpy (ptr, s);
 638       s = va_arg (args, gchar*);
 639     }
 640   va_end (args);
 641
 642   return concat;
 643 }
 644
 645 /**
 646  * g_strtod:
 647  * @nptr:    the string to convert to a numeric value.
 648  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 649  *           character after the last character used in the conversion.
 650  *
 651  * Converts a string to a #gdouble value.
 652  * It calls the standard strtod() function to handle the conversion, but
 653  * if the string is not completely converted it attempts the conversion
 654  * again with g_ascii_strtod(), and returns the best match.
 655  *
 656  * This function should seldom be used. The normal situation when reading
 657  * numbers not for human consumption is to use g_ascii_strtod(). Only when
 658  * you know that you must expect both locale formatted and C formatted numbers
 659  * should you use this. Make sure that you don't pass strings such as comma
 660  * separated lists of values, since the commas may be interpreted as a decimal
 661  * point in some locales, causing unexpected results.
 662  *
 663  * Returns: the #gdouble value.
 664  **/
 665 gdouble
 666 g_strtod (const gchar *nptr,
 667           gchar      **endptr)
 668 {
 669   gchar *fail_pos_1;
 670   gchar *fail_pos_2;
 671   gdouble val_1;
 672   gdouble val_2 = 0;
 673
 674   g_return_val_if_fail (nptr != NULL, 0);
 675
 676   fail_pos_1 = NULL;
 677   fail_pos_2 = NULL;
 678
 679   val_1 = strtod (nptr, &fail_pos_1);
 680
 681   if (fail_pos_1 && fail_pos_1[0] != 0)
 682     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
 683
 684   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
 685     {
 686       if (endptr)
 687         *endptr = fail_pos_1;
 688       return val_1;
 689     }
 690   else
 691     {
 692       if (endptr)
 693         *endptr = fail_pos_2;
 694       return val_2;
 695     }
 696 }
 697
 698 /**
 699  * g_ascii_strtod:
 700  * @nptr:    the string to convert to a numeric value.
 701  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 702  *           character after the last character used in the conversion.
 703  *
 704  * Converts a string to a #gdouble value.
 705  *
 706  * This function behaves like the standard strtod() function
 707  * does in the C locale. It does this without actually changing
 708  * the current locale, since that would not be thread-safe.
 709  * A limitation of the implementation is that this function
 710  * will still accept localized versions of infinities and NANs.
 711  *
 712  * This function is typically used when reading configuration
 713  * files or other non-user input that should be locale independent.
 714  * To handle input from the user you should normally use the
 715  * locale-sensitive system strtod() function.
 716  *
 717  * To convert from a #gdouble to a string in a locale-insensitive
 718  * way, use g_ascii_dtostr().
 719  *
 720  * If the correct value would cause overflow, plus or minus %HUGE_VAL
 721  * is returned (according to the sign of the value), and %ERANGE is
 722  * stored in %errno. If the correct value would cause underflow,
 723  * zero is returned and %ERANGE is stored in %errno.
 724  *
 725  * This function resets %errno before calling strtod() so that
 726  * you can reliably detect overflow and underflow.
 727  *
 728  * Returns: the #gdouble value.
 729  */
 730 gdouble
 731 g_ascii_strtod (const gchar *nptr,
 732                 gchar      **endptr)
 733 {
 734 #ifdef USE_XLOCALE
 735
 736   g_return_val_if_fail (nptr != NULL, 0);
 737
 738   errno = 0;
 739
 740   return strtod_l (nptr, endptr, get_C_locale ());
 741
 742 #else
 743
 744   gchar *fail_pos;
 745   gdouble val;
 746 #ifndef __BIONIC__
 747   struct lconv *locale_data;
 748 #endif
 749   const char *decimal_point;
 750   gsize decimal_point_len;
 751   const char *p, *decimal_point_pos;
 752   const char *end = NULL; /* Silence gcc */
 753   int strtod_errno;
 754
 755   g_return_val_if_fail (nptr != NULL, 0);
 756
 757   fail_pos = NULL;
 758
 759 #ifndef __BIONIC__
 760   locale_data = localeconv ();
 761   decimal_point = locale_data->decimal_point;
 762   decimal_point_len = strlen (decimal_point);
 763 #else
 764   decimal_point = ".";
 765   decimal_point_len = 1;
 766 #endif
 767
 768   g_assert (decimal_point_len != 0);
 769
 770   decimal_point_pos = NULL;
 771   end = NULL;
 772
 773   if (decimal_point[0] != '.' ||
 774       decimal_point[1] != 0)
 775     {
 776       p = nptr;
 777       /* Skip leading space */
 778       while (g_ascii_isspace (*p))
 779         p++;
 780
 781       /* Skip leading optional sign */
 782       if (*p == '+' || *p == '-')
 783         p++;
 784
 785       if (p[0] == '0' &&
 786           (p[1] == 'x' || p[1] == 'X'))
 787         {
 788           p += 2;
 789           /* HEX - find the (optional) decimal point */
 790
 791           while (g_ascii_isxdigit (*p))
 792             p++;
 793
 794           if (*p == '.')
 795             decimal_point_pos = p++;
 796
 797           while (g_ascii_isxdigit (*p))
 798             p++;
 799
 800           if (*p == 'p' || *p == 'P')
 801             p++;
 802           if (*p == '+' || *p == '-')
 803             p++;
 804           while (g_ascii_isdigit (*p))
 805             p++;
 806
 807           end = p;
 808         }
 809       else if (g_ascii_isdigit (*p) || *p == '.')
 810         {
 811           while (g_ascii_isdigit (*p))
 812             p++;
 813
 814           if (*p == '.')
 815             decimal_point_pos = p++;
 816
 817           while (g_ascii_isdigit (*p))
 818             p++;
 819
 820           if (*p == 'e' || *p == 'E')
 821             p++;
 822           if (*p == '+' || *p == '-')
 823             p++;
 824           while (g_ascii_isdigit (*p))
 825             p++;
 826
 827           end = p;
 828         }
 829       /* For the other cases, we need not convert the decimal point */
 830     }
 831
 832   if (decimal_point_pos)
 833     {
 834       char *copy, *c;
 835
 836       /* We need to convert the '.' to the locale specific decimal point */
 837       copy = g_malloc (end - nptr + 1 + decimal_point_len);
 838
 839       c = copy;
 840       memcpy (c, nptr, decimal_point_pos - nptr);
 841       c += decimal_point_pos - nptr;
 842       memcpy (c, decimal_point, decimal_point_len);
 843       c += decimal_point_len;
 844       memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
 845       c += end - (decimal_point_pos + 1);
 846       *c = 0;
 847
 848       errno = 0;
 849       val = strtod (copy, &fail_pos);
 850       strtod_errno = errno;
 851
 852       if (fail_pos)
 853         {
 854           if (fail_pos - copy > decimal_point_pos - nptr)
 855             fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
 856           else
 857             fail_pos = (char *)nptr + (fail_pos - copy);
 858         }
 859
 860       g_free (copy);
 861
 862     }
 863   else if (end)
 864     {
 865       char *copy;
 866
 867       copy = g_malloc (end - (char *)nptr + 1);
 868       memcpy (copy, nptr, end - nptr);
 869       *(copy + (end - (char *)nptr)) = 0;
 870
 871       errno = 0;
 872       val = strtod (copy, &fail_pos);
 873       strtod_errno = errno;
 874
 875       if (fail_pos)
 876         {
 877           fail_pos = (char *)nptr + (fail_pos - copy);
 878         }
 879
 880       g_free (copy);
 881     }
 882   else
 883     {
 884       errno = 0;
 885       val = strtod (nptr, &fail_pos);
 886       strtod_errno = errno;
 887     }
 888
 889   if (endptr)
 890     *endptr = fail_pos;
 891
 892   errno = strtod_errno;
 893
 894   return val;
 895 #endif
 896 }
 897
 898
 899 /**
 900  * g_ascii_dtostr:
 901  * @buffer: A buffer to place the resulting string in
 902  * @buf_len: The length of the buffer.
 903  * @d: The #gdouble to convert
 904  *
 905  * Converts a #gdouble to a string, using the '.' as
 906  * decimal point.
 907  *
 908  * This function generates enough precision that converting
 909  * the string back using g_ascii_strtod() gives the same machine-number
 910  * (on machines with IEEE compatible 64bit doubles). It is
 911  * guaranteed that the size of the resulting string will never
 912  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
 913  * nul character, which is always added.
 914  *
 915  * Returns: The pointer to the buffer with the converted string.
 916  **/
 917 gchar *
 918 g_ascii_dtostr (gchar       *buffer,
 919                 gint         buf_len,
 920                 gdouble      d)
 921 {
 922   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
 923 }
 924
 925 #pragma GCC diagnostic push
 926 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
 927
 928 /**
 929  * g_ascii_formatd:
 930  * @buffer: A buffer to place the resulting string in
 931  * @buf_len: The length of the buffer.
 932  * @format: The printf()-style format to use for the
 933  *          code to use for converting.
 934  * @d: The #gdouble to convert
 935  *
 936  * Converts a #gdouble to a string, using the '.' as
 937  * decimal point. To format the number you pass in
 938  * a printf()-style format string. Allowed conversion
 939  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
 940  *
 941  * The returned buffer is guaranteed to be nul-terminated.
 942  *
 943  * If you just want to serialize the value into a
 944  * string, use g_ascii_dtostr().
 945  *
 946  * Returns: The pointer to the buffer with the converted string.
 947  */
 948 gchar *
 949 g_ascii_formatd (gchar       *buffer,
 950                  gint         buf_len,
 951                  const gchar *format,
 952                  gdouble      d)
 953 {
 954 #ifdef USE_XLOCALE
 955   locale_t old_locale;
 956
 957   old_locale = uselocale (get_C_locale ());
 958    _g_snprintf (buffer, buf_len, format, d);
 959   uselocale (old_locale);
 960
 961   return buffer;
 962 #else
 963 #ifndef __BIONIC__
 964   struct lconv *locale_data;
 965 #endif
 966   const char *decimal_point;
 967   gsize decimal_point_len;
 968   gchar *p;
 969   int rest_len;
 970   gchar format_char;
 971
 972   g_return_val_if_fail (buffer != NULL, NULL);
 973   g_return_val_if_fail (format[0] == '%', NULL);
 974   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
 975
 976   format_char = format[strlen (format) - 1];
 977
 978   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
 979                         format_char == 'f' || format_char == 'F' ||
 980                         format_char == 'g' || format_char == 'G',
 981                         NULL);
 982
 983   if (format[0] != '%')
 984     return NULL;
 985
 986   if (strpbrk (format + 1, "'l%"))
 987     return NULL;
 988
 989   if (!(format_char == 'e' || format_char == 'E' ||
 990         format_char == 'f' || format_char == 'F' ||
 991         format_char == 'g' || format_char == 'G'))
 992     return NULL;
 993
 994   _g_snprintf (buffer, buf_len, format, d);
 995
 996 #ifndef __BIONIC__
 997   locale_data = localeconv ();
 998   decimal_point = locale_data->decimal_point;
 999   decimal_point_len = strlen (decimal_point);
1000 #else
1001   decimal_point = ".";
1002   decimal_point_len = 1;
1003 #endif
1004
1005   g_assert (decimal_point_len != 0);
1006
1007   if (decimal_point[0] != '.' ||
1008       decimal_point[1] != 0)
1009     {
1010       p = buffer;
1011
1012       while (g_ascii_isspace (*p))
1013         p++;
1014
1015       if (*p == '+' || *p == '-')
1016         p++;
1017
1018       while (isdigit ((guchar)*p))
1019         p++;
1020
1021       if (strncmp (p, decimal_point, decimal_point_len) == 0)
1022         {
1023           *p = '.';
1024           p++;
1025           if (decimal_point_len > 1)
1026             {
1027               rest_len = strlen (p + (decimal_point_len - 1));
1028               memmove (p, p + (decimal_point_len - 1), rest_len);
1029               p[rest_len] = 0;
1030             }
1031         }
1032     }
1033
1034   return buffer;
1035 #endif
1036 }
1037 #pragma GCC diagnostic pop
1038
/* Character classification and case-conversion helpers that only compare
 * against plain ASCII character constants and never call into <ctype.h>.
 *
 * These are function-like macros that expand their argument several
 * times, so the argument must not have side effects.
 */
/* True for space, form feed, newline, carriage return, tab, vertical tab. */
#define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                                 (c) == '\r' || (c) == '\t' || (c) == '\v')
/* True for 'A'..'Z'. */
#define ISUPPER(c)              ((c) >= 'A' && (c) <= 'Z')
/* True for 'a'..'z'. */
#define ISLOWER(c)              ((c) >= 'a' && (c) <= 'z')
/* True for 'A'..'Z' or 'a'..'z'. */
#define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
/* Maps 'a'..'z' to 'A'..'Z'; yields any other value unchanged. */
#define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
/* Maps 'A'..'Z' to 'a'..'z'; yields any other value unchanged. */
#define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1046
1047 #ifndef USE_XLOCALE
1048
/*
 * g_parse_long_long:
 * @nptr: the string to parse; must not be NULL
 * @endptr: if non-NULL, receives the position at which parsing stopped
 * @base: the numeric base; 0 selects the base from the prefix
 *     ("0x"/"0X" for 16, a leading "0" for 8, otherwise 10); 1 and
 *     values above 36 are rejected
 * @negative: set to TRUE if a leading '-' was seen, FALSE otherwise
 *
 * Parses optional whitespace, an optional sign, an optional base prefix
 * and then the digits of an unsigned 64-bit magnitude. The sign is only
 * reported through @negative; it is not applied to the returned value.
 *
 * Returns: the parsed magnitude. On overflow G_MAXUINT64 is returned and
 *     errno is set to ERANGE. For an invalid @base 0 is returned and
 *     errno is set to EINVAL. If nothing could be converted 0 is
 *     returned and errno is left untouched.
 */
static guint64
g_parse_long_long (const gchar  *nptr,
                   const gchar **endptr,
                   guint         base,
                   gboolean     *negative)
{
  /* this code is based on the strtol(3) code from GNU libc released under
   * the GNU Lesser General Public License.
   *
   * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
   *        Free Software Foundation, Inc.
   */
  gboolean overflow;
  guint64 cutoff;
  guint64 cutlim;
  guint64 ui64;
  const gchar *s, *save;
  guchar c;

  g_return_val_if_fail (nptr != NULL, 0);

  *negative = FALSE;
  if (base == 1 || base > 36)
    {
      errno = EINVAL;
      if (endptr)
        *endptr = nptr;
      return 0;
    }

  save = s = nptr;

  /* Skip white space.  */
  while (ISSPACE (*s))
    ++s;

  /* Nothing but whitespace: no conversion is possible.  */
  if (G_UNLIKELY (!*s))
    goto noconv;

  /* Check for a sign.  */
  if (*s == '-')
    {
      *negative = TRUE;
      ++s;
    }
  else if (*s == '+')
    ++s;

  /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
  if (*s == '0')
    {
      if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
        {
          s += 2;
          base = 16;
        }
      else if (base == 0)
        base = 8;
    }
  else if (base == 0)
    base = 10;

  /* Save the pointer so we can check later if anything happened.  */
  save = s;
  /* cutoff is the largest value that can still be multiplied by BASE
     without exceeding G_MAXUINT64; cutlim is the largest digit that may
     be added once the accumulated value equals cutoff.  */
  cutoff = G_MAXUINT64 / base;
  cutlim = G_MAXUINT64 % base;

  overflow = FALSE;
  ui64 = 0;
  c = *s;
  for (; c; c = *++s)
    {
      /* Turn the character into its digit value, reusing C.  */
      if (c >= '0' && c <= '9')
        c -= '0';
      else if (ISALPHA (c))
        c = TOUPPER (c) - 'A' + 10;
      else
        break;
      /* A digit that does not belong to BASE ends the number.  */
      if (c >= base)
        break;
      /* Check for overflow.  */
      if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
        overflow = TRUE;
      else
        {
          ui64 *= base;
          ui64 += c;
        }
    }

  /* Check if anything actually happened.  */
  if (s == save)
    goto noconv;

  /* Store in ENDPTR the address of one character
     past the last character we converted.  */
  if (endptr)
    *endptr = s;

  if (G_UNLIKELY (overflow))
    {
      errno = ERANGE;
      return G_MAXUINT64;
    }

  return ui64;

 noconv:
  /* We must handle a special case here: the base is 0 or 16 and the
     first two characters are '0' and 'x', but the rest are no
     hexadecimal digits.  This is no error case.  We return 0 and
     ENDPTR points to the `x`.  */
  if (endptr)
    {
      if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
          && save[-2] == '0')
        *endptr = &save[-1];
      else
        /*  There was no number to convert.  */
        *endptr = nptr;
    }
  return 0;
}
1172 #endif /* !USE_XLOCALE */
1173
1174 /**
1175  * g_ascii_strtoull:
1176  * @nptr:    the string to convert to a numeric value.
1177  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1178  *           character after the last character used in the conversion.
1179  * @base:    to be used for the conversion, 2..36 or 0
1180  *
1181  * Converts a string to a #guint64 value.
1182  * This function behaves like the standard strtoull() function
1183  * does in the C locale. It does this without actually
1184  * changing the current locale, since that would not be
1185  * thread-safe.
1186  *
1187  * Note that input with a leading minus sign (`-`) is accepted, and will return
1188  * the negation of the parsed number, unless that would overflow a #guint64.
1189  * Critically, this means you cannot assume that a short fixed length input will
1190  * never result in a low return value, as the input could have a leading `-`.
1191  *
1192  * This function is typically used when reading configuration
1193  * files or other non-user input that should be locale independent.
1194  * To handle input from the user you should normally use the
1195  * locale-sensitive system strtoull() function.
1196  *
1197  * If the correct value would cause overflow, %G_MAXUINT64
1198  * is returned, and `ERANGE` is stored in `errno`.
1199  * If the base is outside the valid range, zero is returned, and
1200  * `EINVAL` is stored in `errno`.
1201  * If the string conversion fails, zero is returned, and @endptr returns
1202  * @nptr (if @endptr is non-%NULL).
1203  *
1204  * Returns: the #guint64 value or zero on error.
1205  *
1206  * Since: 2.2
1207  */
1208 guint64
1209 g_ascii_strtoull (const gchar *nptr,
1210                   gchar      **endptr,
1211                   guint        base)
1212 {
1213 #ifdef USE_XLOCALE
1214   return strtoull_l (nptr, endptr, base, get_C_locale ());
1215 #else
1216   gboolean negative;
1217   guint64 result;
1218
1219   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1220
1221   /* Return the result of the appropriate sign.  */
1222   return negative ? -result : result;
1223 #endif
1224 }
1225
1226 /**
1227  * g_ascii_strtoll:
1228  * @nptr:    the string to convert to a numeric value.
1229  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1230  *           character after the last character used in the conversion.
1231  * @base:    to be used for the conversion, 2..36 or 0
1232  *
1233  * Converts a string to a #gint64 value.
1234  * This function behaves like the standard strtoll() function
1235  * does in the C locale. It does this without actually
1236  * changing the current locale, since that would not be
1237  * thread-safe.
1238  *
1239  * This function is typically used when reading configuration
1240  * files or other non-user input that should be locale independent.
1241  * To handle input from the user you should normally use the
1242  * locale-sensitive system strtoll() function.
1243  *
1244  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1245  * is returned, and `ERANGE` is stored in `errno`.
1246  * If the base is outside the valid range, zero is returned, and
1247  * `EINVAL` is stored in `errno`. If the
1248  * string conversion fails, zero is returned, and @endptr returns @nptr
1249  * (if @endptr is non-%NULL).
1250  *
1251  * Returns: the #gint64 value or zero on error.
1252  *
1253  * Since: 2.12
1254  */
1255 gint64
1256 g_ascii_strtoll (const gchar *nptr,
1257                  gchar      **endptr,
1258                  guint        base)
1259 {
1260 #ifdef USE_XLOCALE
1261   return strtoll_l (nptr, endptr, base, get_C_locale ());
1262 #else
1263   gboolean negative;
1264   guint64 result;
1265
1266   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1267
1268   if (negative && result > (guint64) G_MININT64)
1269     {
1270       errno = ERANGE;
1271       return G_MININT64;
1272     }
1273   else if (!negative && result > (guint64) G_MAXINT64)
1274     {
1275       errno = ERANGE;
1276       return G_MAXINT64;
1277     }
1278   else if (negative)
1279     return - (gint64) result;
1280   else
1281     return (gint64) result;
1282 #endif
1283 }
1284
1285 /**
1286  * g_strerror:
1287  * @errnum: the system error number. See the standard C %errno
1288  *     documentation
1289  *
1290  * Returns a string corresponding to the given error code, e.g. "no
1291  * such process". Unlike strerror(), this always returns a string in
1292  * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1293  * the lifetime of the process.
1294  *
1295  * Note that the string may be translated according to the current locale.
1296  *
1297  * The value of %errno will not be changed by this function. However, it may
1298  * be changed by intermediate function calls, so you should save its value
1299  * as soon as the call returns:
1300  * |[
1301  *   int saved_errno;
1302  *
1303  *   ret = read (blah);
1304  *   saved_errno = errno;
1305  *
1306  *   g_strerror (saved_errno);
1307  * ]|
1308  *
1309  * Returns: a UTF-8 string describing the error code. If the error code
1310  *     is unknown, it returns a string like "unknown error (<code>)".
1311  */
1312 const gchar *
1313 g_strerror (gint errnum)
1314 {
1315   static GHashTable *errors;
1316   G_LOCK_DEFINE_STATIC (errors);
1317   const gchar *msg;
1318   gint saved_errno = errno;
1319
1320   G_LOCK (errors);
1321   if (errors)
1322     msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1323   else
1324     {
1325       errors = g_hash_table_new (NULL, NULL);
1326       msg = NULL;
1327     }
1328
1329   if (!msg)
1330     {
1331       gchar buf[1024];
1332       GError *error = NULL;
1333
1334 #if defined(G_OS_WIN32)
1335       strerror_s (buf, sizeof (buf), errnum);
1336       msg = buf;
1337 #elif defined(HAVE_STRERROR_R)
1338       /* Match the condition in strerror_r(3) for glibc */
1339 #  if defined(STRERROR_R_CHAR_P)
1340       msg = strerror_r (errnum, buf, sizeof (buf));
1341 #  else
1342       (void) strerror_r (errnum, buf, sizeof (buf));
1343       msg = buf;
1344 #  endif /* HAVE_STRERROR_R */
1345 #else
1346       g_strlcpy (buf, strerror (errnum), sizeof (buf));
1347       msg = buf;
1348 #endif
1349       if (!g_get_console_charset (NULL))
1350         {
1351           msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1352           if (error)
1353             g_print ("%s\n", error->message);
1354         }
1355       else if (msg == (const gchar *)buf)
1356         msg = g_strdup (buf);
1357
1358       g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1359     }
1360   G_UNLOCK (errors);
1361
1362   errno = saved_errno;
1363   return msg;
1364 }
1365
1366 /**
1367  * g_strsignal:
1368  * @signum: the signal number. See the `signal` documentation
1369  *
1370  * Returns a string describing the given signal, e.g. "Segmentation fault".
1371  * You should use this function in preference to strsignal(), because it
1372  * returns a string in UTF-8 encoding, and since not all platforms support
1373  * the strsignal() function.
1374  *
1375  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1376  *     it returns "unknown signal (<signum>)".
1377  */
1378 const gchar *
1379 g_strsignal (gint signum)
1380 {
1381   gchar *msg;
1382   gchar *tofree;
1383   const gchar *ret;
1384
1385   msg = tofree = NULL;
1386
1387 #ifdef HAVE_STRSIGNAL
1388   msg = strsignal (signum);
1389   if (!g_get_console_charset (NULL))
1390     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1391 #endif
1392
1393   if (!msg)
1394     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1395   ret = g_intern_string (msg);
1396   g_free (tofree);
1397
1398   return ret;
1399 }
1400
1401 /* Functions g_strlcpy and g_strlcat were originally developed by
1402  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1403  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1404  * for more information.
1405  */
1406
1407 #ifdef HAVE_STRLCPY
1408 /* Use the native ones, if available; they might be implemented in assembly */
1409 gsize
1410 g_strlcpy (gchar       *dest,
1411            const gchar *src,
1412            gsize        dest_size)
1413 {
1414   g_return_val_if_fail (dest != NULL, 0);
1415   g_return_val_if_fail (src  != NULL, 0);
1416
1417   return strlcpy (dest, src, dest_size);
1418 }
1419
1420 gsize
1421 g_strlcat (gchar       *dest,
1422            const gchar *src,
1423            gsize        dest_size)
1424 {
1425   g_return_val_if_fail (dest != NULL, 0);
1426   g_return_val_if_fail (src  != NULL, 0);
1427
1428   return strlcat (dest, src, dest_size);
1429 }
1430
1431 #else /* ! HAVE_STRLCPY */
1432 /**
1433  * g_strlcpy:
1434  * @dest: destination buffer
1435  * @src: source buffer
1436  * @dest_size: length of @dest in bytes
1437  *
1438  * Portability wrapper that calls strlcpy() on systems which have it,
1439  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1440  * guaranteed to be nul-terminated; @src must be nul-terminated;
1441  * @dest_size is the buffer size, not the number of bytes to copy.
1442  *
1443  * At most @dest_size - 1 characters will be copied. Always nul-terminates
1444  * (unless @dest_size is 0). This function does not allocate memory. Unlike
1445  * strncpy(), this function doesn't pad @dest (so it's often faster). It
1446  * returns the size of the attempted result, strlen (src), so if
1447  * @retval >= @dest_size, truncation occurred.
1448  *
1449  * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1450  * but if you really want to avoid screwups, g_strdup() is an even better
1451  * idea.
1452  *
1453  * Returns: length of @src
1454  */
1455 gsize
1456 g_strlcpy (gchar       *dest,
1457            const gchar *src,
1458            gsize        dest_size)
1459 {
1460   gchar *d = dest;
1461   const gchar *s = src;
1462   gsize n = dest_size;
1463
1464   g_return_val_if_fail (dest != NULL, 0);
1465   g_return_val_if_fail (src  != NULL, 0);
1466
1467   /* Copy as many bytes as will fit */
1468   if (n != 0 && --n != 0)
1469     do
1470       {
1471         gchar c = *s++;
1472
1473         *d++ = c;
1474         if (c == 0)
1475           break;
1476       }
1477     while (--n != 0);
1478
1479   /* If not enough room in dest, add NUL and traverse rest of src */
1480   if (n == 0)
1481     {
1482       if (dest_size != 0)
1483         *d = 0;
1484       while (*s++)
1485         ;
1486     }
1487
1488   return s - src - 1;  /* count does not include NUL */
1489 }
1490
1491 /**
1492  * g_strlcat:
1493  * @dest: destination buffer, already containing one nul-terminated string
1494  * @src: source buffer
1495  * @dest_size: length of @dest buffer in bytes (not length of existing string
1496  *     inside @dest)
1497  *
1498  * Portability wrapper that calls strlcat() on systems which have it,
1499  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1500  * guaranteeing nul-termination for @dest. The total size of @dest won't
1501  * exceed @dest_size.
1502  *
1503  * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1504  * @dest_size is the full size of dest, not the space left over. This
1505  * function does not allocate memory. It always nul-terminates (unless
1506  * @dest_size == 0 or there were no nul characters in the @dest_size
1507  * characters of dest to start with).
1508  *
1509  * Caveat: this is supposedly a more secure alternative to strcat() or
1510  * strncat(), but for real security g_strconcat() is harder to mess up.
1511  *
1512  * Returns: size of attempted result, which is MIN (dest_size, strlen
1513  *     (original dest)) + strlen (src), so if retval >= dest_size,
1514  *     truncation occurred.
1515  */
1516 gsize
1517 g_strlcat (gchar       *dest,
1518            const gchar *src,
1519            gsize        dest_size)
1520 {
1521   gchar *d = dest;
1522   const gchar *s = src;
1523   gsize bytes_left = dest_size;
1524   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1525
1526   g_return_val_if_fail (dest != NULL, 0);
1527   g_return_val_if_fail (src  != NULL, 0);
1528
1529   /* Find the end of dst and adjust bytes left but don't go past end */
1530   while (*d != 0 && bytes_left-- != 0)
1531     d++;
1532   dlength = d - dest;
1533   bytes_left = dest_size - dlength;
1534
1535   if (bytes_left == 0)
1536     return dlength + strlen (s);
1537
1538   while (*s != 0)
1539     {
1540       if (bytes_left != 1)
1541         {
1542           *d++ = *s;
1543           bytes_left--;
1544         }
1545       s++;
1546     }
1547   *d = 0;
1548
1549   return dlength + (s - src);  /* count does not include NUL */
1550 }
1551 #endif /* ! HAVE_STRLCPY */
1552
1553 /**
1554  * g_ascii_strdown:
1555  * @str: a string
1556  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1557  *
1558  * Converts all upper case ASCII letters to lower case ASCII letters.
1559  *
1560  * Returns: a newly-allocated string, with all the upper case
1561  *     characters in @str converted to lower case, with semantics that
1562  *     exactly match g_ascii_tolower(). (Note that this is unlike the
1563  *     old g_strdown(), which modified the string in place.)
1564  */
1565 gchar*
1566 g_ascii_strdown (const gchar *str,
1567                  gssize       len)
1568 {
1569   gchar *result, *s;
1570
1571   g_return_val_if_fail (str != NULL, NULL);
1572
1573   if (len < 0)
1574     len = (gssize) strlen (str);
1575
1576   result = g_strndup (str, (gsize) len);
1577   for (s = result; *s; s++)
1578     *s = g_ascii_tolower (*s);
1579
1580   return result;
1581 }
1582
1583 /**
1584  * g_ascii_strup:
1585  * @str: a string
1586  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1587  *
1588  * Converts all lower case ASCII letters to upper case ASCII letters.
1589  *
1590  * Returns: a newly allocated string, with all the lower case
1591  *     characters in @str converted to upper case, with semantics that
1592  *     exactly match g_ascii_toupper(). (Note that this is unlike the
1593  *     old g_strup(), which modified the string in place.)
1594  */
1595 gchar*
1596 g_ascii_strup (const gchar *str,
1597                gssize       len)
1598 {
1599   gchar *result, *s;
1600
1601   g_return_val_if_fail (str != NULL, NULL);
1602
1603   if (len < 0)
1604     len = (gssize) strlen (str);
1605
1606   result = g_strndup (str, (gsize) len);
1607   for (s = result; *s; s++)
1608     *s = g_ascii_toupper (*s);
1609
1610   return result;
1611 }
1612
1613 /**
1614  * g_str_is_ascii:
1615  * @str: a string
1616  *
1617  * Determines if a string is pure ASCII. A string is pure ASCII if it
1618  * contains no bytes with the high bit set.
1619  *
1620  * Returns: %TRUE if @str is ASCII
1621  *
1622  * Since: 2.40
1623  */
1624 gboolean
1625 g_str_is_ascii (const gchar *str)
1626 {
1627   gsize i;
1628
1629   for (i = 0; str[i]; i++)
1630     if (str[i] & 0x80)
1631       return FALSE;
1632
1633   return TRUE;
1634 }
1635
1636 /**
1637  * g_strdown:
1638  * @string: the string to convert.
1639  *
1640  * Converts a string to lower case.
1641  *
1642  * Returns: the string
1643  *
1644  * Deprecated:2.2: This function is totally broken for the reasons discussed
1645  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1646  * instead.
1647  **/
1648 gchar*
1649 g_strdown (gchar *string)
1650 {
1651   guchar *s;
1652
1653   g_return_val_if_fail (string != NULL, NULL);
1654
1655   s = (guchar *) string;
1656
1657   while (*s)
1658     {
1659       if (isupper (*s))
1660         *s = tolower (*s);
1661       s++;
1662     }
1663
1664   return (gchar *) string;
1665 }
1666
1667 /**
1668  * g_strup:
1669  * @string: the string to convert
1670  *
1671  * Converts a string to upper case.
1672  *
1673  * Returns: the string
1674  *
1675  * Deprecated:2.2: This function is totally broken for the reasons
1676  *     discussed in the g_strncasecmp() docs - use g_ascii_strup()
1677  *     or g_utf8_strup() instead.
1678  */
1679 gchar*
1680 g_strup (gchar *string)
1681 {
1682   guchar *s;
1683
1684   g_return_val_if_fail (string != NULL, NULL);
1685
1686   s = (guchar *) string;
1687
1688   while (*s)
1689     {
1690       if (islower (*s))
1691         *s = toupper (*s);
1692       s++;
1693     }
1694
1695   return (gchar *) string;
1696 }
1697
1698 /**
1699  * g_strreverse:
1700  * @string: the string to reverse
1701  *
1702  * Reverses all of the bytes in a string. For example,
1703  * `g_strreverse ("abcdef")` will result in "fedcba".
1704  *
1705  * Note that g_strreverse() doesn't work on UTF-8 strings
1706  * containing multibyte characters. For that purpose, use
1707  * g_utf8_strreverse().
1708  *
1709  * Returns: the same pointer passed in as @string
1710  */
1711 gchar*
1712 g_strreverse (gchar *string)
1713 {
1714   g_return_val_if_fail (string != NULL, NULL);
1715
1716   if (*string)
1717     {
1718       gchar *h, *t;
1719
1720       h = string;
1721       t = string + strlen (string) - 1;
1722
1723       while (h < t)
1724         {
1725           gchar c;
1726
1727           c = *h;
1728           *h = *t;
1729           h++;
1730           *t = c;
1731           t--;
1732         }
1733     }
1734
1735   return string;
1736 }
1737
1738 /**
1739  * g_ascii_tolower:
1740  * @c: any character
1741  *
1742  * Convert a character to ASCII lower case.
1743  *
1744  * Unlike the standard C library tolower() function, this only
1745  * recognizes standard ASCII letters and ignores the locale, returning
1746  * all non-ASCII characters unchanged, even if they are lower case
1747  * letters in a particular character set. Also unlike the standard
1748  * library function, this takes and returns a char, not an int, so
1749  * don't call it on %EOF but no need to worry about casting to #guchar
1750  * before passing a possibly non-ASCII character in.
1751  *
1752  * Returns: the result of converting @c to lower case. If @c is
1753  *     not an ASCII upper case letter, @c is returned unchanged.
1754  */
1755 gchar
1756 g_ascii_tolower (gchar c)
1757 {
1758   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1759 }
1760
1761 /**
1762  * g_ascii_toupper:
1763  * @c: any character
1764  *
1765  * Convert a character to ASCII upper case.
1766  *
1767  * Unlike the standard C library toupper() function, this only
1768  * recognizes standard ASCII letters and ignores the locale, returning
1769  * all non-ASCII characters unchanged, even if they are upper case
1770  * letters in a particular character set. Also unlike the standard
1771  * library function, this takes and returns a char, not an int, so
1772  * don't call it on %EOF but no need to worry about casting to #guchar
1773  * before passing a possibly non-ASCII character in.
1774  *
1775  * Returns: the result of converting @c to upper case. If @c is not
1776  *    an ASCII lower case letter, @c is returned unchanged.
1777  */
1778 gchar
1779 g_ascii_toupper (gchar c)
1780 {
1781   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1782 }
1783
1784 /**
1785  * g_ascii_digit_value:
1786  * @c: an ASCII character
1787  *
1788  * Determines the numeric value of a character as a decimal digit.
1789  * Differs from g_unichar_digit_value() because it takes a char, so
1790  * there's no worry about sign extension if characters are signed.
1791  *
1792  * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1793  *    its numeric value. Otherwise, -1.
1794  */
1795 int
1796 g_ascii_digit_value (gchar c)
1797 {
1798   if (g_ascii_isdigit (c))
1799     return c - '0';
1800   return -1;
1801 }
1802
1803 /**
1804  * g_ascii_xdigit_value:
1805  * @c: an ASCII character.
1806  *
1807  * Determines the numeric value of a character as a hexadecimal
1808  * digit. Differs from g_unichar_xdigit_value() because it takes
1809  * a char, so there's no worry about sign extension if characters
1810  * are signed.
1811  *
1812  * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1813  *     its numeric value. Otherwise, -1.
1814  */
1815 int
1816 g_ascii_xdigit_value (gchar c)
1817 {
1818   if (c >= 'A' && c <= 'F')
1819     return c - 'A' + 10;
1820   if (c >= 'a' && c <= 'f')
1821     return c - 'a' + 10;
1822   return g_ascii_digit_value (c);
1823 }
1824
1825 /**
1826  * g_ascii_strcasecmp:
1827  * @s1: string to compare with @s2
1828  * @s2: string to compare with @s1
1829  *
1830  * Compare two strings, ignoring the case of ASCII characters.
1831  *
1832  * Unlike the BSD strcasecmp() function, this only recognizes standard
1833  * ASCII letters and ignores the locale, treating all non-ASCII
1834  * bytes as if they are not letters.
1835  *
1836  * This function should be used only on strings that are known to be
1837  * in encodings where the bytes corresponding to ASCII letters always
1838  * represent themselves. This includes UTF-8 and the ISO-8859-*
1839  * charsets, but not for instance double-byte encodings like the
1840  * Windows Codepage 932, where the trailing bytes of double-byte
1841  * characters include all ASCII letters. If you compare two CP932
1842  * strings using this function, you will get false matches.
1843  *
1844  * Both @s1 and @s2 must be non-%NULL.
1845  *
1846  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1847  *     or a positive value if @s1 > @s2.
1848  */
1849 gint
1850 g_ascii_strcasecmp (const gchar *s1,
1851                     const gchar *s2)
1852 {
1853   gint c1, c2;
1854
1855   g_return_val_if_fail (s1 != NULL, 0);
1856   g_return_val_if_fail (s2 != NULL, 0);
1857
1858   while (*s1 && *s2)
1859     {
1860       c1 = (gint)(guchar) TOLOWER (*s1);
1861       c2 = (gint)(guchar) TOLOWER (*s2);
1862       if (c1 != c2)
1863         return (c1 - c2);
1864       s1++; s2++;
1865     }
1866
1867   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1868 }
1869
1870 /**
1871  * g_ascii_strncasecmp:
1872  * @s1: string to compare with @s2
1873  * @s2: string to compare with @s1
1874  * @n: number of characters to compare
1875  *
1876  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1877  * characters after the first @n in each string.
1878  *
1879  * Unlike the BSD strcasecmp() function, this only recognizes standard
1880  * ASCII letters and ignores the locale, treating all non-ASCII
1881  * characters as if they are not letters.
1882  *
1883  * The same warning as in g_ascii_strcasecmp() applies: Use this
1884  * function only on strings known to be in encodings where bytes
1885  * corresponding to ASCII letters always represent themselves.
1886  *
1887  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1888  *     or a positive value if @s1 > @s2.
1889  */
1890 gint
1891 g_ascii_strncasecmp (const gchar *s1,
1892                      const gchar *s2,
1893                      gsize        n)
1894 {
1895   gint c1, c2;
1896
1897   g_return_val_if_fail (s1 != NULL, 0);
1898   g_return_val_if_fail (s2 != NULL, 0);
1899
1900   while (n && *s1 && *s2)
1901     {
1902       n -= 1;
1903       c1 = (gint)(guchar) TOLOWER (*s1);
1904       c2 = (gint)(guchar) TOLOWER (*s2);
1905       if (c1 != c2)
1906         return (c1 - c2);
1907       s1++; s2++;
1908     }
1909
1910   if (n)
1911     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1912   else
1913     return 0;
1914 }
1915
1916 /**
1917  * g_strcasecmp:
1918  * @s1: a string
1919  * @s2: a string to compare with @s1
1920  *
1921  * A case-insensitive string comparison, corresponding to the standard
1922  * strcasecmp() function on platforms which support it.
1923  *
1924  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1925  *     or a positive value if @s1 > @s2.
1926  *
1927  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1928  *     function is deprecated and how to replace it.
1929  */
1930 gint
1931 g_strcasecmp (const gchar *s1,
1932               const gchar *s2)
1933 {
1934 #ifdef HAVE_STRCASECMP
1935   g_return_val_if_fail (s1 != NULL, 0);
1936   g_return_val_if_fail (s2 != NULL, 0);
1937
1938   return strcasecmp (s1, s2);
1939 #else
1940   gint c1, c2;
1941
1942   g_return_val_if_fail (s1 != NULL, 0);
1943   g_return_val_if_fail (s2 != NULL, 0);
1944
1945   while (*s1 && *s2)
1946     {
1947       /* According to A. Cox, some platforms have islower's that
1948        * don't work right on non-uppercase
1949        */
1950       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952       if (c1 != c2)
1953         return (c1 - c2);
1954       s1++; s2++;
1955     }
1956
1957   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1958 #endif
1959 }
1960
1961 /**
1962  * g_strncasecmp:
1963  * @s1: a string
1964  * @s2: a string to compare with @s1
1965  * @n: the maximum number of characters to compare
1966  *
1967  * A case-insensitive string comparison, corresponding to the standard
1968  * strncasecmp() function on platforms which support it. It is similar
1969  * to g_strcasecmp() except it only compares the first @n characters of
1970  * the strings.
1971  *
1972  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1973  *     or a positive value if @s1 > @s2.
1974  *
1975  * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1976  *     the comparison by calling toupper()/tolower(). These functions
1977  *     are locale-specific and operate on single bytes. However, it is
1978  *     impossible to handle things correctly from an internationalization
1979  *     standpoint by operating on bytes, since characters may be multibyte.
1980  *     Thus g_strncasecmp() is broken if your string is guaranteed to be
1981  *     ASCII, since it is locale-sensitive, and it's broken if your string
1982  *     is localized, since it doesn't work on many encodings at all,
1983  *     including UTF-8, EUC-JP, etc.
1984  *
1985  *     There are therefore two replacement techniques: g_ascii_strncasecmp(),
1986  *     which only works on ASCII and is not locale-sensitive, and
1987  *     g_utf8_casefold() followed by strcmp() on the resulting strings,
1988  *     which is good for case-insensitive sorting of UTF-8.
1989  */
1990 gint
1991 g_strncasecmp (const gchar *s1,
1992                const gchar *s2,
1993                guint n)
1994 {
1995 #ifdef HAVE_STRNCASECMP
1996   return strncasecmp (s1, s2, n);
1997 #else
1998   gint c1, c2;
1999
2000   g_return_val_if_fail (s1 != NULL, 0);
2001   g_return_val_if_fail (s2 != NULL, 0);
2002
2003   while (n && *s1 && *s2)
2004     {
2005       n -= 1;
2006       /* According to A. Cox, some platforms have islower's that
2007        * don't work right on non-uppercase
2008        */
2009       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
2010       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
2011       if (c1 != c2)
2012         return (c1 - c2);
2013       s1++; s2++;
2014     }
2015
2016   if (n)
2017     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2018   else
2019     return 0;
2020 #endif
2021 }
2022
2023 /**
2024  * g_strdelimit:
2025  * @string: the string to convert
2026  * @delimiters: (nullable): a string containing the current delimiters,
2027  *     or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2028  * @new_delimiter: the new delimiter character
2029  *
2030  * Converts any delimiter characters in @string to @new_delimiter.
2031  * Any characters in @string which are found in @delimiters are
2032  * changed to the @new_delimiter character. Modifies @string in place,
2033  * and returns @string itself, not a copy. The return value is to
2034  * allow nesting such as
2035  * |[<!-- language="C" -->
2036  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
2037  * ]|
2038  *
2039  * In order to modify a copy, you may use `g_strdup()`:
2040  * |[<!-- language="C" -->
2041  *   reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2042  *   ...
2043  *   g_free (reformatted);
2044  * ]|
2045  *
2046  * Returns: @string
2047  */
2048 gchar *
2049 g_strdelimit (gchar       *string,
2050               const gchar *delimiters,
2051               gchar        new_delim)
2052 {
2053   gchar *c;
2054
2055   g_return_val_if_fail (string != NULL, NULL);
2056
2057   if (!delimiters)
2058     delimiters = G_STR_DELIMITERS;
2059
2060   for (c = string; *c; c++)
2061     {
2062       if (strchr (delimiters, *c))
2063         *c = new_delim;
2064     }
2065
2066   return string;
2067 }
2068
2069 /**
2070  * g_strcanon:
2071  * @string: a nul-terminated array of bytes
2072  * @valid_chars: bytes permitted in @string
2073  * @substitutor: replacement character for disallowed bytes
2074  *
2075  * For each character in @string, if the character is not in @valid_chars,
2076  * replaces the character with @substitutor. Modifies @string in place,
2077  * and returns @string itself, not a copy. The return value is to allow
2078  * nesting such as
2079  * |[<!-- language="C" -->
2080  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
2081  * ]|
2082  *
2083  * In order to modify a copy, you may use `g_strdup()`:
2084  * |[<!-- language="C" -->
2085  *   reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2086  *   ...
2087  *   g_free (reformatted);
2088  * ]|
2089  *
2090  * Returns: @string
2091  */
2092 gchar *
2093 g_strcanon (gchar       *string,
2094             const gchar *valid_chars,
2095             gchar        substitutor)
2096 {
2097   gchar *c;
2098
2099   g_return_val_if_fail (string != NULL, NULL);
2100   g_return_val_if_fail (valid_chars != NULL, NULL);
2101
2102   for (c = string; *c; c++)
2103     {
2104       if (!strchr (valid_chars, *c))
2105         *c = substitutor;
2106     }
2107
2108   return string;
2109 }
2110
2111 /**
2112  * g_strcompress:
2113  * @source: a string to compress
2114  *
2115  * Replaces all escaped characters with their one byte equivalent.
2116  *
2117  * This function does the reverse conversion of g_strescape().
2118  *
2119  * Returns: a newly-allocated copy of @source with all escaped
2120  *     characters compressed
2121  */
2122 gchar *
2123 g_strcompress (const gchar *source)
2124 {
2125   const gchar *p = source, *octal;
2126   gchar *dest;
2127   gchar *q;
2128
2129   g_return_val_if_fail (source != NULL, NULL);
2130
2131   dest = g_malloc (strlen (source) + 1);
2132   q = dest;
2133
2134   while (*p)
2135     {
2136       if (*p == '\\')
2137         {
2138           p++;
2139           switch (*p)
2140             {
2141             case '\0':
2142               g_warning ("g_strcompress: trailing \\");
2143               goto out;
2144             case '0':  case '1':  case '2':  case '3':  case '4':
2145             case '5':  case '6':  case '7':
2146               *q = 0;
2147               octal = p;
2148               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2149                 {
2150                   *q = (*q * 8) + (*p - '0');
2151                   p++;
2152                 }
2153               q++;
2154               p--;
2155               break;
2156             case 'b':
2157               *q++ = '\b';
2158               break;
2159             case 'f':
2160               *q++ = '\f';
2161               break;
2162             case 'n':
2163               *q++ = '\n';
2164               break;
2165             case 'r':
2166               *q++ = '\r';
2167               break;
2168             case 't':
2169               *q++ = '\t';
2170               break;
2171             case 'v':
2172               *q++ = '\v';
2173               break;
2174             default:            /* Also handles \" and \\ */
2175               *q++ = *p;
2176               break;
2177             }
2178         }
2179       else
2180         *q++ = *p;
2181       p++;
2182     }
2183 out:
2184   *q = 0;
2185
2186   return dest;
2187 }
2188
2189 /**
2190  * g_strescape:
2191  * @source: a string to escape
2192  * @exceptions: (nullable): a string of characters not to escape in @source
2193  *
2194  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2195  * and '"' in the string @source by inserting a '\' before
2196  * them. Additionally all characters in the range 0x01-0x1F (everything
2197  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2198  * replaced with a '\' followed by their octal representation.
2199  * Characters supplied in @exceptions are not escaped.
2200  *
2201  * g_strcompress() does the reverse conversion.
2202  *
2203  * Returns: a newly-allocated copy of @source with certain
2204  *     characters escaped. See above.
2205  */
2206 gchar *
2207 g_strescape (const gchar *source,
2208              const gchar *exceptions)
2209 {
2210   const guchar *p;
2211   gchar *dest;
2212   gchar *q;
2213   guchar excmap[256];
2214
2215   g_return_val_if_fail (source != NULL, NULL);
2216
2217   p = (guchar *) source;
2218   /* Each source byte needs maximally four destination chars (\777) */
2219   q = dest = g_malloc (strlen (source) * 4 + 1);
2220
2221   memset (excmap, 0, 256);
2222   if (exceptions)
2223     {
2224       guchar *e = (guchar *) exceptions;
2225
2226       while (*e)
2227         {
2228           excmap[*e] = 1;
2229           e++;
2230         }
2231     }
2232
2233   while (*p)
2234     {
2235       if (excmap[*p])
2236         *q++ = *p;
2237       else
2238         {
2239           switch (*p)
2240             {
2241             case '\b':
2242               *q++ = '\\';
2243               *q++ = 'b';
2244               break;
2245             case '\f':
2246               *q++ = '\\';
2247               *q++ = 'f';
2248               break;
2249             case '\n':
2250               *q++ = '\\';
2251               *q++ = 'n';
2252               break;
2253             case '\r':
2254               *q++ = '\\';
2255               *q++ = 'r';
2256               break;
2257             case '\t':
2258               *q++ = '\\';
2259               *q++ = 't';
2260               break;
2261             case '\v':
2262               *q++ = '\\';
2263               *q++ = 'v';
2264               break;
2265             case '\\':
2266               *q++ = '\\';
2267               *q++ = '\\';
2268               break;
2269             case '"':
2270               *q++ = '\\';
2271               *q++ = '"';
2272               break;
2273             default:
2274               if ((*p < ' ') || (*p >= 0177))
2275                 {
2276                   *q++ = '\\';
2277                   *q++ = '0' + (((*p) >> 6) & 07);
2278                   *q++ = '0' + (((*p) >> 3) & 07);
2279                   *q++ = '0' + ((*p) & 07);
2280                 }
2281               else
2282                 *q++ = *p;
2283               break;
2284             }
2285         }
2286       p++;
2287     }
2288   *q = 0;
2289   return dest;
2290 }
2291
2292 /**
2293  * g_strchug:
2294  * @string: a string to remove the leading whitespace from
2295  *
2296  * Removes leading whitespace from a string, by moving the rest
2297  * of the characters forward.
2298  *
2299  * This function doesn't allocate or reallocate any memory;
2300  * it modifies @string in place. Therefore, it cannot be used on
2301  * statically allocated strings.
2302  *
2303  * The pointer to @string is returned to allow the nesting of functions.
2304  *
2305  * Also see g_strchomp() and g_strstrip().
2306  *
2307  * Returns: @string
2308  */
2309 gchar *
2310 g_strchug (gchar *string)
2311 {
2312   guchar *start;
2313
2314   g_return_val_if_fail (string != NULL, NULL);
2315
2316   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2317     ;
2318
2319   memmove (string, start, strlen ((gchar *) start) + 1);
2320
2321   return string;
2322 }
2323
2324 /**
2325  * g_strchomp:
2326  * @string: a string to remove the trailing whitespace from
2327  *
2328  * Removes trailing whitespace from a string.
2329  *
2330  * This function doesn't allocate or reallocate any memory;
2331  * it modifies @string in place. Therefore, it cannot be used
2332  * on statically allocated strings.
2333  *
2334  * The pointer to @string is returned to allow the nesting of functions.
2335  *
2336  * Also see g_strchug() and g_strstrip().
2337  *
2338  * Returns: @string
2339  */
2340 gchar *
2341 g_strchomp (gchar *string)
2342 {
2343   gsize len;
2344
2345   g_return_val_if_fail (string != NULL, NULL);
2346
2347   len = strlen (string);
2348   while (len--)
2349     {
2350       if (g_ascii_isspace ((guchar) string[len]))
2351         string[len] = '\0';
2352       else
2353         break;
2354     }
2355
2356   return string;
2357 }
2358
2359 /**
2360  * g_strsplit:
2361  * @string: a string to split
2362  * @delimiter: a string which specifies the places at which to split
2363  *     the string. The delimiter is not included in any of the resulting
2364  *     strings, unless @max_tokens is reached.
2365  * @max_tokens: the maximum number of pieces to split @string into.
2366  *     If this is less than 1, the string is split completely.
2367  *
2368  * Splits a string into a maximum of @max_tokens pieces, using the given
2369  * @delimiter. If @max_tokens is reached, the remainder of @string is
2370  * appended to the last token.
2371  *
2372  * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2373  * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2374  * and "".
2375  *
2376  * As a special case, the result of splitting the empty string "" is an empty
2377  * vector, not a vector containing a single string. The reason for this
2378  * special case is that being able to represent an empty vector is typically
2379  * more useful than consistent handling of empty elements. If you do need
2380  * to represent empty elements, you'll need to check for the empty string
2381  * before calling g_strsplit().
2382  *
2383  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2384  *    g_strfreev() to free it.
2385  */
2386 gchar**
2387 g_strsplit (const gchar *string,
2388             const gchar *delimiter,
2389             gint         max_tokens)
2390 {
2391   char *s;
2392   const gchar *remainder;
2393   GPtrArray *string_list;
2394
2395   g_return_val_if_fail (string != NULL, NULL);
2396   g_return_val_if_fail (delimiter != NULL, NULL);
2397   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2398
2399   if (max_tokens < 1)
2400     max_tokens = G_MAXINT;
2401
2402   string_list = g_ptr_array_new ();
2403   remainder = string;
2404   s = strstr (remainder, delimiter);
2405   if (s)
2406     {
2407       gsize delimiter_len = strlen (delimiter);
2408
2409       while (--max_tokens && s)
2410         {
2411           gsize len;
2412
2413           len = s - remainder;
2414           g_ptr_array_add (string_list, g_strndup (remainder, len));
2415           remainder = s + delimiter_len;
2416           s = strstr (remainder, delimiter);
2417         }
2418     }
2419   if (*string)
2420     g_ptr_array_add (string_list, g_strdup (remainder));
2421
2422   g_ptr_array_add (string_list, NULL);
2423
2424   return (char **) g_ptr_array_free (string_list, FALSE);
2425 }
2426
2427 /**
2428  * g_strsplit_set:
2429  * @string: The string to be tokenized
2430  * @delimiters: A nul-terminated string containing bytes that are used
2431  *     to split the string (it can accept an empty string, which will result
2432  *     in no string splitting).
2433  * @max_tokens: The maximum number of tokens to split @string into.
2434  *     If this is less than 1, the string is split completely
2435  *
2436  * Splits @string into a number of tokens not containing any of the characters
2437  * in @delimiters. A token is the (possibly empty) longest string that does not
2438  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2439  * remainder is appended to the last token.
2440  *
2441  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2442  * %NULL-terminated vector containing the three strings "abc", "def",
2443  * and "ghi".
2444  *
2445  * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2446  * vector containing the four strings "", "def", "ghi", and "".
2447  *
2448  * As a special case, the result of splitting the empty string "" is an empty
2449  * vector, not a vector containing a single string. The reason for this
2450  * special case is that being able to represent an empty vector is typically
2451  * more useful than consistent handling of empty elements. If you do need
2452  * to represent empty elements, you'll need to check for the empty string
2453  * before calling g_strsplit_set().
2454  *
2455  * Note that this function works on bytes not characters, so it can't be used
2456  * to delimit UTF-8 strings for anything but ASCII characters.
2457  *
2458  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2459  *    g_strfreev() to free it.
2460  *
2461  * Since: 2.4
2462  **/
2463 gchar **
2464 g_strsplit_set (const gchar *string,
2465                 const gchar *delimiters,
2466                 gint         max_tokens)
2467 {
2468   guint8 delim_table[256]; /* 1 = index is a separator; 0 otherwise */
2469   GSList *tokens, *list;
2470   gint n_tokens;
2471   const gchar *s;
2472   const gchar *current;
2473   gchar *token;
2474   gchar **result;
2475
2476   g_return_val_if_fail (string != NULL, NULL);
2477   g_return_val_if_fail (delimiters != NULL, NULL);
2478
2479   if (max_tokens < 1)
2480     max_tokens = G_MAXINT;
2481
2482   if (*string == '\0')
2483     {
2484       result = g_new (char *, 1);
2485       result[0] = NULL;
2486       return result;
2487     }
2488
2489   /* Check if each character in @string is a separator, by indexing by the
2490    * character value into the @delim_table, which has value 1 stored at an index
2491    * if that index is a separator. */
2492   memset (delim_table, FALSE, sizeof (delim_table));
2493   for (s = delimiters; *s != '\0'; ++s)
2494     delim_table[*(guchar *)s] = TRUE;
2495
2496   tokens = NULL;
2497   n_tokens = 0;
2498
2499   s = current = string;
2500   while (*s != '\0')
2501     {
2502       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2503         {
2504           token = g_strndup (current, s - current);
2505           tokens = g_slist_prepend (tokens, token);
2506           ++n_tokens;
2507
2508           current = s + 1;
2509         }
2510
2511       ++s;
2512     }
2513
2514   token = g_strndup (current, s - current);
2515   tokens = g_slist_prepend (tokens, token);
2516   ++n_tokens;
2517
2518   result = g_new (gchar *, n_tokens + 1);
2519
2520   result[n_tokens] = NULL;
2521   for (list = tokens; list != NULL; list = list->next)
2522     result[--n_tokens] = list->data;
2523
2524   g_slist_free (tokens);
2525
2526   return result;
2527 }
2528
2529 /**
2530  * GStrv:
2531  *
2532  * A typedef alias for gchar**. This is mostly useful when used together with
2533  * g_auto().
2534  */
2535
2536 /**
2537  * g_strfreev:
2538  * @str_array: (nullable): a %NULL-terminated array of strings to free
2539  *
2540  * Frees a %NULL-terminated array of strings, as well as each
2541  * string it contains.
2542  *
2543  * If @str_array is %NULL, this function simply returns.
2544  */
2545 void
2546 g_strfreev (gchar **str_array)
2547 {
2548   if (str_array)
2549     {
2550       gsize i;
2551
2552       for (i = 0; str_array[i] != NULL; i++)
2553         g_free (str_array[i]);
2554
2555       g_free (str_array);
2556     }
2557 }
2558
2559 /**
2560  * g_strdupv:
2561  * @str_array: (nullable): a %NULL-terminated array of strings
2562  *
2563  * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2564  * the new array should be freed by first freeing each string, then
2565  * the array itself. g_strfreev() does this for you. If called
2566  * on a %NULL value, g_strdupv() simply returns %NULL.
2567  *
2568  * Returns: (nullable): a new %NULL-terminated array of strings.
2569  */
2570 gchar**
2571 g_strdupv (gchar **str_array)
2572 {
2573   if (str_array)
2574     {
2575       gsize i;
2576       gchar **retval;
2577
2578       i = 0;
2579       while (str_array[i])
2580         ++i;
2581
2582       retval = g_new (gchar*, i + 1);
2583
2584       i = 0;
2585       while (str_array[i])
2586         {
2587           retval[i] = g_strdup (str_array[i]);
2588           ++i;
2589         }
2590       retval[i] = NULL;
2591
2592       return retval;
2593     }
2594   else
2595     return NULL;
2596 }
2597
2598 /**
2599  * g_strjoinv:
2600  * @separator: (nullable): a string to insert between each of the
2601  *     strings, or %NULL
2602  * @str_array: a %NULL-terminated array of strings to join
2603  *
2604  * Joins a number of strings together to form one long string, with the
2605  * optional @separator inserted between each of them. The returned string
2606  * should be freed with g_free().
2607  *
2608  * If @str_array has no items, the return value will be an
2609  * empty string. If @str_array contains a single item, @separator will not
2610  * appear in the resulting string.
2611  *
2612  * Returns: a newly-allocated string containing all of the strings joined
2613  *     together, with @separator between them
2614  */
2615 gchar*
2616 g_strjoinv (const gchar  *separator,
2617             gchar       **str_array)
2618 {
2619   gchar *string;
2620   gchar *ptr;
2621
2622   g_return_val_if_fail (str_array != NULL, NULL);
2623
2624   if (separator == NULL)
2625     separator = "";
2626
2627   if (*str_array)
2628     {
2629       gsize i;
2630       gsize len;
2631       gsize separator_len;
2632
2633       separator_len = strlen (separator);
2634       /* First part, getting length */
2635       len = 1 + strlen (str_array[0]);
2636       for (i = 1; str_array[i] != NULL; i++)
2637         len += strlen (str_array[i]);
2638       len += separator_len * (i - 1);
2639
2640       /* Second part, building string */
2641       string = g_new (gchar, len);
2642       ptr = g_stpcpy (string, *str_array);
2643       for (i = 1; str_array[i] != NULL; i++)
2644         {
2645           ptr = g_stpcpy (ptr, separator);
2646           ptr = g_stpcpy (ptr, str_array[i]);
2647         }
2648       }
2649   else
2650     string = g_strdup ("");
2651
2652   return string;
2653 }
2654
2655 /**
2656  * g_strjoin:
2657  * @separator: (nullable): a string to insert between each of the
2658  *     strings, or %NULL
2659  * @...: a %NULL-terminated list of strings to join
2660  *
2661  * Joins a number of strings together to form one long string, with the
2662  * optional @separator inserted between each of them. The returned string
2663  * should be freed with g_free().
2664  *
2665  * Returns: a newly-allocated string containing all of the strings joined
2666  *     together, with @separator between them
2667  */
2668 gchar*
2669 g_strjoin (const gchar *separator,
2670            ...)
2671 {
2672   gchar *string, *s;
2673   va_list args;
2674   gsize len;
2675   gsize separator_len;
2676   gchar *ptr;
2677
2678   if (separator == NULL)
2679     separator = "";
2680
2681   separator_len = strlen (separator);
2682
2683   va_start (args, separator);
2684
2685   s = va_arg (args, gchar*);
2686
2687   if (s)
2688     {
2689       /* First part, getting length */
2690       len = 1 + strlen (s);
2691
2692       s = va_arg (args, gchar*);
2693       while (s)
2694         {
2695           len += separator_len + strlen (s);
2696           s = va_arg (args, gchar*);
2697         }
2698       va_end (args);
2699
2700       /* Second part, building string */
2701       string = g_new (gchar, len);
2702
2703       va_start (args, separator);
2704
2705       s = va_arg (args, gchar*);
2706       ptr = g_stpcpy (string, s);
2707
2708       s = va_arg (args, gchar*);
2709       while (s)
2710         {
2711           ptr = g_stpcpy (ptr, separator);
2712           ptr = g_stpcpy (ptr, s);
2713           s = va_arg (args, gchar*);
2714         }
2715     }
2716   else
2717     string = g_strdup ("");
2718
2719   va_end (args);
2720
2721   return string;
2722 }
2723
2724
2725 /**
2726  * g_strstr_len:
2727  * @haystack: a nul-terminated string
2728  * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2729  *     can be used to mean "search the entire string", like `strstr()`.
2730  * @needle: the string to search for
2731  *
2732  * Searches the string @haystack for the first occurrence
2733  * of the string @needle, limiting the length of the search
2734  * to @haystack_len.
2735  *
2736  * Returns: a pointer to the found occurrence, or
2737  *    %NULL if not found.
2738  */
2739 gchar *
2740 g_strstr_len (const gchar *haystack,
2741               gssize       haystack_len,
2742               const gchar *needle)
2743 {
2744   g_return_val_if_fail (haystack != NULL, NULL);
2745   g_return_val_if_fail (needle != NULL, NULL);
2746
2747   if (haystack_len < 0)
2748     return strstr (haystack, needle);
2749   else
2750     {
2751       const gchar *p = haystack;
2752       gsize needle_len = strlen (needle);
2753       gsize haystack_len_unsigned = haystack_len;
2754       const gchar *end;
2755       gsize i;
2756
2757       if (needle_len == 0)
2758         return (gchar *)haystack;
2759
2760       if (haystack_len_unsigned < needle_len)
2761         return NULL;
2762
2763       end = haystack + haystack_len - needle_len;
2764
2765       while (p <= end && *p)
2766         {
2767           for (i = 0; i < needle_len; i++)
2768             if (p[i] != needle[i])
2769               goto next;
2770
2771           return (gchar *)p;
2772
2773         next:
2774           p++;
2775         }
2776
2777       return NULL;
2778     }
2779 }
2780
2781 /**
2782  * g_strrstr:
2783  * @haystack: a nul-terminated string
2784  * @needle: the nul-terminated string to search for
2785  *
2786  * Searches the string @haystack for the last occurrence
2787  * of the string @needle.
2788  *
2789  * Returns: a pointer to the found occurrence, or
2790  *    %NULL if not found.
2791  */
2792 gchar *
2793 g_strrstr (const gchar *haystack,
2794            const gchar *needle)
2795 {
2796   gsize i;
2797   gsize needle_len;
2798   gsize haystack_len;
2799   const gchar *p;
2800
2801   g_return_val_if_fail (haystack != NULL, NULL);
2802   g_return_val_if_fail (needle != NULL, NULL);
2803
2804   needle_len = strlen (needle);
2805   haystack_len = strlen (haystack);
2806
2807   if (needle_len == 0)
2808     return (gchar *)haystack;
2809
2810   if (haystack_len < needle_len)
2811     return NULL;
2812
2813   p = haystack + haystack_len - needle_len;
2814
2815   while (p >= haystack)
2816     {
2817       for (i = 0; i < needle_len; i++)
2818         if (p[i] != needle[i])
2819           goto next;
2820
2821       return (gchar *)p;
2822
2823     next:
2824       p--;
2825     }
2826
2827   return NULL;
2828 }
2829
2830 /**
2831  * g_strrstr_len:
2832  * @haystack: a nul-terminated string
2833  * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2834  *     can be used to mean "search the entire string", like g_strrstr().
2835  * @needle: the nul-terminated string to search for
2836  *
2837  * Searches the string @haystack for the last occurrence
2838  * of the string @needle, limiting the length of the search
2839  * to @haystack_len.
2840  *
2841  * Returns: a pointer to the found occurrence, or
2842  *    %NULL if not found.
2843  */
2844 gchar *
2845 g_strrstr_len (const gchar *haystack,
2846                gssize        haystack_len,
2847                const gchar *needle)
2848 {
2849   g_return_val_if_fail (haystack != NULL, NULL);
2850   g_return_val_if_fail (needle != NULL, NULL);
2851
2852   if (haystack_len < 0)
2853     return g_strrstr (haystack, needle);
2854   else
2855     {
2856       gsize needle_len = strlen (needle);
2857       const gchar *haystack_max = haystack + haystack_len;
2858       const gchar *p = haystack;
2859       gsize i;
2860
2861       while (p < haystack_max && *p)
2862         p++;
2863
2864       if (p < haystack + needle_len)
2865         return NULL;
2866
2867       p -= needle_len;
2868
2869       while (p >= haystack)
2870         {
2871           for (i = 0; i < needle_len; i++)
2872             if (p[i] != needle[i])
2873               goto next;
2874
2875           return (gchar *)p;
2876
2877         next:
2878           p--;
2879         }
2880
2881       return NULL;
2882     }
2883 }
2884
2885
2886 /**
2887  * g_str_has_suffix:
2888  * @str: a nul-terminated string
2889  * @suffix: the nul-terminated suffix to look for
2890  *
2891  * Looks whether the string @str ends with @suffix.
2892  *
2893  * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise.
2894  *
2895  * Since: 2.2
2896  */
2897 gboolean
2898 g_str_has_suffix (const gchar *str,
2899                   const gchar *suffix)
2900 {
2901   gsize str_len;
2902   gsize suffix_len;
2903
2904   g_return_val_if_fail (str != NULL, FALSE);
2905   g_return_val_if_fail (suffix != NULL, FALSE);
2906
2907   str_len = strlen (str);
2908   suffix_len = strlen (suffix);
2909
2910   if (str_len < suffix_len)
2911     return FALSE;
2912
2913   return strcmp (str + str_len - suffix_len, suffix) == 0;
2914 }
2915
2916 /**
2917  * g_str_has_prefix:
2918  * @str: a nul-terminated string
2919  * @prefix: the nul-terminated prefix to look for
2920  *
2921  * Looks whether the string @str begins with @prefix.
2922  *
2923  * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2924  *
2925  * Since: 2.2
2926  */
2927 gboolean
2928 g_str_has_prefix (const gchar *str,
2929                   const gchar *prefix)
2930 {
2931   g_return_val_if_fail (str != NULL, FALSE);
2932   g_return_val_if_fail (prefix != NULL, FALSE);
2933
2934   return strncmp (str, prefix, strlen (prefix)) == 0;
2935 }
2936
2937 /**
2938  * g_strv_length:
2939  * @str_array: a %NULL-terminated array of strings
2940  *
2941  * Returns the length of the given %NULL-terminated
2942  * string array @str_array. @str_array must not be %NULL.
2943  *
2944  * Returns: length of @str_array.
2945  *
2946  * Since: 2.6
2947  */
2948 guint
2949 g_strv_length (gchar **str_array)
2950 {
2951   guint i = 0;
2952
2953   g_return_val_if_fail (str_array != NULL, 0);
2954
2955   while (str_array[i])
2956     ++i;
2957
2958   return i;
2959 }
2960
2961 static void
2962 index_add_folded (GPtrArray   *array,
2963                   const gchar *start,
2964                   const gchar *end)
2965 {
2966   gchar *normal;
2967
2968   normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2969
2970   /* TODO: Invent time machine.  Converse with Mustafa Ataturk... */
2971   if (strstr (normal, "ı") || strstr (normal, "İ"))
2972     {
2973       gchar *s = normal;
2974       GString *tmp;
2975
2976       tmp = g_string_new (NULL);
2977
2978       while (*s)
2979         {
2980           gchar *i, *I, *e;
2981
2982           i = strstr (s, "ı");
2983           I = strstr (s, "İ");
2984
2985           if (!i && !I)
2986             break;
2987           else if (i && !I)
2988             e = i;
2989           else if (I && !i)
2990             e = I;
2991           else if (i < I)
2992             e = i;
2993           else
2994             e = I;
2995
2996           g_string_append_len (tmp, s, e - s);
2997           g_string_append_c (tmp, 'i');
2998           s = g_utf8_next_char (e);
2999         }
3000
3001       g_string_append (tmp, s);
3002       g_free (normal);
3003       normal = g_string_free (tmp, FALSE);
3004     }
3005
3006   g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3007   g_free (normal);
3008 }
3009
3010 static gchar **
3011 split_words (const gchar *value)
3012 {
3013   const gchar *start = NULL;
3014   GPtrArray *result;
3015   const gchar *s;
3016
3017   result = g_ptr_array_new ();
3018
3019   for (s = value; *s; s = g_utf8_next_char (s))
3020     {
3021       gunichar c = g_utf8_get_char (s);
3022
3023       if (start == NULL)
3024         {
3025           if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3026             start = s;
3027         }
3028       else
3029         {
3030           if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3031             {
3032               index_add_folded (result, start, s);
3033               start = NULL;
3034             }
3035         }
3036     }
3037
3038   if (start)
3039     index_add_folded (result, start, s);
3040
3041   g_ptr_array_add (result, NULL);
3042
3043   return (gchar **) g_ptr_array_free (result, FALSE);
3044 }
3045
3046 /**
3047  * g_str_tokenize_and_fold:
3048  * @string: a string
3049  * @translit_locale: (nullable): the language code (like 'de' or
3050  *   'en_GB') from which @string originates
3051  * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3052  *   return location for ASCII alternates
3053  *
3054  * Tokenises @string and performs folding on each token.
3055  *
3056  * A token is a non-empty sequence of alphanumeric characters in the
3057  * source string, separated by non-alphanumeric characters.  An
3058  * "alphanumeric" character for this purpose is one that matches
3059  * g_unichar_isalnum() or g_unichar_ismark().
3060  *
3061  * Each token is then (Unicode) normalised and case-folded.  If
3062  * @ascii_alternates is non-%NULL and some of the returned tokens
3063  * contain non-ASCII characters, ASCII alternatives will be generated.
3064  *
3065  * The number of ASCII alternatives that are generated and the method
3066  * for doing so is unspecified, but @translit_locale (if specified) may
3067  * improve the transliteration if the language of the source string is
3068  * known.
3069  *
3070  * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3071  *
3072  * Since: 2.40
3073  **/
3074 gchar **
3075 g_str_tokenize_and_fold (const gchar   *string,
3076                          const gchar   *translit_locale,
3077                          gchar       ***ascii_alternates)
3078 {
3079   gchar **result;
3080
3081   g_return_val_if_fail (string != NULL, NULL);
3082
3083   if (ascii_alternates && g_str_is_ascii (string))
3084     {
3085       *ascii_alternates = g_new0 (gchar *, 0 + 1);
3086       ascii_alternates = NULL;
3087     }
3088
3089   result = split_words (string);
3090
3091   if (ascii_alternates)
3092     {
3093       gint i, j, n;
3094
3095       n = g_strv_length (result);
3096       *ascii_alternates = g_new (gchar *, n + 1);
3097       j = 0;
3098
3099       for (i = 0; i < n; i++)
3100         {
3101           if (!g_str_is_ascii (result[i]))
3102             {
3103               gchar *composed;
3104               gchar *ascii;
3105               gint k;
3106
3107               composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3108
3109               ascii = g_str_to_ascii (composed, translit_locale);
3110
3111               /* Only accept strings that are now entirely alnums */
3112               for (k = 0; ascii[k]; k++)
3113                 if (!g_ascii_isalnum (ascii[k]))
3114                   break;
3115
3116               if (ascii[k] == '\0')
3117                 /* Made it to the end... */
3118                 (*ascii_alternates)[j++] = ascii;
3119               else
3120                 g_free (ascii);
3121
3122               g_free (composed);
3123             }
3124         }
3125
3126       (*ascii_alternates)[j] = NULL;
3127     }
3128
3129   return result;
3130 }
3131
3132 /**
3133  * g_str_match_string:
3134  * @search_term: the search term from the user
3135  * @potential_hit: the text that may be a hit
3136  * @accept_alternates: %TRUE to accept ASCII alternates
3137  *
3138  * Checks if a search conducted for @search_term should match
3139  * @potential_hit.
3140  *
3141  * This function calls g_str_tokenize_and_fold() on both
3142  * @search_term and @potential_hit.  ASCII alternates are never taken
3143  * for @search_term but will be taken for @potential_hit according to
3144  * the value of @accept_alternates.
3145  *
3146  * A hit occurs when each folded token in @search_term is a prefix of a
3147  * folded token from @potential_hit.
3148  *
3149  * Depending on how you're performing the search, it will typically be
3150  * faster to call g_str_tokenize_and_fold() on each string in
3151  * your corpus and build an index on the returned folded tokens, then
3152  * call g_str_tokenize_and_fold() on the search term and
3153  * perform lookups into that index.
3154  *
3155  * As some examples, searching for ‘fred’ would match the potential hit
3156  * ‘Smith, Fred’ and also ‘Frédéric’.  Searching for ‘Fréd’ would match
3157  * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3158  * accent matching).  Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3159  * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3160  *
3161  * Returns: %TRUE if @potential_hit is a hit
3162  *
3163  * Since: 2.40
3164  **/
3165 gboolean
3166 g_str_match_string (const gchar *search_term,
3167                     const gchar *potential_hit,
3168                     gboolean     accept_alternates)
3169 {
3170   gchar **alternates = NULL;
3171   gchar **term_tokens;
3172   gchar **hit_tokens;
3173   gboolean matched;
3174   gint i, j;
3175
3176   g_return_val_if_fail (search_term != NULL, FALSE);
3177   g_return_val_if_fail (potential_hit != NULL, FALSE);
3178
3179   term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3180   hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3181
3182   matched = TRUE;
3183
3184   for (i = 0; term_tokens[i]; i++)
3185     {
3186       for (j = 0; hit_tokens[j]; j++)
3187         if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3188           goto one_matched;
3189
3190       if (accept_alternates)
3191         for (j = 0; alternates[j]; j++)
3192           if (g_str_has_prefix (alternates[j], term_tokens[i]))
3193             goto one_matched;
3194
3195       matched = FALSE;
3196       break;
3197
3198 one_matched:
3199       continue;
3200     }
3201
3202   g_strfreev (term_tokens);
3203   g_strfreev (hit_tokens);
3204   g_strfreev (alternates);
3205
3206   return matched;
3207 }
3208
3209 /**
3210  * g_strv_contains:
3211  * @strv: a %NULL-terminated array of strings
3212  * @str: a string
3213  *
3214  * Checks if @strv contains @str. @strv must not be %NULL.
3215  *
3216  * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3217  *
3218  * Since: 2.44
3219  */
3220 gboolean
3221 g_strv_contains (const gchar * const *strv,
3222                  const gchar         *str)
3223 {
3224   g_return_val_if_fail (strv != NULL, FALSE);
3225   g_return_val_if_fail (str != NULL, FALSE);
3226
3227   for (; *strv != NULL; strv++)
3228     {
3229       if (g_str_equal (str, *strv))
3230         return TRUE;
3231     }
3232
3233   return FALSE;
3234 }
3235
3236 /**
3237  * g_strv_equal:
3238  * @strv1: a %NULL-terminated array of strings
3239  * @strv2: another %NULL-terminated array of strings
3240  *
3241  * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3242  * same order. Elements are compared using g_str_equal(). To match independently
3243  * of order, sort the arrays first (using g_qsort_with_data() or similar).
3244  *
3245  * Two empty arrays are considered equal. Neither @strv1 not @strv2 may be
3246  * %NULL.
3247  *
3248  * Returns: %TRUE if @strv1 and @strv2 are equal
3249  * Since: 2.60
3250  */
3251 gboolean
3252 g_strv_equal (const gchar * const *strv1,
3253               const gchar * const *strv2)
3254 {
3255   g_return_val_if_fail (strv1 != NULL, FALSE);
3256   g_return_val_if_fail (strv2 != NULL, FALSE);
3257
3258   if (strv1 == strv2)
3259     return TRUE;
3260
3261   for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3262     {
3263       if (!g_str_equal (*strv1, *strv2))
3264         return FALSE;
3265     }
3266
3267   return (*strv1 == NULL && *strv2 == NULL);
3268 }
3269
3270 static gboolean
3271 str_has_sign (const gchar *str)
3272 {
3273   return str[0] == '-' || str[0] == '+';
3274 }
3275
3276 static gboolean
3277 str_has_hex_prefix (const gchar *str)
3278 {
3279   return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3280 }
3281
3282 /**
3283  * g_ascii_string_to_signed:
3284  * @str: a string
3285  * @base: base of a parsed number
3286  * @min: a lower bound (inclusive)
3287  * @max: an upper bound (inclusive)
3288  * @out_num: (out) (optional): a return location for a number
3289  * @error: a return location for #GError
3290  *
3291  * A convenience function for converting a string to a signed number.
3292  *
3293  * This function assumes that @str contains only a number of the given
3294  * @base that is within inclusive bounds limited by @min and @max. If
3295  * this is true, then the converted number is stored in @out_num. An
3296  * empty string is not a valid input. A string with leading or
3297  * trailing whitespace is also an invalid input.
3298  *
3299  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3300  * not be prefixed with "0x" or "0X". Such a problem does not exist
3301  * for octal numbers, since they were usually prefixed with a zero
3302  * which does not change the value of the parsed number.
3303  *
3304  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3305  * domain. If the input is invalid, the error code will be
3306  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3307  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3308  *
3309  * See g_ascii_strtoll() if you have more complex needs such as
3310  * parsing a string which starts with a number, but then has other
3311  * characters.
3312  *
3313  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3314  *
3315  * Since: 2.54
3316  */
3317 gboolean
3318 g_ascii_string_to_signed (const gchar  *str,
3319                           guint         base,
3320                           gint64        min,
3321                           gint64        max,
3322                           gint64       *out_num,
3323                           GError      **error)
3324 {
3325   gint64 number;
3326   const gchar *end_ptr = NULL;
3327   gint saved_errno = 0;
3328
3329   g_return_val_if_fail (str != NULL, FALSE);
3330   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3331   g_return_val_if_fail (min <= max, FALSE);
3332   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3333
3334   if (str[0] == '\0')
3335     {
3336       g_set_error_literal (error,
3337                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3338                            _("Empty string is not a number"));
3339       return FALSE;
3340     }
3341
3342   errno = 0;
3343   number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3344   saved_errno = errno;
3345
3346   if (/* We do not allow leading whitespace, but g_ascii_strtoll
3347        * accepts it and just skips it, so we need to check for it
3348        * ourselves.
3349        */
3350       g_ascii_isspace (str[0]) ||
3351       /* We don't support hexadecimal numbers prefixed with 0x or
3352        * 0X.
3353        */
3354       (base == 16 &&
3355        (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3356       (saved_errno != 0 && saved_errno != ERANGE) ||
3357       end_ptr == NULL ||
3358       *end_ptr != '\0')
3359     {
3360       g_set_error (error,
3361                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3362                    _("“%s” is not a signed number"), str);
3363       return FALSE;
3364     }
3365   if (saved_errno == ERANGE || number < min || number > max)
3366     {
3367       gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3368       gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3369
3370       g_set_error (error,
3371                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3372                    _("Number “%s” is out of bounds [%s, %s]"),
3373                    str, min_str, max_str);
3374       g_free (min_str);
3375       g_free (max_str);
3376       return FALSE;
3377     }
3378   if (out_num != NULL)
3379     *out_num = number;
3380   return TRUE;
3381 }
3382
3383 /**
3384  * g_ascii_string_to_unsigned:
3385  * @str: a string
3386  * @base: base of a parsed number
3387  * @min: a lower bound (inclusive)
3388  * @max: an upper bound (inclusive)
3389  * @out_num: (out) (optional): a return location for a number
3390  * @error: a return location for #GError
3391  *
3392  * A convenience function for converting a string to an unsigned number.
3393  *
3394  * This function assumes that @str contains only a number of the given
3395  * @base that is within inclusive bounds limited by @min and @max. If
3396  * this is true, then the converted number is stored in @out_num. An
3397  * empty string is not a valid input. A string with leading or
3398  * trailing whitespace is also an invalid input. A string with a leading sign
3399  * (`-` or `+`) is not a valid input for the unsigned parser.
3400  *
3401  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3402  * not be prefixed with "0x" or "0X". Such a problem does not exist
3403  * for octal numbers, since they were usually prefixed with a zero
3404  * which does not change the value of the parsed number.
3405  *
3406  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3407  * domain. If the input is invalid, the error code will be
3408  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3409  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3410  *
3411  * See g_ascii_strtoull() if you have more complex needs such as
3412  * parsing a string which starts with a number, but then has other
3413  * characters.
3414  *
3415  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3416  *
3417  * Since: 2.54
3418  */
3419 gboolean
3420 g_ascii_string_to_unsigned (const gchar  *str,
3421                             guint         base,
3422                             guint64       min,
3423                             guint64       max,
3424                             guint64      *out_num,
3425                             GError      **error)
3426 {
3427   guint64 number;
3428   const gchar *end_ptr = NULL;
3429   gint saved_errno = 0;
3430
3431   g_return_val_if_fail (str != NULL, FALSE);
3432   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3433   g_return_val_if_fail (min <= max, FALSE);
3434   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3435
3436   if (str[0] == '\0')
3437     {
3438       g_set_error_literal (error,
3439                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3440                            _("Empty string is not a number"));
3441       return FALSE;
3442     }
3443
3444   errno = 0;
3445   number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3446   saved_errno = errno;
3447
3448   if (/* We do not allow leading whitespace, but g_ascii_strtoull
3449        * accepts it and just skips it, so we need to check for it
3450        * ourselves.
3451        */
3452       g_ascii_isspace (str[0]) ||
3453       /* Unsigned number should have no sign.
3454        */
3455       str_has_sign (str) ||
3456       /* We don't support hexadecimal numbers prefixed with 0x or
3457        * 0X.
3458        */
3459       (base == 16 && str_has_hex_prefix (str)) ||
3460       (saved_errno != 0 && saved_errno != ERANGE) ||
3461       end_ptr == NULL ||
3462       *end_ptr != '\0')
3463     {
3464       g_set_error (error,
3465                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3466                    _("“%s” is not an unsigned number"), str);
3467       return FALSE;
3468     }
3469   if (saved_errno == ERANGE || number < min || number > max)
3470     {
3471       gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3472       gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3473
3474       g_set_error (error,
3475                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3476                    _("Number “%s” is out of bounds [%s, %s]"),
3477                    str, min_str, max_str);
3478       g_free (min_str);
3479       g_free (max_str);
3480       return FALSE;
3481     }
3482   if (out_num != NULL)
3483     *out_num = number;
3484   return TRUE;
3485 }
3486
/* Invokes G_DEFINE_QUARK with the string "g-number-parser-error-quark" and
 * the name prefix g_number_parser_error.  Presumably this defines the quark
 * accessor behind the %G_NUMBER_PARSER_ERROR domain used by the number
 * parsers in this file -- TODO confirm against the macro's definition.
 */
G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)