glib/gstrfuncs.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the
  16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  17  * Boston, MA 02111-1307, USA.
  18  */
  19
  20 /*
  21  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
  22  * file for a list of people on the GLib Team.  See the ChangeLog
  23  * files for a list of changes.  These files are distributed with
  24  * GLib at ftp://ftp.gtk.org/pub/gtk/.
  25  */
  26
  27 /*
  28  * MT safe
  29  */
  30
  31 #include "config.h"
  32
  33 #include <stdarg.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <locale.h>
  37 #include <string.h>
  38 #include <locale.h>
  39 #include <errno.h>
  40 #include <ctype.h>              /* For tolower() */
  41
  42 #ifdef HAVE_XLOCALE_H\r
  43 /* Needed on BSD/OS X for e.g. strtod_l */\r
  44 #include <xlocale.h>\r
  45 #endif
  46
  47 #ifdef G_OS_WIN32
  48 #include <windows.h>
  49 #endif
  50
  51 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
  52
  53 #include "gstrfuncs.h"
  54
  55 #include "gprintf.h"
  56 #include "gprintfint.h"
  57 #include "glibintl.h"
  58
  59
  60 /**
  61  * SECTION:string_utils
  62  * @title: String Utility Functions
  63  * @short_description: various string-related functions
  64  *
  65  * This section describes a number of utility functions for creating,
  66  * duplicating, and manipulating strings.
  67  *
  68  * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
  69  * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
  70  * are declared in the header <filename>gprintf.h</filename> which is
  71  * <emphasis>not</emphasis> included in <filename>glib.h</filename>
  72  * (otherwise using <filename>glib.h</filename> would drag in
  73  * <filename>stdio.h</filename>), so you'll have to explicitly include
  74  * <literal>&lt;glib/gprintf.h&gt;</literal> in order to use the GLib
  75  * printf() functions.
  76  *
  77  * <para id="string-precision">While you may use the printf() functions
  78  * to format UTF-8 strings, notice that the precision of a
  79  * <literal>&percnt;Ns</literal> parameter is interpreted as the
  80  * number of <emphasis>bytes</emphasis>, not <emphasis>characters</emphasis>
  81  * to print. On top of that, the GNU libc implementation of the printf()
  82  * functions has the "feature" that it checks that the string given for
  83  * the <literal>&percnt;Ns</literal> parameter consists of a whole number
  84  * of characters in the current encoding. So, unless you are sure you are
 * always going to be in a UTF-8 locale or you know your text is restricted
  86  * to ASCII, avoid using <literal>&percnt;Ns</literal>. If your intention is
  87  * to format strings for a certain number of columns, then
  88  * <literal>&percnt;Ns</literal> is not a correct solution anyway, since it
  89  * fails to take wide characters (see g_unichar_iswide()) into account.
  90  * </para>
  91  */
  92
  93 /**
  94  * g_ascii_isalnum:
  95  * @c: any character
  96  *
  97  * Determines whether a character is alphanumeric.
  98  *
  99  * Unlike the standard C library isalnum() function, this only
 100  * recognizes standard ASCII letters and ignores the locale,
 101  * returning %FALSE for all non-ASCII characters. Also, unlike
 102  * the standard library function, this takes a <type>char</type>,
 103  * not an <type>int</type>, so don't call it on %EOF, but no need to
 104  * cast to #guchar before passing a possibly non-ASCII character in.
 105  *
 106  * Returns: %TRUE if @c is an ASCII alphanumeric character
 107  */
 108
 109 /**
 110  * g_ascii_isalpha:
 111  * @c: any character
 112  *
 113  * Determines whether a character is alphabetic (i.e. a letter).
 114  *
 115  * Unlike the standard C library isalpha() function, this only
 116  * recognizes standard ASCII letters and ignores the locale,
 117  * returning %FALSE for all non-ASCII characters. Also, unlike
 118  * the standard library function, this takes a <type>char</type>,
 119  * not an <type>int</type>, so don't call it on %EOF, but no need to
 120  * cast to #guchar before passing a possibly non-ASCII character in.
 121  *
 122  * Returns: %TRUE if @c is an ASCII alphabetic character
 123  */
 124
 125 /**
 126  * g_ascii_iscntrl:
 127  * @c: any character
 128  *
 129  * Determines whether a character is a control character.
 130  *
 131  * Unlike the standard C library iscntrl() function, this only
 132  * recognizes standard ASCII control characters and ignores the
 133  * locale, returning %FALSE for all non-ASCII characters. Also,
 134  * unlike the standard library function, this takes a <type>char</type>,
 135  * not an <type>int</type>, so don't call it on %EOF, but no need to
 136  * cast to #guchar before passing a possibly non-ASCII character in.
 137  *
 138  * Returns: %TRUE if @c is an ASCII control character.
 139  */
 140
 141 /**
 142  * g_ascii_isdigit:
 143  * @c: any character
 144  *
 * Determines whether a character is a digit (0-9).
 146  *
 147  * Unlike the standard C library isdigit() function, this takes
 148  * a <type>char</type>, not an <type>int</type>, so don't call it
 149  * on %EOF, but no need to cast to #guchar before passing a possibly
 150  * non-ASCII character in.
 151  *
 152  * Returns: %TRUE if @c is an ASCII digit.
 153  */
 154
 155 /**
 156  * g_ascii_isgraph:
 157  * @c: any character
 158  *
 159  * Determines whether a character is a printing character and not a space.
 160  *
 161  * Unlike the standard C library isgraph() function, this only
 162  * recognizes standard ASCII characters and ignores the locale,
 163  * returning %FALSE for all non-ASCII characters. Also, unlike
 164  * the standard library function, this takes a <type>char</type>,
 165  * not an <type>int</type>, so don't call it on %EOF, but no need
 166  * to cast to #guchar before passing a possibly non-ASCII character in.
 167  *
 168  * Returns: %TRUE if @c is an ASCII printing character other than space.
 169  */
 170
 171 /**
 172  * g_ascii_islower:
 173  * @c: any character
 174  *
 175  * Determines whether a character is an ASCII lower case letter.
 176  *
 177  * Unlike the standard C library islower() function, this only
 178  * recognizes standard ASCII letters and ignores the locale,
 179  * returning %FALSE for all non-ASCII characters. Also, unlike
 180  * the standard library function, this takes a <type>char</type>,
 181  * not an <type>int</type>, so don't call it on %EOF, but no need
 182  * to worry about casting to #guchar before passing a possibly
 183  * non-ASCII character in.
 184  *
 185  * Returns: %TRUE if @c is an ASCII lower case letter
 186  */
 187
 188 /**
 189  * g_ascii_isprint:
 190  * @c: any character
 191  *
 192  * Determines whether a character is a printing character.
 193  *
 194  * Unlike the standard C library isprint() function, this only
 195  * recognizes standard ASCII characters and ignores the locale,
 196  * returning %FALSE for all non-ASCII characters. Also, unlike
 197  * the standard library function, this takes a <type>char</type>,
 198  * not an <type>int</type>, so don't call it on %EOF, but no need
 199  * to cast to #guchar before passing a possibly non-ASCII character in.
 200  *
 201  * Returns: %TRUE if @c is an ASCII printing character.
 202  */
 203
 204 /**
 205  * g_ascii_ispunct:
 206  * @c: any character
 207  *
 208  * Determines whether a character is a punctuation character.
 209  *
 210  * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation and ignores the locale,
 212  * returning %FALSE for all non-ASCII characters. Also, unlike
 213  * the standard library function, this takes a <type>char</type>,
 214  * not an <type>int</type>, so don't call it on %EOF, but no need to
 215  * cast to #guchar before passing a possibly non-ASCII character in.
 216  *
 217  * Returns: %TRUE if @c is an ASCII punctuation character.
 218  */
 219
 220 /**
 221  * g_ascii_isspace:
 222  * @c: any character
 223  *
 224  * Determines whether a character is a white-space character.
 225  *
 226  * Unlike the standard C library isspace() function, this only
 227  * recognizes standard ASCII white-space and ignores the locale,
 228  * returning %FALSE for all non-ASCII characters. Also, unlike
 229  * the standard library function, this takes a <type>char</type>,
 230  * not an <type>int</type>, so don't call it on %EOF, but no need to
 231  * cast to #guchar before passing a possibly non-ASCII character in.
 232  *
 233  * Returns: %TRUE if @c is an ASCII white-space character
 234  */
 235
 236 /**
 237  * g_ascii_isupper:
 238  * @c: any character
 239  *
 240  * Determines whether a character is an ASCII upper case letter.
 241  *
 242  * Unlike the standard C library isupper() function, this only
 243  * recognizes standard ASCII letters and ignores the locale,
 244  * returning %FALSE for all non-ASCII characters. Also, unlike
 245  * the standard library function, this takes a <type>char</type>,
 246  * not an <type>int</type>, so don't call it on %EOF, but no need to
 247  * worry about casting to #guchar before passing a possibly non-ASCII
 248  * character in.
 249  *
 250  * Returns: %TRUE if @c is an ASCII upper case letter
 251  */
 252
 253 /**
 254  * g_ascii_isxdigit:
 255  * @c: any character
 256  *
 257  * Determines whether a character is a hexadecimal-digit character.
 258  *
 259  * Unlike the standard C library isxdigit() function, this takes
 260  * a <type>char</type>, not an <type>int</type>, so don't call it
 261  * on %EOF, but no need to cast to #guchar before passing a
 262  * possibly non-ASCII character in.
 263  *
 264  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
 265  */
 266
 267 /**
 268  * G_ASCII_DTOSTR_BUF_SIZE:
 269  *
 270  * A good size for a buffer to be passed into g_ascii_dtostr().
 271  * It is guaranteed to be enough for all output of that function
 272  * on systems with 64bit IEEE-compatible doubles.
 273  *
 274  * The typical usage would be something like:
 275  * |[
 276  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
 277  *
 278  *   fprintf (out, "value=&percnt;s\n", g_ascii_dtostr (buf, sizeof (buf), value));
 279  * ]|
 280  */
 281
 282 /**
 283  * g_strstrip:
 284  * @string: a string to remove the leading and trailing whitespace from
 285  *
 286  * Removes leading and trailing whitespace from a string.
 287  * See g_strchomp() and g_strchug().
 288  *
 289  * Returns: @string
 290  */
 291
 292 /**
 293  * G_STR_DELIMITERS:
 294  *
 295  * The standard delimiters, used in g_strdelimit().
 296  */
 297
/*
 * Per-byte classification table, indexed by unsigned byte value.
 *
 * Only the 128 ASCII entries are written out; the array is declared with
 * 256 elements, so the remaining 128 are implicitly zero-initialized.
 *
 * NOTE(review): each entry looks like a bitmask of the GAsciiType flags
 * consumed by the g_ascii_is*() macros in gstrfuncs.h; the flag
 * definitions are not visible here, so confirm the bit meanings there.
 */
static const guint16 ascii_table_data[256] = {
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x00 - 0x07 */
  0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004, /* 0x08 - 0x0f */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x10 - 0x17 */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x18 - 0x1f */
  0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x20 - 0x27: ' ' first */
  0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x28 - 0x2f */
  0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, /* 0x30 - 0x37: '0' - '7' */
  0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x38 - 0x3f: '8', '9', ... */
  0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253, /* 0x40 - 0x47: '@', 'A' - 'G' */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x48 - 0x4f: 'H' - 'O' */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x50 - 0x57: 'P' - 'W' */
  0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x58 - 0x5f: 'X' - 'Z', ... */
  0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073, /* 0x60 - 0x67: '`', 'a' - 'g' */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x68 - 0x6f: 'h' - 'o' */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x70 - 0x77: 'p' - 'w' */
  0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004  /* 0x78 - 0x7f: 'x' - 'z', ... */
  /* the upper 128 are all zeroes */
};

/* Exported pointer to the table above; the table itself stays file-local. */
const guint16 * const g_ascii_table = ascii_table_data;
 319
 320 #ifdef HAVE_NEWLOCALE
 321 static locale_t
 322 get_C_locale (void)
 323 {
 324   static gsize initialized = FALSE;
 325   static locale_t C_locale = NULL;
 326
 327   if (g_once_init_enter (&initialized))
 328     {
 329       C_locale = newlocale (LC_ALL_MASK, "C", NULL);
 330       g_once_init_leave (&initialized, TRUE);
 331     }
 332
 333   return C_locale;
 334 }
 335 #endif
 336
 337 /**
 338  * g_strdup:
 339  * @str: the string to duplicate
 340  *
 341  * Duplicates a string. If @str is %NULL it returns %NULL.
 342  * The returned string should be freed with g_free()
 343  * when no longer needed.
 344  *
 345  * Returns: a newly-allocated copy of @str
 346  */
 347 gchar*
 348 g_strdup (const gchar *str)
 349 {
 350   gchar *new_str;
 351   gsize length;
 352
 353   if (str)
 354     {
 355       length = strlen (str) + 1;
 356       new_str = g_new (char, length);
 357       memcpy (new_str, str, length);
 358     }
 359   else
 360     new_str = NULL;
 361
 362   return new_str;
 363 }
 364
 365 /**
 366  * g_memdup:
 367  * @mem: the memory to copy.
 368  * @byte_size: the number of bytes to copy.
 369  *
 370  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 371  * from @mem. If @mem is %NULL it returns %NULL.
 372  *
 373  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
 374  *  is %NULL.
 375  */
 376 gpointer
 377 g_memdup (gconstpointer mem,
 378           guint         byte_size)
 379 {
 380   gpointer new_mem;
 381
 382   if (mem)
 383     {
 384       new_mem = g_malloc (byte_size);
 385       memcpy (new_mem, mem, byte_size);
 386     }
 387   else
 388     new_mem = NULL;
 389
 390   return new_mem;
 391 }
 392
 393 /**
 394  * g_strndup:
 395  * @str: the string to duplicate
 396  * @n: the maximum number of bytes to copy from @str
 397  *
 398  * Duplicates the first @n bytes of a string, returning a newly-allocated
 399  * buffer @n + 1 bytes long which will always be nul-terminated.
 400  * If @str is less than @n bytes long the buffer is padded with nuls.
 401  * If @str is %NULL it returns %NULL.
 402  * The returned value should be freed when no longer needed.
 403  *
 404  * <note><para>
 405  * To copy a number of characters from a UTF-8 encoded string, use
 406  * g_utf8_strncpy() instead.
 407  * </para></note>
 408  *
 409  * Returns: a newly-allocated buffer containing the first @n bytes
 410  *          of @str, nul-terminated
 411  */
 412 gchar*
 413 g_strndup (const gchar *str,
 414            gsize        n)
 415 {
 416   gchar *new_str;
 417
 418   if (str)
 419     {
 420       new_str = g_new (gchar, n + 1);
 421       strncpy (new_str, str, n);
 422       new_str[n] = '\0';
 423     }
 424   else
 425     new_str = NULL;
 426
 427   return new_str;
 428 }
 429
 430 /**
 431  * g_strnfill:
 432  * @length: the length of the new string
 433  * @fill_char: the byte to fill the string with
 434  *
 435  * Creates a new string @length bytes long filled with @fill_char.
 436  * The returned string should be freed when no longer needed.
 437  *
 438  * Returns: a newly-allocated string filled the @fill_char
 439  */
 440 gchar*
 441 g_strnfill (gsize length,
 442             gchar fill_char)
 443 {
 444   gchar *str;
 445
 446   str = g_new (gchar, length + 1);
 447   memset (str, (guchar)fill_char, length);
 448   str[length] = '\0';
 449
 450   return str;
 451 }
 452
 453 /**
 454  * g_stpcpy:
 455  * @dest: destination buffer.
 456  * @src: source string.
 457  *
 458  * Copies a nul-terminated string into the dest buffer, include the
 459  * trailing nul, and return a pointer to the trailing nul byte.
 460  * This is useful for concatenating multiple strings together
 461  * without having to repeatedly scan for the end.
 462  *
 463  * Return value: a pointer to trailing nul byte.
 464  **/
 465 gchar *
 466 g_stpcpy (gchar       *dest,
 467           const gchar *src)
 468 {
 469 #ifdef HAVE_STPCPY
 470   g_return_val_if_fail (dest != NULL, NULL);
 471   g_return_val_if_fail (src != NULL, NULL);
 472   return stpcpy (dest, src);
 473 #else
 474   register gchar *d = dest;
 475   register const gchar *s = src;
 476
 477   g_return_val_if_fail (dest != NULL, NULL);
 478   g_return_val_if_fail (src != NULL, NULL);
 479   do
 480     *d++ = *s;
 481   while (*s++ != '\0');
 482
 483   return d - 1;
 484 #endif
 485 }
 486
 487 /**
 488  * g_strdup_vprintf:
 489  * @format: a standard printf() format string, but notice
 490  *     <link linkend="string-precision">string precision pitfalls</link>
 491  * @args: the list of parameters to insert into the format string
 492  *
 493  * Similar to the standard C vsprintf() function but safer, since it
 494  * calculates the maximum space required and allocates memory to hold
 495  * the result. The returned string should be freed with g_free() when
 496  * no longer needed.
 497  *
 498  * See also g_vasprintf(), which offers the same functionality, but
 499  * additionally returns the length of the allocated string.
 500  *
 501  * Returns: a newly-allocated string holding the result
 502  */
 503 gchar*
 504 g_strdup_vprintf (const gchar *format,
 505                   va_list      args)
 506 {
 507   gchar *string = NULL;
 508
 509   g_vasprintf (&string, format, args);
 510
 511   return string;
 512 }
 513
 514 /**
 515  * g_strdup_printf:
 516  * @format: a standard printf() format string, but notice
 517  *     <link linkend="string-precision">string precision pitfalls</link>
 518  * @...: the parameters to insert into the format string
 519  *
 520  * Similar to the standard C sprintf() function but safer, since it
 521  * calculates the maximum space required and allocates memory to hold
 522  * the result. The returned string should be freed with g_free() when no
 523  * longer needed.
 524  *
 525  * Returns: a newly-allocated string holding the result
 526  */
 527 gchar*
 528 g_strdup_printf (const gchar *format,
 529                  ...)
 530 {
 531   gchar *buffer;
 532   va_list args;
 533
 534   va_start (args, format);
 535   buffer = g_strdup_vprintf (format, args);
 536   va_end (args);
 537
 538   return buffer;
 539 }
 540
 541 /**
 542  * g_strconcat:
 543  * @string1: the first string to add, which must not be %NULL
 544  * @...: a %NULL-terminated list of strings to append to the string
 545  *
 546  * Concatenates all of the given strings into one long string.
 547  * The returned string should be freed with g_free() when no longer needed.
 548  *
 549  * Note that this function is usually not the right function to use to
 550  * assemble a translated message from pieces, since proper translation
 551  * often requires the pieces to be reordered.
 552  *
 553  * <warning><para>The variable argument list <emphasis>must</emphasis> end
 554  * with %NULL. If you forget the %NULL, g_strconcat() will start appending
 555  * random memory junk to your string.</para></warning>
 556  *
 557  * Returns: a newly-allocated string containing all the string arguments
 558  */
 559 gchar*
 560 g_strconcat (const gchar *string1, ...)
 561 {
 562   gsize   l;
 563   va_list args;
 564   gchar   *s;
 565   gchar   *concat;
 566   gchar   *ptr;
 567
 568   if (!string1)
 569     return NULL;
 570
 571   l = 1 + strlen (string1);
 572   va_start (args, string1);
 573   s = va_arg (args, gchar*);
 574   while (s)
 575     {
 576       l += strlen (s);
 577       s = va_arg (args, gchar*);
 578     }
 579   va_end (args);
 580
 581   concat = g_new (gchar, l);
 582   ptr = concat;
 583
 584   ptr = g_stpcpy (ptr, string1);
 585   va_start (args, string1);
 586   s = va_arg (args, gchar*);
 587   while (s)
 588     {
 589       ptr = g_stpcpy (ptr, s);
 590       s = va_arg (args, gchar*);
 591     }
 592   va_end (args);
 593
 594   return concat;
 595 }
 596
 597 /**
 598  * g_strtod:
 599  * @nptr:    the string to convert to a numeric value.
 600  * @endptr:  if non-%NULL, it returns the character after
 601  *           the last character used in the conversion.
 602  *
 603  * Converts a string to a #gdouble value.
 604  * It calls the standard strtod() function to handle the conversion, but
 605  * if the string is not completely converted it attempts the conversion
 606  * again with g_ascii_strtod(), and returns the best match.
 607  *
 608  * This function should seldom be used. The normal situation when reading
 609  * numbers not for human consumption is to use g_ascii_strtod(). Only when
 610  * you know that you must expect both locale formatted and C formatted numbers
 611  * should you use this. Make sure that you don't pass strings such as comma
 612  * separated lists of values, since the commas may be interpreted as a decimal
 613  * point in some locales, causing unexpected results.
 614  *
 615  * Return value: the #gdouble value.
 616  **/
 617 gdouble
 618 g_strtod (const gchar *nptr,
 619           gchar      **endptr)
 620 {
 621   gchar *fail_pos_1;
 622   gchar *fail_pos_2;
 623   gdouble val_1;
 624   gdouble val_2 = 0;
 625
 626   g_return_val_if_fail (nptr != NULL, 0);
 627
 628   fail_pos_1 = NULL;
 629   fail_pos_2 = NULL;
 630
 631   val_1 = strtod (nptr, &fail_pos_1);
 632
 633   if (fail_pos_1 && fail_pos_1[0] != 0)
 634     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
 635
 636   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
 637     {
 638       if (endptr)
 639         *endptr = fail_pos_1;
 640       return val_1;
 641     }
 642   else
 643     {
 644       if (endptr)
 645         *endptr = fail_pos_2;
 646       return val_2;
 647     }
 648 }
 649
/**
 * g_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a #gdouble value.
 *
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually changing
 * the current locale, since that would not be thread-safe.
 * A limitation of the implementation is that this function
 * will still accept localized versions of infinities and NANs.
 *
 * When strtod_l() is available the conversion is simply delegated to
 * it with a "C" locale object. Otherwise the number is located by hand
 * using C-locale syntax, its '.' is replaced with the current locale's
 * decimal point in a temporary copy, and that copy is handed to
 * strtod().
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * To convert from a #gdouble to a string in a locale-insensitive
 * way, use g_ascii_dtostr().
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the #gdouble value.
 */
gdouble
g_ascii_strtod (const gchar *nptr,
                gchar      **endptr)
{
  gchar *fail_pos;
  gdouble val;
  struct lconv *locale_data;
  const char *decimal_point;
  int decimal_point_len;
  const char *p, *decimal_point_pos;
  const char *end = NULL; /* Silence gcc */
  int strtod_errno;

  g_return_val_if_fail (nptr != NULL, 0);

#ifdef HAVE_STRTOD_L
  /* Fast path: let the C library parse with an explicit "C" locale.
   * None of the locals declared above are used on this path.
   */
  errno = 0;

  return strtod_l (nptr, endptr, get_C_locale ());
#else

  fail_pos = NULL;

  /* Look up the decimal point of the locale strtod() will be using. */
  locale_data = localeconv ();
  decimal_point = locale_data->decimal_point;
  decimal_point_len = strlen (decimal_point);

  g_assert (decimal_point_len != 0);

  /* decimal_point_pos: where the '.' sits in @nptr, if one was found.
   * end: one past the last character of the number found by the scan
   * below. Both stay NULL when no scan is needed or no number is found.
   */
  decimal_point_pos = NULL;
  end = NULL;

  /* The scan is only needed when the locale's decimal point is not
   * exactly "."; otherwise strtod() can be run on @nptr directly.
   */
  if (decimal_point[0] != '.' ||
      decimal_point[1] != 0)
    {
      p = nptr;
      /* Skip leading space */
      while (g_ascii_isspace (*p))
        p++;

      /* Skip leading optional sign */
      if (*p == '+' || *p == '-')
        p++;

      if (p[0] == '0' &&
          (p[1] == 'x' || p[1] == 'X'))
        {
          p += 2;
          /* HEX - find the (optional) decimal point */

          while (g_ascii_isxdigit (*p))
            p++;

          if (*p == '.')
            decimal_point_pos = p++;

          while (g_ascii_isxdigit (*p))
            p++;

          /* Binary exponent: marker, sign and digits are each skipped
           * independently, so this scan is more permissive than the
           * real grammar; strtod() does the actual parsing later.
           */
          if (*p == 'p' || *p == 'P')
            p++;
          if (*p == '+' || *p == '-')
            p++;
          while (g_ascii_isdigit (*p))
            p++;

          end = p;
        }
      else if (g_ascii_isdigit (*p) || *p == '.')
        {
          /* Decimal: integer digits, optional '.', fraction digits. */
          while (g_ascii_isdigit (*p))
            p++;

          if (*p == '.')
            decimal_point_pos = p++;

          while (g_ascii_isdigit (*p))
            p++;

          /* Decimal exponent, scanned as permissively as the hex one. */
          if (*p == 'e' || *p == 'E')
            p++;
          if (*p == '+' || *p == '-')
            p++;
          while (g_ascii_isdigit (*p))
            p++;

          end = p;
        }
      /* For the other cases, we need not convert the decimal point */
    }

  if (decimal_point_pos)
    {
      char *copy, *c;

      /* We need to convert the '.' to the locale specific decimal point */
      /* Room for the scanned span, the locale decimal point and the nul
       * (one byte more than strictly needed, since the '.' is dropped).
       */
      copy = g_malloc (end - nptr + 1 + decimal_point_len);

      /* Assemble: text before '.', locale decimal point, text after '.'. */
      c = copy;
      memcpy (c, nptr, decimal_point_pos - nptr);
      c += decimal_point_pos - nptr;
      memcpy (c, decimal_point, decimal_point_len);
      c += decimal_point_len;
      memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
      c += end - (decimal_point_pos + 1);
      *c = 0;

      errno = 0;
      val = strtod (copy, &fail_pos);
      /* Saved right away; presumably so g_free() below cannot clobber it. */
      strtod_errno = errno;

      /* Translate the stop position in @copy back into @nptr. Positions
       * past the decimal point are shifted by the extra bytes the locale
       * decimal point occupies compared with the single '.'.
       */
      if (fail_pos)
        {
          if (fail_pos - copy > decimal_point_pos - nptr)
            fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
          else
            fail_pos = (char *)nptr + (fail_pos - copy);
        }

      g_free (copy);

    }
  else if (end)
    {
      char *copy;

      /* A number without '.' was found. Parse a copy truncated at @end
       * so that strtod() sees nothing beyond the scanned span (for
       * instance a following locale decimal point).
       */
      copy = g_malloc (end - (char *)nptr + 1);
      memcpy (copy, nptr, end - nptr);
      *(copy + (end - (char *)nptr)) = 0;

      errno = 0;
      val = strtod (copy, &fail_pos);
      strtod_errno = errno;

      /* Same offset in @nptr as in @copy: nothing was inserted. */
      if (fail_pos)
        {
          fail_pos = (char *)nptr + (fail_pos - copy);
        }

      g_free (copy);
    }
  else
    {
      /* Either the locale already uses "." or the input did not start
       * with a digit or '.'; strtod() handles @nptr as is.
       */
      errno = 0;
      val = strtod (nptr, &fail_pos);
      strtod_errno = errno;
    }

  if (endptr)
    *endptr = fail_pos;

  /* Report the errno produced by strtod() itself to the caller. */
  errno = strtod_errno;

  return val;
#endif
}
 838
 839
 840 /**
 841  * g_ascii_dtostr:
 842  * @buffer: A buffer to place the resulting string in
 843  * @buf_len: The length of the buffer.
 844  * @d: The #gdouble to convert
 845  *
 846  * Converts a #gdouble to a string, using the '.' as
 847  * decimal point.
 848  *
 849  * This functions generates enough precision that converting
 850  * the string back using g_ascii_strtod() gives the same machine-number
 851  * (on machines with IEEE compatible 64bit doubles). It is
 852  * guaranteed that the size of the resulting string will never
 853  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes.
 854  *
 855  * Return value: The pointer to the buffer with the converted string.
 856  **/
 857 gchar *
 858 g_ascii_dtostr (gchar       *buffer,
 859                 gint         buf_len,
 860                 gdouble      d)
 861 {
 862   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
 863 }
 864
 865 /**
 866  * g_ascii_formatd:
 867  * @buffer: A buffer to place the resulting string in
 868  * @buf_len: The length of the buffer.
 869  * @format: The printf()-style format to use for the
 870  *          code to use for converting.
 871  * @d: The #gdouble to convert
 872  *
 873  * Converts a #gdouble to a string, using the '.' as
 874  * decimal point. To format the number you pass in
 875  * a printf()-style format string. Allowed conversion
 876  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
 877  *
 878  * If you just want to want to serialize the value into a
 879  * string, use g_ascii_dtostr().
 880  *
 881  * Return value: The pointer to the buffer with the converted string.
 882  */
 883 gchar *
 884 g_ascii_formatd (gchar       *buffer,
 885                  gint         buf_len,
 886                  const gchar *format,
 887                  gdouble      d)
 888 {
 889 #ifdef HAVE_USELOCALE
 890   locale_t old_locale;
 891
 892   old_locale = uselocale (get_C_locale ());
 893   _g_snprintf (buffer, buf_len, format, d);
 894   uselocale (old_locale);
 895
 896   return buffer;
 897 #else
 898   struct lconv *locale_data;
 899   const char *decimal_point;
 900   int decimal_point_len;
 901   gchar *p;
 902   int rest_len;
 903   gchar format_char;
 904
 905   g_return_val_if_fail (buffer != NULL, NULL);
 906   g_return_val_if_fail (format[0] == '%', NULL);
 907   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
 908
 909   format_char = format[strlen (format) - 1];
 910
 911   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
 912                         format_char == 'f' || format_char == 'F' ||
 913                         format_char == 'g' || format_char == 'G',
 914                         NULL);
 915
 916   if (format[0] != '%')
 917     return NULL;
 918
 919   if (strpbrk (format + 1, "'l%"))
 920     return NULL;
 921
 922   if (!(format_char == 'e' || format_char == 'E' ||
 923         format_char == 'f' || format_char == 'F' ||
 924         format_char == 'g' || format_char == 'G'))
 925     return NULL;
 926
 927   _g_snprintf (buffer, buf_len, format, d);
 928
 929   locale_data = localeconv ();
 930   decimal_point = locale_data->decimal_point;
 931   decimal_point_len = strlen (decimal_point);
 932
 933   g_assert (decimal_point_len != 0);
 934
 935   if (decimal_point[0] != '.' ||
 936       decimal_point[1] != 0)
 937     {
 938       p = buffer;
 939
 940       while (g_ascii_isspace (*p))
 941         p++;
 942
 943       if (*p == '+' || *p == '-')
 944         p++;
 945
 946       while (isdigit ((guchar)*p))
 947         p++;
 948
 949       if (strncmp (p, decimal_point, decimal_point_len) == 0)
 950         {
 951           *p = '.';
 952           p++;
 953           if (decimal_point_len > 1)
 954             {
 955               rest_len = strlen (p + (decimal_point_len-1));
 956               memmove (p, p + (decimal_point_len-1), rest_len);
 957               p[rest_len] = 0;
 958             }
 959         }
 960     }
 961
 962   return buffer;
 963 #endif
 964 }
 965
 966 #define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
 967                                  (c) == '\r' || (c) == '\t' || (c) == '\v')
 968 #define ISUPPER(c)              ((c) >= 'A' && (c) <= 'Z')
 969 #define ISLOWER(c)              ((c) >= 'a' && (c) <= 'z')
 970 #define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
 971 #define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
 972 #define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
 973
 974 #if !defined(HAVE_STRTOLL_L) || !defined(HAVE_STRTOULL_L)
 975
 976 static guint64
 977 g_parse_long_long (const gchar  *nptr,
 978                    const gchar **endptr,
 979                    guint         base,
 980                    gboolean     *negative)
 981 {
 982   /* this code is based on on the strtol(3) code from GNU libc released under
 983    * the GNU Lesser General Public License.
 984    *
 985    * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
 986    *        Free Software Foundation, Inc.
 987    */
 988   gboolean overflow;
 989   guint64 cutoff;
 990   guint64 cutlim;
 991   guint64 ui64;
 992   const gchar *s, *save;
 993   guchar c;
 994
 995   g_return_val_if_fail (nptr != NULL, 0);
 996
 997   *negative = FALSE;
 998   if (base == 1 || base > 36)
 999     {
1000       errno = EINVAL;
1001       if (endptr)
1002         *endptr = nptr;
1003       return 0;
1004     }
1005
1006   save = s = nptr;
1007
1008   /* Skip white space.  */
1009   while (ISSPACE (*s))
1010     ++s;
1011
1012   if (G_UNLIKELY (!*s))
1013     goto noconv;
1014
1015   /* Check for a sign.  */
1016   if (*s == '-')
1017     {
1018       *negative = TRUE;
1019       ++s;
1020     }
1021   else if (*s == '+')
1022     ++s;
1023
1024   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
1025   if (*s == '0')
1026     {
1027       if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1028         {
1029           s += 2;
1030           base = 16;
1031         }
1032       else if (base == 0)
1033         base = 8;
1034     }
1035   else if (base == 0)
1036     base = 10;
1037
1038   /* Save the pointer so we can check later if anything happened.  */
1039   save = s;
1040   cutoff = G_MAXUINT64 / base;
1041   cutlim = G_MAXUINT64 % base;
1042
1043   overflow = FALSE;
1044   ui64 = 0;
1045   c = *s;
1046   for (; c; c = *++s)
1047     {
1048       if (c >= '0' && c <= '9')
1049         c -= '0';
1050       else if (ISALPHA (c))
1051         c = TOUPPER (c) - 'A' + 10;
1052       else
1053         break;
1054       if (c >= base)
1055         break;
1056       /* Check for overflow.  */
1057       if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1058         overflow = TRUE;
1059       else
1060         {
1061           ui64 *= base;
1062           ui64 += c;
1063         }
1064     }
1065
1066   /* Check if anything actually happened.  */
1067   if (s == save)
1068     goto noconv;
1069
1070   /* Store in ENDPTR the address of one character
1071      past the last character we converted.  */
1072   if (endptr)
1073     *endptr = s;
1074
1075   if (G_UNLIKELY (overflow))
1076     {
1077       errno = ERANGE;
1078       return G_MAXUINT64;
1079     }
1080
1081   return ui64;
1082
1083  noconv:
1084   /* We must handle a special case here: the base is 0 or 16 and the
1085      first two characters are '0' and 'x', but the rest are no
1086      hexadecimal digits.  This is no error case.  We return 0 and
1087      ENDPTR points to the `x`.  */
1088   if (endptr)
1089     {
1090       if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1091           && save[-2] == '0')
1092         *endptr = &save[-1];
1093       else
1094         /*  There was no number to convert.  */
1095         *endptr = nptr;
1096     }
1097   return 0;
1098 }
1099 #endif
1100
1101 /**
1102  * g_ascii_strtoull:
1103  * @nptr:    the string to convert to a numeric value.
1104  * @endptr:  if non-%NULL, it returns the character after
1105  *           the last character used in the conversion.
1106  * @base:    to be used for the conversion, 2..36 or 0
1107  *
1108  * Converts a string to a #guint64 value.
1109  * This function behaves like the standard strtoull() function
1110  * does in the C locale. It does this without actually
1111  * changing the current locale, since that would not be
1112  * thread-safe.
1113  *
1114  * This function is typically used when reading configuration
1115  * files or other non-user input that should be locale independent.
1116  * To handle input from the user you should normally use the
1117  * locale-sensitive system strtoull() function.
1118  *
1119  * If the correct value would cause overflow, %G_MAXUINT64
1120  * is returned, and %ERANGE is stored in %errno.  If the base is
1121  * outside the valid range, zero is returned, and %EINVAL is stored
1122  * in %errno.  If the string conversion fails, zero is returned, and
1123  * @endptr returns @nptr (if @endptr is non-%NULL).
1124  *
1125  * Return value: the #guint64 value or zero on error.
1126  *
1127  * Since: 2.2
1128  */
1129 guint64
1130 g_ascii_strtoull (const gchar *nptr,
1131                   gchar      **endptr,
1132                   guint        base)
1133 {
1134 #ifdef HAVE_STRTOULL_L
1135   return strtoull_l (nptr, endptr, base, get_C_locale ());
1136 #else
1137   gboolean negative;
1138   guint64 result;
1139
1140   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1141
1142   /* Return the result of the appropriate sign.  */
1143   return negative ? -result : result;
1144 #endif
1145 }
1146
1147 /**
1148  * g_ascii_strtoll:
1149  * @nptr:    the string to convert to a numeric value.
1150  * @endptr:  if non-%NULL, it returns the character after
1151  *           the last character used in the conversion.
1152  * @base:    to be used for the conversion, 2..36 or 0
1153  *
1154  * Converts a string to a #gint64 value.
1155  * This function behaves like the standard strtoll() function
1156  * does in the C locale. It does this without actually
1157  * changing the current locale, since that would not be
1158  * thread-safe.
1159  *
1160  * This function is typically used when reading configuration
1161  * files or other non-user input that should be locale independent.
1162  * To handle input from the user you should normally use the
1163  * locale-sensitive system strtoll() function.
1164  *
1165  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1166  * is returned, and %ERANGE is stored in %errno.  If the base is
1167  * outside the valid range, zero is returned, and %EINVAL is stored
1168  * in %errno.  If the string conversion fails, zero is returned, and
1169  * @endptr returns @nptr (if @endptr is non-%NULL).
1170  *
1171  * Return value: the #gint64 value or zero on error.
1172  *
1173  * Since: 2.12
1174  */
1175 gint64
1176 g_ascii_strtoll (const gchar *nptr,
1177                  gchar      **endptr,
1178                  guint        base)
1179 {
1180 #ifdef HAVE_STRTOLL_L
1181   return strtoll_l (nptr, endptr, base, get_C_locale ());
1182 #else
1183   gboolean negative;
1184   guint64 result;
1185
1186   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1187
1188   if (negative && result > (guint64) G_MININT64)
1189     {
1190       errno = ERANGE;
1191       return G_MININT64;
1192     }
1193   else if (!negative && result > (guint64) G_MAXINT64)
1194     {
1195       errno = ERANGE;
1196       return G_MAXINT64;
1197     }
1198   else if (negative)
1199     return - (gint64) result;
1200   else
1201     return (gint64) result;
1202 #endif
1203 }
1204
1205 /**
1206  * g_strerror:
1207  * @errnum: the system error number. See the standard C %errno
1208  *     documentation
1209  *
1210  * Returns a string corresponding to the given error code, e.g.
1211  * "no such process". You should use this function in preference to
1212  * strerror(), because it returns a string in UTF-8 encoding, and since
1213  * not all platforms support the strerror() function.
1214  *
1215  * Returns: a UTF-8 string describing the error code. If the error code
1216  *     is unknown, it returns "unknown error (&lt;code&gt;)".
1217  */
1218 const gchar *
1219 g_strerror (gint errnum)
1220 {
1221   gchar buf[64];
1222   gchar *msg;
1223   gchar *tofree;
1224   const gchar *ret;
1225   gint saved_errno = errno;
1226
1227   msg = tofree = NULL;
1228
1229 #ifdef HAVE_STRERROR
1230   msg = strerror (errnum);
1231   if (!g_get_charset (NULL))
1232     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1233 #endif
1234
1235   if (!msg)
1236     {
1237       msg = buf;
1238       _g_sprintf (msg, "unknown error (%d)", errnum);
1239     }
1240
1241   ret = g_intern_string (msg);
1242   g_free (tofree);
1243   errno = saved_errno;
1244   return ret;
1245 }
1246
1247 /**
1248  * g_strsignal:
1249  * @signum: the signal number. See the <literal>signal</literal>
1250  *     documentation
1251  *
1252  * Returns a string describing the given signal, e.g. "Segmentation fault".
1253  * You should use this function in preference to strsignal(), because it
1254  * returns a string in UTF-8 encoding, and since not all platforms support
1255  * the strsignal() function.
1256  *
1257  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1258  *     it returns "unknown signal (&lt;signum&gt;)".
1259  */
1260 const gchar *
1261 g_strsignal (gint signum)
1262 {
1263   gchar *msg;
1264   gchar *tofree;
1265   const gchar *ret;
1266
1267   msg = tofree = NULL;
1268
1269 #ifdef HAVE_STRSIGNAL
1270   msg = strsignal (signum);
1271   if (!g_get_charset (NULL))
1272     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1273 #endif
1274
1275   if (!msg)
1276     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1277   ret = g_intern_string (msg);
1278   g_free (tofree);
1279
1280   return ret;
1281 }
1282
1283 /* Functions g_strlcpy and g_strlcat were originally developed by
1284  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1285  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1286  * for more information.
1287  */
1288
1289 #ifdef HAVE_STRLCPY
1290 /* Use the native ones, if available; they might be implemented in assembly */
1291 gsize
1292 g_strlcpy (gchar       *dest,
1293            const gchar *src,
1294            gsize        dest_size)
1295 {
1296   g_return_val_if_fail (dest != NULL, 0);
1297   g_return_val_if_fail (src  != NULL, 0);
1298
1299   return strlcpy (dest, src, dest_size);
1300 }
1301
1302 gsize
1303 g_strlcat (gchar       *dest,
1304            const gchar *src,
1305            gsize        dest_size)
1306 {
1307   g_return_val_if_fail (dest != NULL, 0);
1308   g_return_val_if_fail (src  != NULL, 0);
1309
1310   return strlcat (dest, src, dest_size);
1311 }
1312
1313 #else /* ! HAVE_STRLCPY */
1314 /**
1315  * g_strlcpy:
1316  * @dest: destination buffer
1317  * @src: source buffer
1318  * @dest_size: length of @dest in bytes
1319  *
1320  * Portability wrapper that calls strlcpy() on systems which have it,
1321  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1322  * guaranteed to be nul-terminated; @src must be nul-terminated;
1323  * @dest_size is the buffer size, not the number of chars to copy.
1324  *
1325  * At most dest_size - 1 characters will be copied. Always nul-terminates
1326  * (unless dest_size == 0). This function does <emphasis>not</emphasis>
1327  * allocate memory. Unlike strncpy(), this function doesn't pad dest (so
1328  * it's often faster). It returns the size of the attempted result,
1329  * strlen (src), so if @retval >= @dest_size, truncation occurred.
1330  *
1331  * <note><para>Caveat: strlcpy() is supposedly more secure than
1332  * strcpy() or strncpy(), but if you really want to avoid screwups,
1333  * g_strdup() is an even better idea.</para></note>
1334  *
1335  * Returns: length of @src
1336  */
1337 gsize
1338 g_strlcpy (gchar       *dest,
1339            const gchar *src,
1340            gsize        dest_size)
1341 {
1342   register gchar *d = dest;
1343   register const gchar *s = src;
1344   register gsize n = dest_size;
1345
1346   g_return_val_if_fail (dest != NULL, 0);
1347   g_return_val_if_fail (src  != NULL, 0);
1348
1349   /* Copy as many bytes as will fit */
1350   if (n != 0 && --n != 0)
1351     do
1352       {
1353         register gchar c = *s++;
1354
1355         *d++ = c;
1356         if (c == 0)
1357           break;
1358       }
1359     while (--n != 0);
1360
1361   /* If not enough room in dest, add NUL and traverse rest of src */
1362   if (n == 0)
1363     {
1364       if (dest_size != 0)
1365         *d = 0;
1366       while (*s++)
1367         ;
1368     }
1369
1370   return s - src - 1;  /* count does not include NUL */
1371 }
1372
1373 /**
1374  * g_strlcat:
1375  * @dest: destination buffer, already containing one nul-terminated string
1376  * @src: source buffer
1377  * @dest_size: length of @dest buffer in bytes (not length of existing string
1378  *     inside @dest)
1379  *
1380  * Portability wrapper that calls strlcat() on systems which have it,
1381  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1382  * guaranteeing nul-termination for @dest. The total size of @dest won't
1383  * exceed @dest_size.
1384  *
1385  * At most dest_size - 1 characters will be copied.
1386  * Unlike strncat, dest_size is the full size of dest, not the space left over.
1387  * This function does NOT allocate memory.
1388  * This always NUL terminates (unless siz == 0 or there were no NUL characters
1389  * in the dest_size characters of dest to start with).
1390  *
1391  * <note><para>Caveat: this is supposedly a more secure alternative to
1392  * strcat() or strncat(), but for real security g_strconcat() is harder
1393  * to mess up.</para></note>
1394  *
1395  * Returns: size of attempted result, which is MIN (dest_size, strlen
1396  *          (original dest)) + strlen (src), so if retval >= dest_size,
1397  *          truncation occurred.
1398  **/
1399 gsize
1400 g_strlcat (gchar       *dest,
1401            const gchar *src,
1402            gsize        dest_size)
1403 {
1404   register gchar *d = dest;
1405   register const gchar *s = src;
1406   register gsize bytes_left = dest_size;
1407   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1408
1409   g_return_val_if_fail (dest != NULL, 0);
1410   g_return_val_if_fail (src  != NULL, 0);
1411
1412   /* Find the end of dst and adjust bytes left but don't go past end */
1413   while (*d != 0 && bytes_left-- != 0)
1414     d++;
1415   dlength = d - dest;
1416   bytes_left = dest_size - dlength;
1417
1418   if (bytes_left == 0)
1419     return dlength + strlen (s);
1420
1421   while (*s != 0)
1422     {
1423       if (bytes_left != 1)
1424         {
1425           *d++ = *s;
1426           bytes_left--;
1427         }
1428       s++;
1429     }
1430   *d = 0;
1431
1432   return dlength + (s - src);  /* count does not include NUL */
1433 }
1434 #endif /* ! HAVE_STRLCPY */
1435
1436 /**
1437  * g_ascii_strdown:
1438  * @str: a string.
1439  * @len: length of @str in bytes, or -1 if @str is nul-terminated.
1440  *
1441  * Converts all upper case ASCII letters to lower case ASCII letters.
1442  *
1443  * Return value: a newly-allocated string, with all the upper case
1444  *               characters in @str converted to lower case, with
1445  *               semantics that exactly match g_ascii_tolower(). (Note
1446  *               that this is unlike the old g_strdown(), which modified
1447  *               the string in place.)
1448  **/
1449 gchar*
1450 g_ascii_strdown (const gchar *str,
1451                  gssize       len)
1452 {
1453   gchar *result, *s;
1454
1455   g_return_val_if_fail (str != NULL, NULL);
1456
1457   if (len < 0)
1458     len = strlen (str);
1459
1460   result = g_strndup (str, len);
1461   for (s = result; *s; s++)
1462     *s = g_ascii_tolower (*s);
1463
1464   return result;
1465 }
1466
1467 /**
1468  * g_ascii_strup:
1469  * @str: a string.
1470  * @len: length of @str in bytes, or -1 if @str is nul-terminated.
1471  *
1472  * Converts all lower case ASCII letters to upper case ASCII letters.
1473  *
1474  * Return value: a newly allocated string, with all the lower case
1475  *               characters in @str converted to upper case, with
1476  *               semantics that exactly match g_ascii_toupper(). (Note
1477  *               that this is unlike the old g_strup(), which modified
1478  *               the string in place.)
1479  **/
1480 gchar*
1481 g_ascii_strup (const gchar *str,
1482                gssize       len)
1483 {
1484   gchar *result, *s;
1485
1486   g_return_val_if_fail (str != NULL, NULL);
1487
1488   if (len < 0)
1489     len = strlen (str);
1490
1491   result = g_strndup (str, len);
1492   for (s = result; *s; s++)
1493     *s = g_ascii_toupper (*s);
1494
1495   return result;
1496 }
1497
1498 /**
1499  * g_strdown:
1500  * @string: the string to convert.
1501  *
1502  * Converts a string to lower case.
1503  *
1504  * Return value: the string
1505  *
1506  * Deprecated:2.2: This function is totally broken for the reasons discussed
1507  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1508  * instead.
1509  **/
1510 gchar*
1511 g_strdown (gchar *string)
1512 {
1513   register guchar *s;
1514
1515   g_return_val_if_fail (string != NULL, NULL);
1516
1517   s = (guchar *) string;
1518
1519   while (*s)
1520     {
1521       if (isupper (*s))
1522         *s = tolower (*s);
1523       s++;
1524     }
1525
1526   return (gchar *) string;
1527 }
1528
1529 /**
1530  * g_strup:
1531  * @string: the string to convert.
1532  *
1533  * Converts a string to upper case.
1534  *
1535  * Return value: the string
1536  *
1537  * Deprecated:2.2: This function is totally broken for the reasons discussed
1538  * in the g_strncasecmp() docs - use g_ascii_strup() or g_utf8_strup() instead.
1539  **/
1540 gchar*
1541 g_strup (gchar *string)
1542 {
1543   register guchar *s;
1544
1545   g_return_val_if_fail (string != NULL, NULL);
1546
1547   s = (guchar *) string;
1548
1549   while (*s)
1550     {
1551       if (islower (*s))
1552         *s = toupper (*s);
1553       s++;
1554     }
1555
1556   return (gchar *) string;
1557 }
1558
1559 /**
1560  * g_strreverse:
1561  * @string: the string to reverse
1562  *
1563  * Reverses all of the bytes in a string. For example,
1564  * <literal>g_strreverse ("abcdef")</literal> will result
1565  * in "fedcba".
1566  *
1567  * Note that g_strreverse() doesn't work on UTF-8 strings
1568  * containing multibyte characters. For that purpose, use
1569  * g_utf8_strreverse().
1570  *
1571  * Returns: the same pointer passed in as @string
1572  */
1573 gchar*
1574 g_strreverse (gchar *string)
1575 {
1576   g_return_val_if_fail (string != NULL, NULL);
1577
1578   if (*string)
1579     {
1580       register gchar *h, *t;
1581
1582       h = string;
1583       t = string + strlen (string) - 1;
1584
1585       while (h < t)
1586         {
1587           register gchar c;
1588
1589           c = *h;
1590           *h = *t;
1591           h++;
1592           *t = c;
1593           t--;
1594         }
1595     }
1596
1597   return string;
1598 }
1599
1600 /**
1601  * g_ascii_tolower:
1602  * @c: any character.
1603  *
1604  * Convert a character to ASCII lower case.
1605  *
1606  * Unlike the standard C library tolower() function, this only
1607  * recognizes standard ASCII letters and ignores the locale, returning
1608  * all non-ASCII characters unchanged, even if they are lower case
1609  * letters in a particular character set. Also unlike the standard
1610  * library function, this takes and returns a char, not an int, so
1611  * don't call it on %EOF but no need to worry about casting to #guchar
1612  * before passing a possibly non-ASCII character in.
1613  *
1614  * Return value: the result of converting @c to lower case.
1615  *               If @c is not an ASCII upper case letter,
1616  *               @c is returned unchanged.
1617  **/
1618 gchar
1619 g_ascii_tolower (gchar c)
1620 {
1621   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1622 }
1623
1624 /**
1625  * g_ascii_toupper:
1626  * @c: any character.
1627  *
1628  * Convert a character to ASCII upper case.
1629  *
1630  * Unlike the standard C library toupper() function, this only
1631  * recognizes standard ASCII letters and ignores the locale, returning
1632  * all non-ASCII characters unchanged, even if they are upper case
1633  * letters in a particular character set. Also unlike the standard
1634  * library function, this takes and returns a char, not an int, so
1635  * don't call it on %EOF but no need to worry about casting to #guchar
1636  * before passing a possibly non-ASCII character in.
1637  *
1638  * Return value: the result of converting @c to upper case.
1639  *               If @c is not an ASCII lower case letter,
1640  *               @c is returned unchanged.
1641  **/
1642 gchar
1643 g_ascii_toupper (gchar c)
1644 {
1645   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1646 }
1647
1648 /**
1649  * g_ascii_digit_value:
1650  * @c: an ASCII character.
1651  *
1652  * Determines the numeric value of a character as a decimal
1653  * digit. Differs from g_unichar_digit_value() because it takes
1654  * a char, so there's no worry about sign extension if characters
1655  * are signed.
1656  *
1657  * Return value: If @c is a decimal digit (according to
1658  * g_ascii_isdigit()), its numeric value. Otherwise, -1.
1659  **/
1660 int
1661 g_ascii_digit_value (gchar c)
1662 {
1663   if (g_ascii_isdigit (c))
1664     return c - '0';
1665   return -1;
1666 }
1667
1668 /**
1669  * g_ascii_xdigit_value:
1670  * @c: an ASCII character.
1671  *
 * Determines the numeric value of a character as a hexadecimal
1673  * digit. Differs from g_unichar_xdigit_value() because it takes
1674  * a char, so there's no worry about sign extension if characters
1675  * are signed.
1676  *
1677  * Return value: If @c is a hex digit (according to
1678  * g_ascii_isxdigit()), its numeric value. Otherwise, -1.
1679  **/
1680 int
1681 g_ascii_xdigit_value (gchar c)
1682 {
1683   if (c >= 'A' && c <= 'F')
1684     return c - 'A' + 10;
1685   if (c >= 'a' && c <= 'f')
1686     return c - 'a' + 10;
1687   return g_ascii_digit_value (c);
1688 }
1689
1690 /**
1691  * g_ascii_strcasecmp:
1692  * @s1: string to compare with @s2.
1693  * @s2: string to compare with @s1.
1694  *
1695  * Compare two strings, ignoring the case of ASCII characters.
1696  *
1697  * Unlike the BSD strcasecmp() function, this only recognizes standard
1698  * ASCII letters and ignores the locale, treating all non-ASCII
1699  * bytes as if they are not letters.
1700  *
1701  * This function should be used only on strings that are known to be
1702  * in encodings where the bytes corresponding to ASCII letters always
1703  * represent themselves. This includes UTF-8 and the ISO-8859-*
1704  * charsets, but not for instance double-byte encodings like the
1705  * Windows Codepage 932, where the trailing bytes of double-byte
1706  * characters include all ASCII letters. If you compare two CP932
1707  * strings using this function, you will get false matches.
1708  *
1709  * Return value: 0 if the strings match, a negative value if @s1 &lt; @s2,
1710  *   or a positive value if @s1 &gt; @s2.
1711  **/
1712 gint
1713 g_ascii_strcasecmp (const gchar *s1,
1714                     const gchar *s2)
1715 {
1716   gint c1, c2;
1717
1718   g_return_val_if_fail (s1 != NULL, 0);
1719   g_return_val_if_fail (s2 != NULL, 0);
1720
1721   while (*s1 && *s2)
1722     {
1723       c1 = (gint)(guchar) TOLOWER (*s1);
1724       c2 = (gint)(guchar) TOLOWER (*s2);
1725       if (c1 != c2)
1726         return (c1 - c2);
1727       s1++; s2++;
1728     }
1729
1730   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1731 }
1732
1733 /**
1734  * g_ascii_strncasecmp:
1735  * @s1: string to compare with @s2.
1736  * @s2: string to compare with @s1.
1737  * @n:  number of characters to compare.
1738  *
1739  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1740  * characters after the first @n in each string.
1741  *
1742  * Unlike the BSD strcasecmp() function, this only recognizes standard
1743  * ASCII letters and ignores the locale, treating all non-ASCII
1744  * characters as if they are not letters.
1745  *
1746  * The same warning as in g_ascii_strcasecmp() applies: Use this
1747  * function only on strings known to be in encodings where bytes
1748  * corresponding to ASCII letters always represent themselves.
1749  *
1750  * Return value: 0 if the strings match, a negative value if @s1 &lt; @s2,
1751  *   or a positive value if @s1 &gt; @s2.
1752  **/
1753 gint
1754 g_ascii_strncasecmp (const gchar *s1,
1755                      const gchar *s2,
1756                      gsize n)
1757 {
1758   gint c1, c2;
1759
1760   g_return_val_if_fail (s1 != NULL, 0);
1761   g_return_val_if_fail (s2 != NULL, 0);
1762
1763   while (n && *s1 && *s2)
1764     {
1765       n -= 1;
1766       c1 = (gint)(guchar) TOLOWER (*s1);
1767       c2 = (gint)(guchar) TOLOWER (*s2);
1768       if (c1 != c2)
1769         return (c1 - c2);
1770       s1++; s2++;
1771     }
1772
1773   if (n)
1774     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1775   else
1776     return 0;
1777 }
1778
1779 /**
1780  * g_strcasecmp:
1781  * @s1: a string.
1782  * @s2: a string to compare with @s1.
1783  *
1784  * A case-insensitive string comparison, corresponding to the standard
1785  * strcasecmp() function on platforms which support it.
1786  *
1787  * Return value: 0 if the strings match, a negative value if @s1 &lt; @s2,
1788  *   or a positive value if @s1 &gt; @s2.
1789  *
1790  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this function
1791  *   is deprecated and how to replace it.
1792  **/
1793 gint
1794 g_strcasecmp (const gchar *s1,
1795               const gchar *s2)
1796 {
1797 #ifdef HAVE_STRCASECMP
1798   g_return_val_if_fail (s1 != NULL, 0);
1799   g_return_val_if_fail (s2 != NULL, 0);
1800
1801   return strcasecmp (s1, s2);
1802 #else
1803   gint c1, c2;
1804
1805   g_return_val_if_fail (s1 != NULL, 0);
1806   g_return_val_if_fail (s2 != NULL, 0);
1807
1808   while (*s1 && *s2)
1809     {
1810       /* According to A. Cox, some platforms have islower's that
1811        * don't work right on non-uppercase
1812        */
1813       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1814       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1815       if (c1 != c2)
1816         return (c1 - c2);
1817       s1++; s2++;
1818     }
1819
1820   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1821 #endif
1822 }
1823
1824 /**
1825  * g_strncasecmp:
1826  * @s1: a string.
1827  * @s2: a string to compare with @s1.
1828  * @n: the maximum number of characters to compare.
1829  *
1830  * A case-insensitive string comparison, corresponding to the standard
1831  * strncasecmp() function on platforms which support it.
1832  * It is similar to g_strcasecmp() except it only compares the first @n
1833  * characters of the strings.
1834  *
1835  * Return value: 0 if the strings match, a negative value if @s1 &lt; @s2,
1836  *   or a positive value if @s1 &gt; @s2.
1837  *
1838  * Deprecated:2.2: The problem with g_strncasecmp() is that it does the
1839  * comparison by calling toupper()/tolower(). These functions are
1840  * locale-specific and operate on single bytes. However, it is impossible
1841  * to handle things correctly from an I18N standpoint by operating on
1842  * bytes, since characters may be multibyte. Thus g_strncasecmp() is
1843  * broken if your string is guaranteed to be ASCII, since it's
1844  * locale-sensitive, and it's broken if your string is localized, since
1845  * it doesn't work on many encodings at all, including UTF-8, EUC-JP,
1846  * etc.
1847  *
1848  * There are therefore two replacement functions: g_ascii_strncasecmp(),
1849  * which only works on ASCII and is not locale-sensitive, and
1850  * g_utf8_casefold(), which is good for case-insensitive sorting of UTF-8.
1851  **/
1852 gint
1853 g_strncasecmp (const gchar *s1,
1854                const gchar *s2,
1855                guint n)
1856 {
1857 #ifdef HAVE_STRNCASECMP
1858   return strncasecmp (s1, s2, n);
1859 #else
1860   gint c1, c2;
1861
1862   g_return_val_if_fail (s1 != NULL, 0);
1863   g_return_val_if_fail (s2 != NULL, 0);
1864
1865   while (n && *s1 && *s2)
1866     {
1867       n -= 1;
1868       /* According to A. Cox, some platforms have islower's that
1869        * don't work right on non-uppercase
1870        */
1871       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1872       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1873       if (c1 != c2)
1874         return (c1 - c2);
1875       s1++; s2++;
1876     }
1877
1878   if (n)
1879     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1880   else
1881     return 0;
1882 #endif
1883 }
1884
1885 /**
1886  * g_strdelimit:
1887  * @string: the string to convert
1888  * @delimiters: a string containing the current delimiters, or %NULL
1889  *     to use the standard delimiters defined in #G_STR_DELIMITERS
1890  * @new_delimiter: the new delimiter character
1891  *
1892  * Converts any delimiter characters in @string to @new_delimiter.
1893  * Any characters in @string which are found in @delimiters are
1894  * changed to the @new_delimiter character. Modifies @string in place,
1895  * and returns @string itself, not a copy. The return value is to
1896  * allow nesting such as
1897  * |[
1898  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
1899  * ]|
1900  *
1901  * Returns: @string
1902  */
1903 gchar *
1904 g_strdelimit (gchar       *string,
1905               const gchar *delimiters,
1906               gchar        new_delim)
1907 {
1908   register gchar *c;
1909
1910   g_return_val_if_fail (string != NULL, NULL);
1911
1912   if (!delimiters)
1913     delimiters = G_STR_DELIMITERS;
1914
1915   for (c = string; *c; c++)
1916     {
1917       if (strchr (delimiters, *c))
1918         *c = new_delim;
1919     }
1920
1921   return string;
1922 }
1923
1924 /**
1925  * g_strcanon:
1926  * @string: a nul-terminated array of bytes
1927  * @valid_chars: bytes permitted in @string
1928  * @substitutor: replacement character for disallowed bytes
1929  *
1930  * For each character in @string, if the character is not in
1931  * @valid_chars, replaces the character with @substitutor.
1932  * Modifies @string in place, and return @string itself, not
1933  * a copy. The return value is to allow nesting such as
1934  * |[
1935  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
1936  * ]|
1937  *
1938  * Returns: @string
1939  */
1940 gchar *
1941 g_strcanon (gchar       *string,
1942             const gchar *valid_chars,
1943             gchar        substitutor)
1944 {
1945   register gchar *c;
1946
1947   g_return_val_if_fail (string != NULL, NULL);
1948   g_return_val_if_fail (valid_chars != NULL, NULL);
1949
1950   for (c = string; *c; c++)
1951     {
1952       if (!strchr (valid_chars, *c))
1953         *c = substitutor;
1954     }
1955
1956   return string;
1957 }
1958
1959 /**
1960  * g_strcompress:
1961  * @source: a string to compress
1962  *
1963  * Replaces all escaped characters with their one byte equivalent.
1964  *
1965  * This function does the reverse conversion of g_strescape().
1966  *
1967  * Returns: a newly-allocated copy of @source with all escaped
1968  *     character compressed
1969  */
1970 gchar *
1971 g_strcompress (const gchar *source)
1972 {
1973   const gchar *p = source, *octal;
1974   gchar *dest = g_malloc (strlen (source) + 1);
1975   gchar *q = dest;
1976
1977   while (*p)
1978     {
1979       if (*p == '\\')
1980         {
1981           p++;
1982           switch (*p)
1983             {
1984             case '\0':
1985               g_warning ("g_strcompress: trailing \\");
1986               goto out;
1987             case '0':  case '1':  case '2':  case '3':  case '4':
1988             case '5':  case '6':  case '7':
1989               *q = 0;
1990               octal = p;
1991               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
1992                 {
1993                   *q = (*q * 8) + (*p - '0');
1994                   p++;
1995                 }
1996               q++;
1997               p--;
1998               break;
1999             case 'b':
2000               *q++ = '\b';
2001               break;
2002             case 'f':
2003               *q++ = '\f';
2004               break;
2005             case 'n':
2006               *q++ = '\n';
2007               break;
2008             case 'r':
2009               *q++ = '\r';
2010               break;
2011             case 't':
2012               *q++ = '\t';
2013               break;
2014             default:            /* Also handles \" and \\ */
2015               *q++ = *p;
2016               break;
2017             }
2018         }
2019       else
2020         *q++ = *p;
2021       p++;
2022     }
2023 out:
2024   *q = 0;
2025
2026   return dest;
2027 }
2028
2029 /**
2030  * g_strescape:
2031  * @source: a string to escape
2032  * @exceptions: a string of characters not to escape in @source
2033  *
2034  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\'
2035  * and '&quot;' in the string @source by inserting a '\' before
2036  * them. Additionally all characters in the range 0x01-0x1F (everything
2037  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2038  * replaced with a '\' followed by their octal representation.
2039  * Characters supplied in @exceptions are not escaped.
2040  *
2041  * g_strcompress() does the reverse conversion.
2042  *
2043  * Returns: a newly-allocated copy of @source with certain
2044  *     characters escaped. See above.
2045  */
2046 gchar *
2047 g_strescape (const gchar *source,
2048              const gchar *exceptions)
2049 {
2050   const guchar *p;
2051   gchar *dest;
2052   gchar *q;
2053   guchar excmap[256];
2054
2055   g_return_val_if_fail (source != NULL, NULL);
2056
2057   p = (guchar *) source;
2058   /* Each source byte needs maximally four destination chars (\777) */
2059   q = dest = g_malloc (strlen (source) * 4 + 1);
2060
2061   memset (excmap, 0, 256);
2062   if (exceptions)
2063     {
2064       guchar *e = (guchar *) exceptions;
2065
2066       while (*e)
2067         {
2068           excmap[*e] = 1;
2069           e++;
2070         }
2071     }
2072
2073   while (*p)
2074     {
2075       if (excmap[*p])
2076         *q++ = *p;
2077       else
2078         {
2079           switch (*p)
2080             {
2081             case '\b':
2082               *q++ = '\\';
2083               *q++ = 'b';
2084               break;
2085             case '\f':
2086               *q++ = '\\';
2087               *q++ = 'f';
2088               break;
2089             case '\n':
2090               *q++ = '\\';
2091               *q++ = 'n';
2092               break;
2093             case '\r':
2094               *q++ = '\\';
2095               *q++ = 'r';
2096               break;
2097             case '\t':
2098               *q++ = '\\';
2099               *q++ = 't';
2100               break;
2101             case '\\':
2102               *q++ = '\\';
2103               *q++ = '\\';
2104               break;
2105             case '"':
2106               *q++ = '\\';
2107               *q++ = '"';
2108               break;
2109             default:
2110               if ((*p < ' ') || (*p >= 0177))
2111                 {
2112                   *q++ = '\\';
2113                   *q++ = '0' + (((*p) >> 6) & 07);
2114                   *q++ = '0' + (((*p) >> 3) & 07);
2115                   *q++ = '0' + ((*p) & 07);
2116                 }
2117               else
2118                 *q++ = *p;
2119               break;
2120             }
2121         }
2122       p++;
2123     }
2124   *q = 0;
2125   return dest;
2126 }
2127
2128 /**
2129  * g_strchug:
2130  * @string: a string to remove the leading whitespace from
2131  *
2132  * Removes leading whitespace from a string, by moving the rest
2133  * of the characters forward.
2134  *
2135  * This function doesn't allocate or reallocate any memory;
2136  * it modifies @string in place. The pointer to @string is
2137  * returned to allow the nesting of functions.
2138  *
2139  * Also see g_strchomp() and g_strstrip().
2140  *
2141  * Returns: @string
2142  */
2143 gchar *
2144 g_strchug (gchar *string)
2145 {
2146   guchar *start;
2147
2148   g_return_val_if_fail (string != NULL, NULL);
2149
2150   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2151     ;
2152
2153   g_memmove (string, start, strlen ((gchar *) start) + 1);
2154
2155   return string;
2156 }
2157
2158 /**
2159  * g_strchomp:
2160  * @string: a string to remove the trailing whitespace from
2161  *
2162  * Removes trailing whitespace from a string.
2163  *
2164  * This function doesn't allocate or reallocate any memory;
2165  * it modifies @string in place. The pointer to @string is
2166  * returned to allow the nesting of functions.
2167  *
2168  * Also see g_strchug() and g_strstrip().
2169  *
2170  * Returns: @string.
2171  */
2172 gchar *
2173 g_strchomp (gchar *string)
2174 {
2175   gsize len;
2176
2177   g_return_val_if_fail (string != NULL, NULL);
2178
2179   len = strlen (string);
2180   while (len--)
2181     {
2182       if (g_ascii_isspace ((guchar) string[len]))
2183         string[len] = '\0';
2184       else
2185         break;
2186     }
2187
2188   return string;
2189 }
2190
2191 /**
2192  * g_strsplit:
2193  * @string: a string to split
2194  * @delimiter: a string which specifies the places at which to split
2195  *     the string. The delimiter is not included in any of the resulting
2196  *     strings, unless @max_tokens is reached.
2197  * @max_tokens: the maximum number of pieces to split @string into.
2198  *     If this is less than 1, the string is split completely.
2199  *
2200  * Splits a string into a maximum of @max_tokens pieces, using the given
2201  * @delimiter. If @max_tokens is reached, the remainder of @string is
2202  * appended to the last token.
2203  *
2204  * As a special case, the result of splitting the empty string "" is an empty
2205  * vector, not a vector containing a single string. The reason for this
2206  * special case is that being able to represent a empty vector is typically
2207  * more useful than consistent handling of empty elements. If you do need
2208  * to represent empty elements, you'll need to check for the empty string
2209  * before calling g_strsplit().
2210  *
2211  * Return value: a newly-allocated %NULL-terminated array of strings. Use
2212  *    g_strfreev() to free it.
2213  */
2214 gchar**
2215 g_strsplit (const gchar *string,
2216             const gchar *delimiter,
2217             gint         max_tokens)
2218 {
2219   GSList *string_list = NULL, *slist;
2220   gchar **str_array, *s;
2221   guint n = 0;
2222   const gchar *remainder;
2223
2224   g_return_val_if_fail (string != NULL, NULL);
2225   g_return_val_if_fail (delimiter != NULL, NULL);
2226   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2227
2228   if (max_tokens < 1)
2229     max_tokens = G_MAXINT;
2230
2231   remainder = string;
2232   s = strstr (remainder, delimiter);
2233   if (s)
2234     {
2235       gsize delimiter_len = strlen (delimiter);
2236
2237       while (--max_tokens && s)
2238         {
2239           gsize len;
2240
2241           len = s - remainder;
2242           string_list = g_slist_prepend (string_list,
2243                                          g_strndup (remainder, len));
2244           n++;
2245           remainder = s + delimiter_len;
2246           s = strstr (remainder, delimiter);
2247         }
2248     }
2249   if (*string)
2250     {
2251       n++;
2252       string_list = g_slist_prepend (string_list, g_strdup (remainder));
2253     }
2254
2255   str_array = g_new (gchar*, n + 1);
2256
2257   str_array[n--] = NULL;
2258   for (slist = string_list; slist; slist = slist->next)
2259     str_array[n--] = slist->data;
2260
2261   g_slist_free (string_list);
2262
2263   return str_array;
2264 }
2265
2266 /**
2267  * g_strsplit_set:
2268  * @string: The string to be tokenized
2269  * @delimiters: A nul-terminated string containing bytes that are used
2270  *     to split the string.
2271  * @max_tokens: The maximum number of tokens to split @string into.
2272  *     If this is less than 1, the string is split completely
2273  *
2274  * Splits @string into a number of tokens not containing any of the characters
2275  * in @delimiter. A token is the (possibly empty) longest string that does not
2276  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2277  * remainder is appended to the last token.
2278  *
2279  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2280  * %NULL-terminated vector containing the three strings "abc", "def",
2281  * and "ghi".
2282  *
2283  * The result if g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2284  * vector containing the four strings "", "def", "ghi", and "".
2285  *
2286  * As a special case, the result of splitting the empty string "" is an empty
2287  * vector, not a vector containing a single string. The reason for this
2288  * special case is that being able to represent a empty vector is typically
2289  * more useful than consistent handling of empty elements. If you do need
2290  * to represent empty elements, you'll need to check for the empty string
2291  * before calling g_strsplit_set().
2292  *
2293  * Note that this function works on bytes not characters, so it can't be used
2294  * to delimit UTF-8 strings for anything but ASCII characters.
2295  *
2296  * Return value: a newly-allocated %NULL-terminated array of strings. Use
2297  *    g_strfreev() to free it.
2298  *
2299  * Since: 2.4
2300  **/
2301 gchar **
2302 g_strsplit_set (const gchar *string,
2303                 const gchar *delimiters,
2304                 gint         max_tokens)
2305 {
2306   gboolean delim_table[256];
2307   GSList *tokens, *list;
2308   gint n_tokens;
2309   const gchar *s;
2310   const gchar *current;
2311   gchar *token;
2312   gchar **result;
2313
2314   g_return_val_if_fail (string != NULL, NULL);
2315   g_return_val_if_fail (delimiters != NULL, NULL);
2316
2317   if (max_tokens < 1)
2318     max_tokens = G_MAXINT;
2319
2320   if (*string == '\0')
2321     {
2322       result = g_new (char *, 1);
2323       result[0] = NULL;
2324       return result;
2325     }
2326
2327   memset (delim_table, FALSE, sizeof (delim_table));
2328   for (s = delimiters; *s != '\0'; ++s)
2329     delim_table[*(guchar *)s] = TRUE;
2330
2331   tokens = NULL;
2332   n_tokens = 0;
2333
2334   s = current = string;
2335   while (*s != '\0')
2336     {
2337       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2338         {
2339           token = g_strndup (current, s - current);
2340           tokens = g_slist_prepend (tokens, token);
2341           ++n_tokens;
2342
2343           current = s + 1;
2344         }
2345
2346       ++s;
2347     }
2348
2349   token = g_strndup (current, s - current);
2350   tokens = g_slist_prepend (tokens, token);
2351   ++n_tokens;
2352
2353   result = g_new (gchar *, n_tokens + 1);
2354
2355   result[n_tokens] = NULL;
2356   for (list = tokens; list != NULL; list = list->next)
2357     result[--n_tokens] = list->data;
2358
2359   g_slist_free (tokens);
2360
2361   return result;
2362 }
2363
2364 /**
2365  * g_strfreev:
2366  * @str_array: a %NULL-terminated array of strings to free
2367
2368  * Frees a %NULL-terminated array of strings, and the array itself.
2369  * If called on a %NULL value, g_strfreev() simply returns.
2370  **/
2371 void
2372 g_strfreev (gchar **str_array)
2373 {
2374   if (str_array)
2375     {
2376       int i;
2377
2378       for (i = 0; str_array[i] != NULL; i++)
2379         g_free (str_array[i]);
2380
2381       g_free (str_array);
2382     }
2383 }
2384
2385 /**
2386  * g_strdupv:
2387  * @str_array: a %NULL-terminated array of strings
2388  *
2389  * Copies %NULL-terminated array of strings. The copy is a deep copy;
2390  * the new array should be freed by first freeing each string, then
2391  * the array itself. g_strfreev() does this for you. If called
2392  * on a %NULL value, g_strdupv() simply returns %NULL.
2393  *
2394  * Return value: a new %NULL-terminated array of strings.
2395  */
2396 gchar**
2397 g_strdupv (gchar **str_array)
2398 {
2399   if (str_array)
2400     {
2401       gint i;
2402       gchar **retval;
2403
2404       i = 0;
2405       while (str_array[i])
2406         ++i;
2407
2408       retval = g_new (gchar*, i + 1);
2409
2410       i = 0;
2411       while (str_array[i])
2412         {
2413           retval[i] = g_strdup (str_array[i]);
2414           ++i;
2415         }
2416       retval[i] = NULL;
2417
2418       return retval;
2419     }
2420   else
2421     return NULL;
2422 }
2423
2424 /**
2425  * g_strjoinv:
2426  * @separator: a string to insert between each of the strings, or %NULL
2427  * @str_array: a %NULL-terminated array of strings to join
2428  *
2429  * Joins a number of strings together to form one long string, with the
2430  * optional @separator inserted between each of them. The returned string
2431  * should be freed with g_free().
2432  *
2433  * Returns: a newly-allocated string containing all of the strings joined
2434  *     together, with @separator between them
2435  */
2436 gchar*
2437 g_strjoinv (const gchar  *separator,
2438             gchar       **str_array)
2439 {
2440   gchar *string;
2441   gchar *ptr;
2442
2443   g_return_val_if_fail (str_array != NULL, NULL);
2444
2445   if (separator == NULL)
2446     separator = "";
2447
2448   if (*str_array)
2449     {
2450       gint i;
2451       gsize len;
2452       gsize separator_len;
2453
2454       separator_len = strlen (separator);
2455       /* First part, getting length */
2456       len = 1 + strlen (str_array[0]);
2457       for (i = 1; str_array[i] != NULL; i++)
2458         len += strlen (str_array[i]);
2459       len += separator_len * (i - 1);
2460
2461       /* Second part, building string */
2462       string = g_new (gchar, len);
2463       ptr = g_stpcpy (string, *str_array);
2464       for (i = 1; str_array[i] != NULL; i++)
2465         {
2466           ptr = g_stpcpy (ptr, separator);
2467           ptr = g_stpcpy (ptr, str_array[i]);
2468         }
2469       }
2470   else
2471     string = g_strdup ("");
2472
2473   return string;
2474 }
2475
2476 /**
2477  * g_strjoin:
2478  * @separator: a string to insert between each of the strings, or %NULL
2479  * @...: a %NULL-terminated list of strings to join
2480  *
2481  * Joins a number of strings together to form one long string, with the
2482  * optional @separator inserted between each of them. The returned string
2483  * should be freed with g_free().
2484  *
2485  * Returns: a newly-allocated string containing all of the strings joined
2486  *     together, with @separator between them
2487  */
2488 gchar*
2489 g_strjoin (const gchar *separator,
2490            ...)
2491 {
2492   gchar *string, *s;
2493   va_list args;
2494   gsize len;
2495   gsize separator_len;
2496   gchar *ptr;
2497
2498   if (separator == NULL)
2499     separator = "";
2500
2501   separator_len = strlen (separator);
2502
2503   va_start (args, separator);
2504
2505   s = va_arg (args, gchar*);
2506
2507   if (s)
2508     {
2509       /* First part, getting length */
2510       len = 1 + strlen (s);
2511
2512       s = va_arg (args, gchar*);
2513       while (s)
2514         {
2515           len += separator_len + strlen (s);
2516           s = va_arg (args, gchar*);
2517         }
2518       va_end (args);
2519
2520       /* Second part, building string */
2521       string = g_new (gchar, len);
2522
2523       va_start (args, separator);
2524
2525       s = va_arg (args, gchar*);
2526       ptr = g_stpcpy (string, s);
2527
2528       s = va_arg (args, gchar*);
2529       while (s)
2530         {
2531           ptr = g_stpcpy (ptr, separator);
2532           ptr = g_stpcpy (ptr, s);
2533           s = va_arg (args, gchar*);
2534         }
2535     }
2536   else
2537     string = g_strdup ("");
2538
2539   va_end (args);
2540
2541   return string;
2542 }
2543
2544
2545 /**
2546  * g_strstr_len:
2547  * @haystack: a string
2548  * @haystack_len: the maximum length of @haystack. Note that -1 is
2549  *     a valid length, if @haystack is nul-terminated, meaning it will
2550  *     search through the whole string.
2551  * @needle: the string to search for
2552  *
2553  * Searches the string @haystack for the first occurrence
2554  * of the string @needle, limiting the length of the search
2555  * to @haystack_len.
2556  *
2557  * Return value: a pointer to the found occurrence, or
2558  *    %NULL if not found.
2559  */
2560 gchar *
2561 g_strstr_len (const gchar *haystack,
2562               gssize       haystack_len,
2563               const gchar *needle)
2564 {
2565   g_return_val_if_fail (haystack != NULL, NULL);
2566   g_return_val_if_fail (needle != NULL, NULL);
2567
2568   if (haystack_len < 0)
2569     return strstr (haystack, needle);
2570   else
2571     {
2572       const gchar *p = haystack;
2573       gsize needle_len = strlen (needle);
2574       const gchar *end;
2575       gsize i;
2576
2577       if (needle_len == 0)
2578         return (gchar *)haystack;
2579
2580       if (haystack_len < needle_len)
2581         return NULL;
2582
2583       end = haystack + haystack_len - needle_len;
2584
2585       while (p <= end && *p)
2586         {
2587           for (i = 0; i < needle_len; i++)
2588             if (p[i] != needle[i])
2589               goto next;
2590
2591           return (gchar *)p;
2592
2593         next:
2594           p++;
2595         }
2596
2597       return NULL;
2598     }
2599 }
2600
2601 /**
2602  * g_strrstr:
2603  * @haystack: a nul-terminated string
2604  * @needle: the nul-terminated string to search for
2605  *
2606  * Searches the string @haystack for the last occurrence
2607  * of the string @needle.
2608  *
2609  * Return value: a pointer to the found occurrence, or
2610  *    %NULL if not found.
2611  */
2612 gchar *
2613 g_strrstr (const gchar *haystack,
2614            const gchar *needle)
2615 {
2616   gsize i;
2617   gsize needle_len;
2618   gsize haystack_len;
2619   const gchar *p;
2620
2621   g_return_val_if_fail (haystack != NULL, NULL);
2622   g_return_val_if_fail (needle != NULL, NULL);
2623
2624   needle_len = strlen (needle);
2625   haystack_len = strlen (haystack);
2626
2627   if (needle_len == 0)
2628     return (gchar *)haystack;
2629
2630   if (haystack_len < needle_len)
2631     return NULL;
2632
2633   p = haystack + haystack_len - needle_len;
2634
2635   while (p >= haystack)
2636     {
2637       for (i = 0; i < needle_len; i++)
2638         if (p[i] != needle[i])
2639           goto next;
2640
2641       return (gchar *)p;
2642
2643     next:
2644       p--;
2645     }
2646
2647   return NULL;
2648 }
2649
2650 /**
2651  * g_strrstr_len:
2652  * @haystack: a nul-terminated string
2653  * @haystack_len: the maximum length of @haystack
2654  * @needle: the nul-terminated string to search for
2655  *
2656  * Searches the string @haystack for the last occurrence
2657  * of the string @needle, limiting the length of the search
2658  * to @haystack_len.
2659  *
2660  * Return value: a pointer to the found occurrence, or
2661  *    %NULL if not found.
2662  */
2663 gchar *
2664 g_strrstr_len (const gchar *haystack,
2665                gssize        haystack_len,
2666                const gchar *needle)
2667 {
2668   g_return_val_if_fail (haystack != NULL, NULL);
2669   g_return_val_if_fail (needle != NULL, NULL);
2670
2671   if (haystack_len < 0)
2672     return g_strrstr (haystack, needle);
2673   else
2674     {
2675       gsize needle_len = strlen (needle);
2676       const gchar *haystack_max = haystack + haystack_len;
2677       const gchar *p = haystack;
2678       gsize i;
2679
2680       while (p < haystack_max && *p)
2681         p++;
2682
2683       if (p < haystack + needle_len)
2684         return NULL;
2685
2686       p -= needle_len;
2687
2688       while (p >= haystack)
2689         {
2690           for (i = 0; i < needle_len; i++)
2691             if (p[i] != needle[i])
2692               goto next;
2693
2694           return (gchar *)p;
2695
2696         next:
2697           p--;
2698         }
2699
2700       return NULL;
2701     }
2702 }
2703
2704
2705 /**
2706  * g_str_has_suffix:
2707  * @str: a nul-terminated string
2708  * @suffix: the nul-terminated suffix to look for
2709  *
2710  * Looks whether the string @str ends with @suffix.
2711  *
2712  * Return value: %TRUE if @str end with @suffix, %FALSE otherwise.
2713  *
2714  * Since: 2.2
2715  */
2716 gboolean
2717 g_str_has_suffix (const gchar *str,
2718                   const gchar *suffix)
2719 {
2720   int str_len;
2721   int suffix_len;
2722
2723   g_return_val_if_fail (str != NULL, FALSE);
2724   g_return_val_if_fail (suffix != NULL, FALSE);
2725
2726   str_len = strlen (str);
2727   suffix_len = strlen (suffix);
2728
2729   if (str_len < suffix_len)
2730     return FALSE;
2731
2732   return strcmp (str + str_len - suffix_len, suffix) == 0;
2733 }
2734
2735 /**
2736  * g_str_has_prefix:
2737  * @str: a nul-terminated string
2738  * @prefix: the nul-terminated prefix to look for
2739  *
2740  * Looks whether the string @str begins with @prefix.
2741  *
2742  * Return value: %TRUE if @str begins with @prefix, %FALSE otherwise.
2743  *
2744  * Since: 2.2
2745  */
2746 gboolean
2747 g_str_has_prefix (const gchar *str,
2748                   const gchar *prefix)
2749 {
2750   int str_len;
2751   int prefix_len;
2752
2753   g_return_val_if_fail (str != NULL, FALSE);
2754   g_return_val_if_fail (prefix != NULL, FALSE);
2755
2756   str_len = strlen (str);
2757   prefix_len = strlen (prefix);
2758
2759   if (str_len < prefix_len)
2760     return FALSE;
2761
2762   return strncmp (str, prefix, prefix_len) == 0;
2763 }
2764
2765 /**
2766  * g_strv_length:
2767  * @str_array: a %NULL-terminated array of strings
2768  *
2769  * Returns the length of the given %NULL-terminated
2770  * string array @str_array.
2771  *
2772  * Return value: length of @str_array.
2773  *
2774  * Since: 2.6
2775  */
2776 guint
2777 g_strv_length (gchar **str_array)
2778 {
2779   guint i = 0;
2780
2781   g_return_val_if_fail (str_array != NULL, 0);
2782
2783   while (str_array[i])
2784     ++i;
2785
2786   return i;
2787 }