1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
38 #include <ctype.h> /* For tolower() */
41 /* Needed on BSD/OS X for e.g. strtod_l */
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51 #include "gstrfuncs.h"
54 #include "gprintfint.h"
59 * SECTION:string_utils
60 * @title: String Utility Functions
61 * @short_description: various string-related functions
63 * This section describes a number of utility functions for creating,
64 * duplicating, and manipulating strings.
66 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
67 * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
68 * are declared in the header <filename>gprintf.h</filename> which is
69 * not included in <filename>glib.h</filename>
70 * (otherwise using <filename>glib.h</filename> would drag in
71 * <filename>stdio.h</filename>), so you'll have to explicitly include
72 * <literal><glib/gprintf.h></literal> in order to use the GLib
75 * <para id="string-precision">While you may use the printf() functions
76 * to format UTF-8 strings, notice that the precision of a
77 * <literal>%Ns</literal> parameter is interpreted as the
78 * number of bytes, not characters to print. On top of that, the GNU
79 * libc implementation of the printf() functions has the "feature" that
80 * it checks that the string given for the <literal>%Ns</literal>
81 * parameter consists of a whole number of characters in the current
82 * encoding. So, unless you are sure you are always going to be in a
83 * UTF-8 locale or you know your text is restricted to ASCII, avoid
84 * using <literal>%Ns</literal>. If your intention is to format
85 * strings for a certain number of columns, then
86 * <literal>%Ns</literal> is not a correct solution anyway, since it
87 * fails to take wide characters (see g_unichar_iswide()) into account.
95 * Determines whether a character is alphanumeric.
97 * Unlike the standard C library isalnum() function, this only
98 * recognizes standard ASCII letters and ignores the locale,
99 * returning %FALSE for all non-ASCII characters. Also, unlike
100 * the standard library function, this takes a char, not an int,
101 * so don't call it on %EOF, but no need to cast to #guchar before
102 * passing a possibly non-ASCII character in.
104 * Returns: %TRUE if @c is an ASCII alphanumeric character
111 * Determines whether a character is alphabetic (i.e. a letter).
113 * Unlike the standard C library isalpha() function, this only
114 * recognizes standard ASCII letters and ignores the locale,
115 * returning %FALSE for all non-ASCII characters. Also, unlike
116 * the standard library function, this takes a char, not an int,
117 * so don't call it on %EOF, but no need to cast to #guchar before
118 * passing a possibly non-ASCII character in.
120 * Returns: %TRUE if @c is an ASCII alphabetic character
127 * Determines whether a character is a control character.
129 * Unlike the standard C library iscntrl() function, this only
130 * recognizes standard ASCII control characters and ignores the
131 * locale, returning %FALSE for all non-ASCII characters. Also,
132 * unlike the standard library function, this takes a char, not
133 * an int, so don't call it on %EOF, but no need to cast to #guchar
134 * before passing a possibly non-ASCII character in.
136 * Returns: %TRUE if @c is an ASCII control character.
143 * Determines whether a character is a digit (0-9).
145 * Unlike the standard C library isdigit() function, this takes
146 * a char, not an int, so don't call it on %EOF, but no need to
147 * cast to #guchar before passing a possibly non-ASCII character in.
149 * Returns: %TRUE if @c is an ASCII digit.
156 * Determines whether a character is a printing character and not a space.
158 * Unlike the standard C library isgraph() function, this only
159 * recognizes standard ASCII characters and ignores the locale,
160 * returning %FALSE for all non-ASCII characters. Also, unlike
161 * the standard library function, this takes a char, not an int,
162 * so don't call it on %EOF, but no need to cast to #guchar before
163 * passing a possibly non-ASCII character in.
165 * Returns: %TRUE if @c is an ASCII printing character other than space.
172 * Determines whether a character is an ASCII lower case letter.
174 * Unlike the standard C library islower() function, this only
175 * recognizes standard ASCII letters and ignores the locale,
176 * returning %FALSE for all non-ASCII characters. Also, unlike
177 * the standard library function, this takes a char, not an int,
178 * so don't call it on %EOF, but no need to worry about casting
179 * to #guchar before passing a possibly non-ASCII character in.
181 * Returns: %TRUE if @c is an ASCII lower case letter
188 * Determines whether a character is a printing character.
190 * Unlike the standard C library isprint() function, this only
191 * recognizes standard ASCII characters and ignores the locale,
192 * returning %FALSE for all non-ASCII characters. Also, unlike
193 * the standard library function, this takes a char, not an int,
194 * so don't call it on %EOF, but no need to cast to #guchar before
195 * passing a possibly non-ASCII character in.
197 * Returns: %TRUE if @c is an ASCII printing character.
204 * Determines whether a character is a punctuation character.
206 * Unlike the standard C library ispunct() function, this only
207 * recognizes standard ASCII punctuation and ignores the locale,
208 * returning %FALSE for all non-ASCII characters. Also, unlike
209 * the standard library function, this takes a char, not an int,
210 * so don't call it on %EOF, but no need to cast to #guchar before
211 * passing a possibly non-ASCII character in.
213 * Returns: %TRUE if @c is an ASCII punctuation character.
220 * Determines whether a character is a white-space character.
222 * Unlike the standard C library isspace() function, this only
223 * recognizes standard ASCII white-space and ignores the locale,
224 * returning %FALSE for all non-ASCII characters. Also, unlike
225 * the standard library function, this takes a char, not an int,
226 * so don't call it on %EOF, but no need to cast to #guchar before
227 * passing a possibly non-ASCII character in.
229 * Returns: %TRUE if @c is an ASCII white-space character
236 * Determines whether a character is an ASCII upper case letter.
238 * Unlike the standard C library isupper() function, this only
239 * recognizes standard ASCII letters and ignores the locale,
240 * returning %FALSE for all non-ASCII characters. Also, unlike
241 * the standard library function, this takes a char, not an int,
242 * so don't call it on %EOF, but no need to worry about casting
243 * to #guchar before passing a possibly non-ASCII character in.
245 * Returns: %TRUE if @c is an ASCII upper case letter
252 * Determines whether a character is a hexadecimal-digit character.
254 * Unlike the standard C library isxdigit() function, this takes
255 * a char, not an int, so don't call it on %EOF, but no need to
256 * cast to #guchar before passing a possibly non-ASCII character in.
258 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
262 * G_ASCII_DTOSTR_BUF_SIZE:
264 * A good size for a buffer to be passed into g_ascii_dtostr().
265 * It is guaranteed to be enough for all output of that function
266 * on systems with 64bit IEEE-compatible doubles.
268 * The typical usage would be something like:
270 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
272 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
278 * @string: a string to remove the leading and trailing whitespace from
280 * Removes leading and trailing whitespace from a string.
281 * See g_strchomp() and g_strchug().
289 * The standard delimiters, used in g_strdelimit().
/* Per-byte classification table: one guint16 flag word for each of the 256
 * possible byte values, indexed by the byte value.  Only the first 128
 * (ASCII) entries are written out; the rest of the 256-element array is left
 * to default zero-initialization, so non-ASCII bytes carry no flags.  The
 * meaning of the individual bits is not defined in this file (presumably the
 * GAsciiType flags declared in gstrfuncs.h -- TODO confirm). */
292 static const guint16 ascii_table_data[256] = {
/* 0x00 - 0x1f */
293 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
294 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
295 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
296 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
/* 0x20 - 0x2f: space, then '!' .. '/' */
297 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
298 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
/* 0x30 - 0x3f: '0' .. '9', then ':' .. '?' */
299 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
300 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
/* 0x40 - 0x5f: '@', 'A' .. 'Z' (the first six letters carry an extra bit),
 * then '[' .. '_' */
301 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
302 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
303 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
304 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
/* 0x60 - 0x7f: '`', 'a' .. 'z' (the first six letters carry an extra bit),
 * then '{' .. '~' and DEL */
305 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
306 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
307 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
308 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
309 /* the upper 128 are all zeroes */
/* The table is exposed to the rest of the library through this constant
 * pointer. */
312 const guint16 * const g_ascii_table = ascii_table_data;
/* USE_XLOCALE is defined only when every one of the HAVE_* feature macros
 * tested below is defined, i.e. when newlocale, uselocale, strtod_l,
 * strtoull_l and strtoll_l were all detected.  Code further down in this
 * file uses those functions together with get_C_locale(). */
314 #if defined (HAVE_NEWLOCALE) && \
315 defined (HAVE_USELOCALE) && \
316 defined (HAVE_STRTOD_L) && \
317 defined (HAVE_STRTOULL_L) && \
318 defined (HAVE_STRTOLL_L)
319 #define USE_XLOCALE 1
/* Lazily created handle for the "C" locale.  `initialized` is the guard
 * variable handed to g_once_init_enter()/g_once_init_leave(); `C_locale`
 * caches the value returned by newlocale(). */
326 static gsize initialized = FALSE;
327 static locale_t C_locale = NULL;
/* The locale object is created only on the path where g_once_init_enter()
 * returns true. */
329 if (g_once_init_enter (&initialized))
/* NOTE(review): the newlocale() result is stored without a NULL check in
 * the lines visible here. */
331 C_locale = newlocale (LC_ALL_MASK, "C", NULL);
332 g_once_init_leave (&initialized, TRUE);
341 * @str: the string to duplicate
343 * Duplicates a string. If @str is %NULL it returns %NULL.
344 * The returned string should be freed with g_free()
345 * when no longer needed.
347 * Returns: a newly-allocated copy of @str
350 g_strdup (const gchar *str)
/* length includes the terminating nul, so the memcpy() below copies the
 * terminator along with the characters.  The documented %NULL case is
 * handled on lines not visible in this excerpt. */
357 length = strlen (str) + 1;
358 new_str = g_new (char, length);
359 memcpy (new_str, str, length);
369 * @mem: the memory to copy.
370 * @byte_size: the number of bytes to copy.
372 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
373 * from @mem. If @mem is %NULL it returns %NULL.
375 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem is %NULL.
379 g_memdup (gconstpointer mem,
386 new_mem = g_malloc (byte_size);
/* Copy exactly byte_size bytes from @mem into the buffer just obtained from
 * g_malloc().  The documented %NULL-@mem case is not among the lines visible
 * in this excerpt. */
387 memcpy (new_mem, mem, byte_size);
397 * @str: the string to duplicate
398 * @n: the maximum number of bytes to copy from @str
400 * Duplicates the first @n bytes of a string, returning a newly-allocated
401 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
402 * is less than @n bytes long the buffer is padded with nuls. If @str is
403 * %NULL it returns %NULL. The returned value should be freed when no longer needed.
406 * To copy a number of characters from a UTF-8 encoded string, use
407 * g_utf8_strncpy() instead.
409 * Returns: a newly-allocated buffer containing the first @n bytes
410 * of @str, nul-terminated
413 g_strndup (const gchar *str,
420 new_str = g_new (gchar, n + 1);
/* The buffer above is n + 1 bytes: room for n copied bytes plus a
 * terminating nul.  strncpy() copies at most n bytes and pads with nuls when
 * @str is shorter, but writes no terminator when @str has n or more bytes;
 * the store of the final nul is not among the lines visible here (TODO
 * confirm it follows).
 * NOTE(review): n + 1 wraps to 0 if n is the maximum gsize value. */
421 strncpy (new_str, str, n);
432 * @length: the length of the new string
433 * @fill_char: the byte to fill the string with
435 * Creates a new string @length bytes long filled with @fill_char.
436 * The returned string should be freed when no longer needed.
438 * Returns: a newly-allocated string filled with @fill_char
441 g_strnfill (gsize length,
446 str = g_new (gchar, length + 1);
/* The buffer above holds length fill bytes plus one extra byte.  Only the
 * first `length` bytes are written by this memset(); fill_char is converted
 * to guchar before being passed as memset()'s int argument.  The store into
 * the extra byte is not among the lines visible here (presumably the
 * terminating nul -- confirm). */
447 memset (str, (guchar)fill_char, length);
455 * @dest: destination buffer.
456 * @src: source string.
458 * Copies a nul-terminated string into the dest buffer, including the
459 * trailing nul, and returns a pointer to the trailing nul byte.
460 * This is useful for concatenating multiple strings together
461 * without having to repeatedly scan for the end.
463 * Return value: a pointer to trailing nul byte.
466 g_stpcpy (gchar *dest,
470 g_return_val_if_fail (dest != NULL, NULL);
471 g_return_val_if_fail (src != NULL, NULL);
/* First variant: once both pointers are validated, the copy is delegated to
 * the C library's stpcpy().  NOTE(review): the preprocessor conditional that
 * selects between this variant and the hand-written one below is not visible
 * in this excerpt. */
472 return stpcpy (dest, src);
/* Second, hand-written variant: d and s are cursors into @dest and @src. */
474 register gchar *d = dest;
475 register const gchar *s = src;
477 g_return_val_if_fail (dest != NULL, NULL);
478 g_return_val_if_fail (src != NULL, NULL);
/* The condition tests the byte at s and then advances s; the loop ends once
 * the tested byte is the nul.  The statement that stores through d is not
 * among the lines visible here. */
481 while (*s++ != '\0');
489 * @format: a standard printf() format string, but notice
490 * <link linkend="string-precision">string precision pitfalls</link>
491 * @args: the list of parameters to insert into the format string
493 * Similar to the standard C vsprintf() function but safer, since it
494 * calculates the maximum space required and allocates memory to hold
495 * the result. The returned string should be freed with g_free() when no longer needed.
498 * See also g_vasprintf(), which offers the same functionality, but
499 * additionally returns the length of the allocated string.
501 * Returns: a newly-allocated string holding the result
504 g_strdup_vprintf (const gchar *format,
507 gchar *string = NULL;
/* string starts out NULL and is passed by address; g_vasprintf() is expected
 * to store the newly formatted string there.  Its return value (the length,
 * according to the documentation above) is not used. */
509 g_vasprintf (&string, format, args);
516 * @format: a standard printf() format string, but notice
517 * <link linkend="string-precision">string precision pitfalls</link>
518 * @...: the parameters to insert into the format string
520 * Similar to the standard C sprintf() function but safer, since it
521 * calculates the maximum space required and allocates memory to hold
522 * the result. The returned string should be freed with g_free() when no longer needed.
525 * Returns: a newly-allocated string holding the result
528 g_strdup_printf (const gchar *format,
534 va_start (args, format);
/* The variadic arguments are forwarded as a va_list to g_strdup_vprintf(),
 * which does the actual formatting and allocation. */
535 buffer = g_strdup_vprintf (format, args);
543 * @string1: the first string to add, which must not be %NULL
544 * @...: a %NULL-terminated list of strings to append to the string
546 * Concatenates all of the given strings into one long string. The
547 * returned string should be freed with g_free() when no longer needed.
549 * The variable argument list must end with %NULL. If you forget the %NULL,
550 * g_strconcat() will start appending random memory junk to your string.
552 * Note that this function is usually not the right function to use to
553 * assemble a translated message from pieces, since proper translation
554 * often requires the pieces to be reordered.
556 * Returns: a newly-allocated string containing all the string arguments
559 g_strconcat (const gchar *string1, ...)
/* First pass over the arguments.  l is the allocation size used below; it
 * starts as the length of string1 plus one byte for the terminating nul.
 * The statement that adds the length of each further argument is not among
 * the lines visible here. */
570 l = 1 + strlen (string1);
571 va_start (args, string1);
572 s = va_arg (args, gchar*);
576 s = va_arg (args, gchar*);
/* Allocate l bytes for the result. */
580 concat = g_new (gchar, l);
/* Second pass: the argument list is restarted and each string is appended at
 * ptr.  g_stpcpy() returns a pointer to the nul it just wrote, which becomes
 * the write position for the next string. */
583 ptr = g_stpcpy (ptr, string1);
584 va_start (args, string1);
585 s = va_arg (args, gchar*);
588 ptr = g_stpcpy (ptr, s);
589 s = va_arg (args, gchar*);
598 * @nptr: the string to convert to a numeric value.
599 * @endptr: if non-%NULL, it returns the character after
600 * the last character used in the conversion.
602 * Converts a string to a #gdouble value.
603 * It calls the standard strtod() function to handle the conversion, but
604 * if the string is not completely converted it attempts the conversion
605 * again with g_ascii_strtod(), and returns the best match.
607 * This function should seldom be used. The normal situation when reading
608 * numbers not for human consumption is to use g_ascii_strtod(). Only when
609 * you know that you must expect both locale formatted and C formatted numbers
610 * should you use this. Make sure that you don't pass strings such as comma
611 * separated lists of values, since the commas may be interpreted as a decimal
612 * point in some locales, causing unexpected results.
614 * Return value: the #gdouble value.
617 g_strtod (const gchar *nptr,
625 g_return_val_if_fail (nptr != NULL, 0);
/* First attempt: the C library's strtod(); fail_pos_1 receives the position
 * where parsing stopped. */
630 val_1 = strtod (nptr, &fail_pos_1);
/* If strtod() did not consume the whole string, parse again with
 * g_ascii_strtod() and record where that attempt stopped. */
632 if (fail_pos_1 && fail_pos_1[0] != 0)
633 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
/* The strtod() end position is reported when strtod() consumed the whole
 * string or stopped at or beyond the point where g_ascii_strtod() stopped.
 * The second *endptr assignment below presumably belongs to the else branch
 * (the else line itself is not visible here). */
635 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
638 *endptr = fail_pos_1;
644 *endptr = fail_pos_2;
651 * @nptr: the string to convert to a numeric value.
652 * @endptr: if non-%NULL, it returns the character after
653 * the last character used in the conversion.
655 * Converts a string to a #gdouble value.
657 * This function behaves like the standard strtod() function
658 * does in the C locale. It does this without actually changing
659 * the current locale, since that would not be thread-safe.
660 * A limitation of the implementation is that this function
661 * will still accept localized versions of infinities and NANs.
663 * This function is typically used when reading configuration
664 * files or other non-user input that should be locale independent.
665 * To handle input from the user you should normally use the
666 * locale-sensitive system strtod() function.
668 * To convert from a #gdouble to a string in a locale-insensitive
669 * way, use g_ascii_dtostr().
671 * If the correct value would cause overflow, plus or minus <literal>HUGE_VAL</literal>
672 * is returned (according to the sign of the value), and <literal>ERANGE</literal> is
673 * stored in <literal>errno</literal>. If the correct value would cause underflow,
674 * zero is returned and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
676 * This function resets <literal>errno</literal> before calling strtod() so that
677 * you can reliably detect overflow and underflow.
679 * Return value: the #gdouble value.
682 g_ascii_strtod (const gchar *nptr,
687 g_return_val_if_fail (nptr != NULL, 0);
/* First variant: parse with strtod_l() against the locale handle returned by
 * get_C_locale(). */
691 return strtod_l (nptr, endptr, get_C_locale ());
/* Second variant, built on plain strtod() and localeconv().
 * NOTE(review): the preprocessor lines that select between the two variants
 * are not visible in this excerpt. */
698 struct lconv *locale_data;
700 const char *decimal_point;
701 int decimal_point_len;
702 const char *p, *decimal_point_pos;
703 const char *end = NULL; /* Silence gcc */
706 g_return_val_if_fail (nptr != NULL, 0);
/* Look up the decimal point string of the current locale and its length. */
711 locale_data = localeconv ();
712 decimal_point = locale_data->decimal_point;
713 decimal_point_len = strlen (decimal_point);
/* NOTE(review): this second assignment presumably belongs to an alternative
 * branch whose conditional lines are not visible here. */
716 decimal_point_len = 1;
719 g_assert (decimal_point_len != 0);
/* decimal_point_pos stays NULL unless the scan below finds a '.' in the
 * number. */
721 decimal_point_pos = NULL;
/* The input is pre-scanned only when the locale's decimal point is something
 * other than exactly ".". */
724 if (decimal_point[0] != '.' ||
725 decimal_point[1] != 0)
728 /* Skip leading space */
729 while (g_ascii_isspace (*p))
732 /* Skip leading optional sign */
733 if (*p == '+' || *p == '-')
737 (p[1] == 'x' || p[1] == 'X'))
740 /* HEX - find the (optional) decimal point */
742 while (g_ascii_isxdigit (*p))
/* Remember where the '.' sits in the input and step over it. */
746 decimal_point_pos = p++;
748 while (g_ascii_isxdigit (*p))
/* Optional exponent of a hexadecimal number: 'p' or 'P', an optional sign,
 * then decimal digits. */
751 if (*p == 'p' || *p == 'P')
753 if (*p == '+' || *p == '-')
755 while (g_ascii_isdigit (*p))
/* Decimal number: digits, an optional '.', more digits, and an optional
 * 'e'/'E' exponent with optional sign. */
760 else if (g_ascii_isdigit (*p) || *p == '.')
762 while (g_ascii_isdigit (*p))
766 decimal_point_pos = p++;
768 while (g_ascii_isdigit (*p))
771 if (*p == 'e' || *p == 'E')
773 if (*p == '+' || *p == '-')
775 while (g_ascii_isdigit (*p))
780 /* For the other cases, we need not convert the decimal point */
/* A '.' was found during the scan: strtod() is run on a rewritten copy. */
783 if (decimal_point_pos)
787 /* We need to convert the '.' to the locale specific decimal point */
/* Sized from the scanned length (end - nptr), one extra byte, and the length
 * of the locale's decimal point string.  `end` is assigned on a line not
 * visible here. */
788 copy = g_malloc (end - nptr + 1 + decimal_point_len);
/* Assemble the copy in three pieces: the text before the '.', the locale's
 * decimal point, and the text after the '.' up to end. */
791 memcpy (c, nptr, decimal_point_pos - nptr);
792 c += decimal_point_pos - nptr;
793 memcpy (c, decimal_point, decimal_point_len);
794 c += decimal_point_len;
795 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
796 c += end - (decimal_point_pos + 1);
/* Parse the copy and record errno immediately; the recorded value is written
 * back to errno at the end of this block. */
800 val = strtod (copy, &fail_pos);
801 strtod_errno = errno;
/* Translate the stop position from the copy back into nptr.  When parsing
 * stopped beyond the decimal point, the offset is reduced by the number of
 * bytes by which the locale's decimal point is longer than the single '.'. */
805 if (fail_pos - copy > decimal_point_pos - nptr)
806 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
808 fail_pos = (char *)nptr + (fail_pos - copy);
/* Path without a '.': strtod() is run on a nul-terminated copy of just the
 * range from nptr up to end, and the stop position is mapped back by offset. */
818 copy = g_malloc (end - (char *)nptr + 1);
819 memcpy (copy, nptr, end - nptr);
820 *(copy + (end - (char *)nptr)) = 0;
823 val = strtod (copy, &fail_pos);
824 strtod_errno = errno;
828 fail_pos = (char *)nptr + (fail_pos - copy);
/* Remaining path: strtod() is run on nptr directly. */
836 val = strtod (nptr, &fail_pos);
837 strtod_errno = errno;
/* Put back the errno value recorded right after the strtod() call. */
843 errno = strtod_errno;
852 * @buffer: A buffer to place the resulting string in
853 * @buf_len: The length of the buffer.
854 * @d: The #gdouble to convert
856 * Converts a #gdouble to a string, using the '.' as decimal point.
859 * This function generates enough precision that converting
860 * the string back using g_ascii_strtod() gives the same machine-number
861 * (on machines with IEEE compatible 64bit doubles). It is
862 * guaranteed that the size of the resulting string will never
863 * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes.
865 * Return value: The pointer to the buffer with the converted string.
868 g_ascii_dtostr (gchar *buffer,
/* The conversion is delegated to g_ascii_formatd() with the fixed format
 * "%.17g", i.e. up to 17 significant digits. */
872 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
/* Save the current GCC diagnostic state and silence -Wformat-nonliteral for
 * the code that follows, which passes a caller-supplied format string to
 * _g_snprintf().  The matching "diagnostic pop" appears after
 * g_ascii_formatd(). */
875 #pragma GCC diagnostic push
876 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
880 * @buffer: A buffer to place the resulting string in
881 * @buf_len: The length of the buffer.
882 * @format: The printf()-style format to use for
883 * converting.
884 * @d: The #gdouble to convert
886 * Converts a #gdouble to a string, using the '.' as
887 * decimal point. To format the number you pass in
888 * a printf()-style format string. Allowed conversion
889 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
891 * If you just want to serialize the value into a
892 * string, use g_ascii_dtostr().
894 * Return value: The pointer to the buffer with the converted string.
897 g_ascii_formatd (gchar *buffer,
/* First variant: switch to the locale returned by get_C_locale() with
 * uselocale(), format into buffer, then switch back to the locale that
 * uselocale() reported as previously active. */
905 old_locale = uselocale (get_C_locale ());
906 _g_snprintf (buffer, buf_len, format, d);
907 uselocale (old_locale);
/* Second variant, built on localeconv(): format in the current locale and
 * then rewrite the decimal point in place.
 * NOTE(review): the preprocessor lines that select between the two variants
 * are not visible in this excerpt. */
912 struct lconv *locale_data;
914 const char *decimal_point;
915 int decimal_point_len;
/* Argument checks: buffer must be non-NULL, the format must start with '%'
 * and must not contain any of the characters ', l or % after that. */
920 g_return_val_if_fail (buffer != NULL, NULL);
921 g_return_val_if_fail (format[0] == '%', NULL);
922 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
/* The conversion specifier is taken to be the last character of the format
 * and must be one of e, E, f, F, g, G. */
924 format_char = format[strlen (format) - 1];
926 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
927 format_char == 'f' || format_char == 'F' ||
928 format_char == 'g' || format_char == 'G',
931 if (format[0] != '%')
934 if (strpbrk (format + 1, "'l%"))
937 if (!(format_char == 'e' || format_char == 'E' ||
938 format_char == 'f' || format_char == 'F' ||
939 format_char == 'g' || format_char == 'G'))
/* The three plain if tests above repeat the g_return_val_if_fail()
 * conditions; their bodies are not visible here.  The number is then
 * formatted and the decimal point is fixed up below. */
942 _g_snprintf (buffer, buf_len, format, d);
945 locale_data = localeconv ();
946 decimal_point = locale_data->decimal_point;
947 decimal_point_len = strlen (decimal_point);
/* NOTE(review): this second assignment presumably belongs to an alternative
 * branch whose conditional lines are not visible here. */
950 decimal_point_len = 1;
953 g_assert (decimal_point_len != 0);
/* The output is rewritten only when the locale's decimal point is something
 * other than exactly ".". */
955 if (decimal_point[0] != '.' ||
956 decimal_point[1] != 0)
/* Walk p over optional white space, an optional sign and the integer digits
 * to reach the place where the decimal point would be.
 * NOTE(review): the digit loop uses the C library's isdigit() while the
 * white-space test uses g_ascii_isspace(). */
960 while (g_ascii_isspace (*p))
963 if (*p == '+' || *p == '-')
966 while (isdigit ((guchar)*p))
969 if (strncmp (p, decimal_point, decimal_point_len) == 0)
/* A decimal point longer than one byte leaves surplus bytes once it is
 * replaced; the rest of the string is shifted left over them with memmove().
 * The stores that write the '.' and the new terminator are not among the
 * lines visible here. */
973 if (decimal_point_len > 1)
975 rest_len = strlen (p + (decimal_point_len-1));
976 memmove (p, p + (decimal_point_len-1), rest_len);
985 #pragma GCC diagnostic pop
/* ASCII-only character helpers used by the hand-written integer parser
 * below.  They never consult the locale.  The range form of ISSPACE relies on
 * '\t', '\n', '\v', '\f' and '\r' being consecutive character codes, as they
 * are in ASCII.  Each macro may evaluate its argument more than once, so
 * arguments must be free of side effects. */
#define ISSPACE(c) ((c) == ' ' || ((c) >= '\t' && (c) <= '\r'))
#define ISUPPER(c) ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c) (ISLOWER (c) || ISUPPER (c))
#define TOUPPER(c) (ISLOWER (c) ? (c) - ('a' - 'A') : (c))
#define TOLOWER(c) (ISUPPER (c) ? (c) + ('a' - 'A') : (c))
998 g_parse_long_long (const gchar *nptr,
999 const gchar **endptr,
1003 /* this code is based on the strtol(3) code from GNU libc released under
1004 * the GNU Lesser General Public License.
1006 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1007 * Free Software Foundation, Inc.
1013 const gchar *s, *save;
1016 g_return_val_if_fail (nptr != NULL, 0);
1019 if (base == 1 || base > 36)
1029 /* Skip white space. */
1030 while (ISSPACE (*s))
/* Reaching the end of the string here means there was nothing to parse
 * after the leading white space. */
1033 if (G_UNLIKELY (!*s))
1036 /* Check for a sign. */
1045 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
/* A hexadecimal prefix is considered only for base 0 or 16; TOUPPER makes
 * the 'x' test case-insensitive. */
1048 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1059 /* Save the pointer so we can check later if anything happened. */
/* Overflow limits for the accumulation loop: cutoff is G_MAXUINT64 divided
 * by base, cutlim the remainder of that division.  They are compared against
 * the accumulator and the next digit in the overflow check below. */
1061 cutoff = G_MAXUINT64 / base;
1062 cutlim = G_MAXUINT64 % base;
/* Letters are turned into a digit value with 'A'/'a' mapping to 10; the
 * handling of '0'..'9' is on a line not visible here. */
1069 if (c >= '0' && c <= '9')
1071 else if (ISALPHA (c))
1072 c = TOUPPER (c) - 'A' + 10;
1077 /* Check for overflow. */
1078 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1087 /* Check if anything actually happened. */
1091 /* Store in ENDPTR the address of one character
1092 past the last character we converted. */
1096 if (G_UNLIKELY (overflow))
1105 /* We must handle a special case here: the base is 0 or 16 and the
1106 first two characters are '0' and 'x', but the rest are not
1107 hexadecimal digits. This is not an error case. We return 0 and
1108 ENDPTR points to the `x`. */
1111 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1113 *endptr = &save[-1];
1115 /* There was no number to convert. */
1120 #endif /* !USE_XLOCALE */
1124 * @nptr: the string to convert to a numeric value.
1125 * @endptr: if non-%NULL, it returns the character after
1126 * the last character used in the conversion.
1127 * @base: to be used for the conversion, 2..36 or 0
1129 * Converts a string to a #guint64 value.
1130 * This function behaves like the standard strtoull() function
1131 * does in the C locale. It does this without actually
1132 * changing the current locale, since that would not be thread-safe.
1135 * This function is typically used when reading configuration
1136 * files or other non-user input that should be locale independent.
1137 * To handle input from the user you should normally use the
1138 * locale-sensitive system strtoull() function.
1140 * If the correct value would cause overflow, %G_MAXUINT64
1141 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1142 * If the base is outside the valid range, zero is returned, and
1143 * <literal>EINVAL</literal> is stored in <literal>errno</literal>.
1144 * If the string conversion fails, zero is returned, and @endptr returns
1145 * @nptr (if @endptr is non-%NULL).
1147 * Return value: the #guint64 value or zero on error.
1152 g_ascii_strtoull (const gchar *nptr,
/* First variant: delegate to strtoull_l() with the locale handle returned by
 * get_C_locale(). */
1157 return strtoull_l (nptr, endptr, base, get_C_locale ());
/* Second variant: g_parse_long_long() yields the parsed value and reports
 * through `negative` whether a minus sign was seen.
 * NOTE(review): the preprocessor lines that select between the two variants
 * are not visible in this excerpt. */
1162 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1164 /* Return the result of the appropriate sign. */
1165 return negative ? -result : result;
1171 * @nptr: the string to convert to a numeric value.
1172 * @endptr: if non-%NULL, it returns the character after
1173 * the last character used in the conversion.
1174 * @base: to be used for the conversion, 2..36 or 0
1176 * Converts a string to a #gint64 value.
1177 * This function behaves like the standard strtoll() function
1178 * does in the C locale. It does this without actually
1179 * changing the current locale, since that would not be thread-safe.
1182 * This function is typically used when reading configuration
1183 * files or other non-user input that should be locale independent.
1184 * To handle input from the user you should normally use the
1185 * locale-sensitive system strtoll() function.
1187 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1188 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1189 * If the base is outside the valid range, zero is returned, and
1190 * <literal>EINVAL</literal> is stored in <literal>errno</literal>. If the
1191 * string conversion fails, zero is returned, and @endptr returns @nptr
1192 * (if @endptr is non-%NULL).
1194 * Return value: the #gint64 value or zero on error.
1199 g_ascii_strtoll (const gchar *nptr,
/* First variant: delegate to strtoll_l() with the locale handle returned by
 * get_C_locale(). */
1204 return strtoll_l (nptr, endptr, base, get_C_locale ());
/* Second variant: parse the magnitude with g_parse_long_long(), then range
 * check it for the signed result type. */
1209 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
/* The magnitude is compared against G_MININT64 converted to guint64 when a
 * minus sign was seen, and against G_MAXINT64 otherwise.  The bodies of
 * these two branches are not visible here. */
1211 if (negative && result > (guint64) G_MININT64)
1216 else if (!negative && result > (guint64) G_MAXINT64)
/* In-range values are converted to gint64, negated in the first return.
 * The condition that chooses between the two returns is not visible here
 * (presumably `negative`). */
1222 return - (gint64) result;
1224 return (gint64) result;
1230 * @errnum: the system error number. See the standard C %errno
1233 * Returns a string corresponding to the given error code, e.g.
1234 * "no such process". You should use this function in preference to
1235 * strerror(), because it returns a string in UTF-8 encoding, and since
1236 * not all platforms support the strerror() function.
1238 * Returns: a UTF-8 string describing the error code. If the error code
1239 * is unknown, it returns "unknown error (<code>)".
1242 g_strerror (gint errnum)
1245 gchar *tofree = NULL;
/* errno is captured on entry and written back at the end, so the calls in
 * between do not change the value the caller observes. */
1247 gint saved_errno = errno;
1249 msg = strerror (errnum);
/* When g_get_charset() returns false the message is converted with
 * g_locale_to_utf8(); tofree keeps the converted copy (presumably so it can
 * be freed on a line not visible here).
 * NOTE(review): the conversion result is used without a NULL check in the
 * lines visible here. */
1250 if (!g_get_charset (NULL))
1251 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* The value handed back to the caller comes from g_intern_string(), not from
 * msg directly. */
1253 ret = g_intern_string (msg);
1255 errno = saved_errno;
1261 * @signum: the signal number. See the <literal>signal</literal>
1264 * Returns a string describing the given signal, e.g. "Segmentation fault".
1265 * You should use this function in preference to strsignal(), because it
1266 * returns a string in UTF-8 encoding, and since not all platforms support
1267 * the strsignal() function.
1269 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1270 * it returns "unknown signal (<signum>)".
1273 g_strsignal (gint signum)
1279 msg = tofree = NULL;
/* Only when HAVE_STRSIGNAL is defined: ask strsignal() for the description
 * and, when g_get_charset() returns false, convert it with
 * g_locale_to_utf8(). */
1281 #ifdef HAVE_STRSIGNAL
1282 msg = strsignal (signum);
1283 if (!g_get_charset (NULL))
1284 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* Fallback text carrying the signal number.  The condition guarding this
 * assignment is not visible here (presumably "msg is still NULL").  The
 * value handed back to the caller comes from g_intern_string(). */
1288 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1289 ret = g_intern_string (msg);
1295 /* Functions g_strlcpy and g_strlcat were originally developed by
1296 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1297 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1298 * for more information.
1302 /* Use the native ones, if available; they might be implemented in assembly */
1304 g_strlcpy (gchar *dest,
1308 g_return_val_if_fail (dest != NULL, 0);
1309 g_return_val_if_fail (src != NULL, 0);
/* Both pointers were validated above (0 is returned when either is NULL);
 * the copy itself is delegated to the C library's strlcpy(). */
1311 return strlcpy (dest, src, dest_size);
1315 g_strlcat (gchar *dest,
1319 g_return_val_if_fail (dest != NULL, 0);
1320 g_return_val_if_fail (src != NULL, 0);
/* Both pointers were validated above (0 is returned when either is NULL);
 * the append itself is delegated to the C library's strlcat(). */
1322 return strlcat (dest, src, dest_size);
/* From here on: the emulations used when HAVE_STRLCPY is not defined. */
1325 #else /* ! HAVE_STRLCPY */
1328 * @dest: destination buffer
1329 * @src: source buffer
1330 * @dest_size: length of @dest in bytes
1332 * Portability wrapper that calls strlcpy() on systems which have it,
1333 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1334 * guaranteed to be nul-terminated; @src must be nul-terminated;
1335 * @dest_size is the buffer size, not the number of bytes to copy.
1337 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1338 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1339 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1340 * returns the size of the attempted result, strlen (src), so if
1341 * @retval >= @dest_size, truncation occurred.
1343 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1344 * but if you really want to avoid screwups, g_strdup() is an even better
1347 * Returns: length of @src
1350 g_strlcpy (gchar *dest,
/* d and s are cursors into @dest and @src; n counts the bytes of @dest that
 * may still be used. */
1354 register gchar *d = dest;
1355 register const gchar *s = src;
1356 register gsize n = dest_size;
1358 g_return_val_if_fail (dest != NULL, 0);
1359 g_return_val_if_fail (src != NULL, 0);
1361 /* Copy as many bytes as will fit */
/* The copy is attempted only when dest_size is at least 2: a zero size is
 * rejected by the first test, and the pre-decrement in the second test both
 * reserves one byte and rejects a size of 1. */
1362 if (n != 0 && --n != 0)
1365 register gchar c = *s++;
1373 /* If not enough room in dest, add NUL and traverse rest of src */
/* The returned length is derived from how far s has advanced; the -1
 * presumably compensates for s having been moved one past @src's nul by the
 * loops above, which are only partly visible here. */
1382 return s - src - 1; /* count does not include NUL */
1387 * @dest: destination buffer, already containing one nul-terminated string
1388 * @src: source buffer
1389 * @dest_size: length of @dest buffer in bytes (not length of existing string
1392 * Portability wrapper that calls strlcat() on systems which have it,
1393 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1394 * guaranteeing nul-termination for @dest. The total size of @dest won't
1395 * exceed @dest_size.
1397 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1398 * @dest_size is the full size of dest, not the space left over. This
1399 * function does not allocate memory. It always nul-terminates (unless
1400 * @dest_size == 0 or there were no nul characters in the @dest_size
1401 * characters of dest to start with).
1403 * Caveat: this is supposedly a more secure alternative to strcat() or
1404 * strncat(), but for real security g_strconcat() is harder to mess up.
1406 * Returns: size of attempted result, which is MIN (dest_size, strlen
1407 * (original dest)) + strlen (src), so if retval >= dest_size,
1408 * truncation occurred.
1411 g_strlcat (gchar *dest,
1415 register gchar *d = dest;
1416 register const gchar *s = src;
1417 register gsize bytes_left = dest_size;
1418 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1420 g_return_val_if_fail (dest != NULL, 0);
1421 g_return_val_if_fail (src != NULL, 0);
1423 /* Find the end of dst and adjust bytes left but don't go past end */
1424 while (*d != 0 && bytes_left-- != 0)
1427 bytes_left = dest_size - dlength;
1429 if (bytes_left == 0)
1430 return dlength + strlen (s);
1434 if (bytes_left != 1)
1443 return dlength + (s - src); /* count does not include NUL */
1445 #endif /* ! HAVE_STRLCPY */
1450 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1452 * Converts all upper case ASCII letters to lower case ASCII letters.
1454 * Return value: a newly-allocated string, with all the upper case
1455 * characters in @str converted to lower case, with semantics that
1456 * exactly match g_ascii_tolower(). (Note that this is unlike the
1457 * old g_strdown(), which modified the string in place.)
1460 g_ascii_strdown (const gchar *str,
1465 g_return_val_if_fail (str != NULL, NULL);
1470 result = g_strndup (str, len);
1471 for (s = result; *s; s++)
1472 *s = g_ascii_tolower (*s);
1480 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1482 * Converts all lower case ASCII letters to upper case ASCII letters.
1484 * Return value: a newly allocated string, with all the lower case
1485 * characters in @str converted to upper case, with semantics that
1486 * exactly match g_ascii_toupper(). (Note that this is unlike the
1487 * old g_strup(), which modified the string in place.)
1490 g_ascii_strup (const gchar *str,
1495 g_return_val_if_fail (str != NULL, NULL);
1500 result = g_strndup (str, len);
1501 for (s = result; *s; s++)
1502 *s = g_ascii_toupper (*s);
1511 * Determines if a string is pure ASCII. A string is pure ASCII if it
1512 * contains no bytes with the high bit set.
1514 * Returns: %TRUE if @string is ASCII
1519 g_str_is_ascii (const gchar *string)
1523 for (i = 0; string[i]; i++)
1524 if (string[i] & 0x80)
1532 * @string: the string to convert.
1534 * Converts a string to lower case.
1536 * Return value: the string
1538 * Deprecated:2.2: This function is totally broken for the reasons discussed
1539 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1543 g_strdown (gchar *string)
1547 g_return_val_if_fail (string != NULL, NULL);
1549 s = (guchar *) string;
1558 return (gchar *) string;
1563 * @string: the string to convert
1565 * Converts a string to upper case.
1567 * Return value: the string
1569 * Deprecated:2.2: This function is totally broken for the reasons
1570 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1571 * or g_utf8_strup() instead.
1574 g_strup (gchar *string)
1578 g_return_val_if_fail (string != NULL, NULL);
1580 s = (guchar *) string;
1589 return (gchar *) string;
1594 * @string: the string to reverse
1596 * Reverses all of the bytes in a string. For example,
1597 * <literal>g_strreverse ("abcdef")</literal> will result
1600 * Note that g_strreverse() doesn't work on UTF-8 strings
1601 * containing multibyte characters. For that purpose, use
1602 * g_utf8_strreverse().
1604 * Returns: the same pointer passed in as @string
1607 g_strreverse (gchar *string)
1609 g_return_val_if_fail (string != NULL, NULL);
1613 register gchar *h, *t;
1616 t = string + strlen (string) - 1;
1637 * Convert a character to ASCII lower case.
1639 * Unlike the standard C library tolower() function, this only
1640 * recognizes standard ASCII letters and ignores the locale, returning
1641 * all non-ASCII characters unchanged, even if they are lower case
1642 * letters in a particular character set. Also unlike the standard
1643 * library function, this takes and returns a char, not an int, so
1644 * don't call it on %EOF but no need to worry about casting to #guchar
1645 * before passing a possibly non-ASCII character in.
1647 * Return value: the result of converting @c to lower case. If @c is
1648 * not an ASCII upper case letter, @c is returned unchanged.
1651 g_ascii_tolower (gchar c)
1653 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1660 * Convert a character to ASCII upper case.
1662 * Unlike the standard C library toupper() function, this only
1663 * recognizes standard ASCII letters and ignores the locale, returning
1664 * all non-ASCII characters unchanged, even if they are upper case
1665 * letters in a particular character set. Also unlike the standard
1666 * library function, this takes and returns a char, not an int, so
1667 * don't call it on %EOF but no need to worry about casting to #guchar
1668 * before passing a possibly non-ASCII character in.
1670 * Return value: the result of converting @c to upper case. If @c is not
1671 * an ASCII lower case letter, @c is returned unchanged.
1674 g_ascii_toupper (gchar c)
1676 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1680 * g_ascii_digit_value:
1681 * @c: an ASCII character
1683 * Determines the numeric value of a character as a decimal digit.
1684 * Differs from g_unichar_digit_value() because it takes a char, so
1685 * there's no worry about sign extension if characters are signed.
1687 * Return value: If @c is a decimal digit (according to g_ascii_isdigit()),
1688 * its numeric value. Otherwise, -1.
1691 g_ascii_digit_value (gchar c)
1693 if (g_ascii_isdigit (c))
1699 * g_ascii_xdigit_value:
1700 * @c: an ASCII character.
1702 * Determines the numeric value of a character as a hexadecimal
1703 * digit. Differs from g_unichar_xdigit_value() because it takes
1704 * a char, so there's no worry about sign extension if characters
1707 * Return value: If @c is a hex digit (according to g_ascii_isxdigit()),
1708 * its numeric value. Otherwise, -1.
1711 g_ascii_xdigit_value (gchar c)
1713 if (c >= 'A' && c <= 'F')
1714 return c - 'A' + 10;
1715 if (c >= 'a' && c <= 'f')
1716 return c - 'a' + 10;
1717 return g_ascii_digit_value (c);
1721 * g_ascii_strcasecmp:
1722 * @s1: string to compare with @s2
1723 * @s2: string to compare with @s1
1725 * Compare two strings, ignoring the case of ASCII characters.
1727 * Unlike the BSD strcasecmp() function, this only recognizes standard
1728 * ASCII letters and ignores the locale, treating all non-ASCII
1729 * bytes as if they are not letters.
1731 * This function should be used only on strings that are known to be
1732 * in encodings where the bytes corresponding to ASCII letters always
1733 * represent themselves. This includes UTF-8 and the ISO-8859-*
1734 * charsets, but not for instance double-byte encodings like the
1735 * Windows Codepage 932, where the trailing bytes of double-byte
1736 * characters include all ASCII letters. If you compare two CP932
1737 * strings using this function, you will get false matches.
1739 * Both @s1 and @s2 must be non-%NULL.
1741 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1742 * or a positive value if @s1 > @s2.
1745 g_ascii_strcasecmp (const gchar *s1,
1750 g_return_val_if_fail (s1 != NULL, 0);
1751 g_return_val_if_fail (s2 != NULL, 0);
1755 c1 = (gint)(guchar) TOLOWER (*s1);
1756 c2 = (gint)(guchar) TOLOWER (*s2);
1762 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1766 * g_ascii_strncasecmp:
1767 * @s1: string to compare with @s2
1768 * @s2: string to compare with @s1
1769 * @n: number of characters to compare
1771 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1772 * characters after the first @n in each string.
1774 * Unlike the BSD strcasecmp() function, this only recognizes standard
1775 * ASCII letters and ignores the locale, treating all non-ASCII
1776 * characters as if they are not letters.
1778 * The same warning as in g_ascii_strcasecmp() applies: Use this
1779 * function only on strings known to be in encodings where bytes
1780 * corresponding to ASCII letters always represent themselves.
1782 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1783 * or a positive value if @s1 > @s2.
1786 g_ascii_strncasecmp (const gchar *s1,
1792 g_return_val_if_fail (s1 != NULL, 0);
1793 g_return_val_if_fail (s2 != NULL, 0);
1795 while (n && *s1 && *s2)
1798 c1 = (gint)(guchar) TOLOWER (*s1);
1799 c2 = (gint)(guchar) TOLOWER (*s2);
1806 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1814 * @s2: a string to compare with @s1
1816 * A case-insensitive string comparison, corresponding to the standard
1817 * strcasecmp() function on platforms which support it.
1819 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1820 * or a positive value if @s1 > @s2.
1822 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1823 * function is deprecated and how to replace it.
1826 g_strcasecmp (const gchar *s1,
1829 #ifdef HAVE_STRCASECMP
1830 g_return_val_if_fail (s1 != NULL, 0);
1831 g_return_val_if_fail (s2 != NULL, 0);
1833 return strcasecmp (s1, s2);
1837 g_return_val_if_fail (s1 != NULL, 0);
1838 g_return_val_if_fail (s2 != NULL, 0);
1842 /* According to A. Cox, some platforms have islower's that
1843 * don't work right on non-uppercase
1845 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1846 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1852 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1859 * @s2: a string to compare with @s1
1860 * @n: the maximum number of characters to compare
1862 * A case-insensitive string comparison, corresponding to the standard
1863 * strncasecmp() function on platforms which support it. It is similar
1864 * to g_strcasecmp() except it only compares the first @n characters of
1867 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1868 * or a positive value if @s1 > @s2.
1870 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1871 * the comparison by calling toupper()/tolower(). These functions
1872 * are locale-specific and operate on single bytes. However, it is
1873 * impossible to handle things correctly from an internationalization
1874 * standpoint by operating on bytes, since characters may be multibyte.
1875 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1876 * ASCII, since it is locale-sensitive, and it's broken if your string
1877 * is localized, since it doesn't work on many encodings at all,
1878 * including UTF-8, EUC-JP, etc.
1880 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1881 * which only works on ASCII and is not locale-sensitive, and
1882 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1883 * which is good for case-insensitive sorting of UTF-8.
1886 g_strncasecmp (const gchar *s1,
1890 #ifdef HAVE_STRNCASECMP
1891 return strncasecmp (s1, s2, n);
1895 g_return_val_if_fail (s1 != NULL, 0);
1896 g_return_val_if_fail (s2 != NULL, 0);
1898 while (n && *s1 && *s2)
1901 /* According to A. Cox, some platforms have islower's that
1902 * don't work right on non-uppercase
1904 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1905 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1912 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1920 * @string: the string to convert
1921 * @delimiters: (allow-none): a string containing the current delimiters,
1922 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1923 * @new_delimiter: the new delimiter character
1925 * Converts any delimiter characters in @string to @new_delimiter.
1926 * Any characters in @string which are found in @delimiters are
1927 * changed to the @new_delimiter character. Modifies @string in place,
1928 * and returns @string itself, not a copy. The return value is to
1929 * allow nesting such as
1931 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
1937 g_strdelimit (gchar *string,
1938 const gchar *delimiters,
1943 g_return_val_if_fail (string != NULL, NULL);
1946 delimiters = G_STR_DELIMITERS;
1948 for (c = string; *c; c++)
1950 if (strchr (delimiters, *c))
1959 * @string: a nul-terminated array of bytes
1960 * @valid_chars: bytes permitted in @string
1961 * @substitutor: replacement character for disallowed bytes
1963 * For each character in @string, if the character is not in @valid_chars,
1964 * replaces the character with @substitutor. Modifies @string in place,
1965 * and returns @string itself, not a copy. The return value is to allow
1968 * g_ascii_strup (g_strcanon (str, "abc", '?'))
1974 g_strcanon (gchar *string,
1975 const gchar *valid_chars,
1980 g_return_val_if_fail (string != NULL, NULL);
1981 g_return_val_if_fail (valid_chars != NULL, NULL);
1983 for (c = string; *c; c++)
1985 if (!strchr (valid_chars, *c))
1994 * @source: a string to compress
1996 * Replaces all escaped characters with their one byte equivalent.
1998 * This function does the reverse conversion of g_strescape().
2000 * Returns: a newly-allocated copy of @source with all escaped
2001 * characters compressed
2004 g_strcompress (const gchar *source)
2006 const gchar *p = source, *octal;
2010 g_return_val_if_fail (source != NULL, NULL);
2012 dest = g_malloc (strlen (source) + 1);
2023 g_warning ("g_strcompress: trailing \\");
2025 case '0': case '1': case '2': case '3': case '4':
2026 case '5': case '6': case '7':
2029 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2031 *q = (*q * 8) + (*p - '0');
2055 default: /* Also handles \" and \\ */
2072 * @source: a string to escape
2073 * @exceptions: a string of characters not to escape in @source
2075 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2076 * and '"' in the string @source by inserting a '\' before
2077 * them. Additionally all characters in the range 0x01-0x1F (everything
2078 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2079 * replaced with a '\' followed by their octal representation.
2080 * Characters supplied in @exceptions are not escaped.
2082 * g_strcompress() does the reverse conversion.
2084 * Returns: a newly-allocated copy of @source with certain
2085 * characters escaped. See above.
2088 g_strescape (const gchar *source,
2089 const gchar *exceptions)
2096 g_return_val_if_fail (source != NULL, NULL);
2098 p = (guchar *) source;
2099 /* Each source byte needs maximally four destination chars (\777) */
2100 q = dest = g_malloc (strlen (source) * 4 + 1);
2102 memset (excmap, 0, 256);
2105 guchar *e = (guchar *) exceptions;
2155 if ((*p < ' ') || (*p >= 0177))
2158 *q++ = '0' + (((*p) >> 6) & 07);
2159 *q++ = '0' + (((*p) >> 3) & 07);
2160 *q++ = '0' + ((*p) & 07);
2175 * @string: a string to remove the leading whitespace from
2177 * Removes leading whitespace from a string, by moving the rest
2178 * of the characters forward.
2180 * This function doesn't allocate or reallocate any memory;
2181 * it modifies @string in place. Therefore, it cannot be used on
2182 * statically allocated strings.
2184 * The pointer to @string is returned to allow the nesting of functions.
2186 * Also see g_strchomp() and g_strstrip().
2191 g_strchug (gchar *string)
2195 g_return_val_if_fail (string != NULL, NULL);
2197 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2200 memmove (string, start, strlen ((gchar *) start) + 1);
2207 * @string: a string to remove the trailing whitespace from
2209 * Removes trailing whitespace from a string.
2211 * This function doesn't allocate or reallocate any memory;
2212 * it modifies @string in place. Therefore, it cannot be used
2213 * on statically allocated strings.
2215 * The pointer to @string is returned to allow the nesting of functions.
2217 * Also see g_strchug() and g_strstrip().
2222 g_strchomp (gchar *string)
2226 g_return_val_if_fail (string != NULL, NULL);
2228 len = strlen (string);
2231 if (g_ascii_isspace ((guchar) string[len]))
2242 * @string: a string to split
2243 * @delimiter: a string which specifies the places at which to split
2244 * the string. The delimiter is not included in any of the resulting
2245 * strings, unless @max_tokens is reached.
2246 * @max_tokens: the maximum number of pieces to split @string into.
2247 * If this is less than 1, the string is split completely.
2249 * Splits a string into a maximum of @max_tokens pieces, using the given
2250 * @delimiter. If @max_tokens is reached, the remainder of @string is
2251 * appended to the last token.
2253 * As a special case, the result of splitting the empty string "" is an empty
2254 * vector, not a vector containing a single string. The reason for this
2255 * special case is that being able to represent an empty vector is typically
2256 * more useful than consistent handling of empty elements. If you do need
2257 * to represent empty elements, you'll need to check for the empty string
2258 * before calling g_strsplit().
2260 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2261 * g_strfreev() to free it.
2264 g_strsplit (const gchar *string,
2265 const gchar *delimiter,
2268 GSList *string_list = NULL, *slist;
2269 gchar **str_array, *s;
2271 const gchar *remainder;
2273 g_return_val_if_fail (string != NULL, NULL);
2274 g_return_val_if_fail (delimiter != NULL, NULL);
2275 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2278 max_tokens = G_MAXINT;
2281 s = strstr (remainder, delimiter);
2284 gsize delimiter_len = strlen (delimiter);
2286 while (--max_tokens && s)
2290 len = s - remainder;
2291 string_list = g_slist_prepend (string_list,
2292 g_strndup (remainder, len));
2294 remainder = s + delimiter_len;
2295 s = strstr (remainder, delimiter);
2301 string_list = g_slist_prepend (string_list, g_strdup (remainder));
2304 str_array = g_new (gchar*, n + 1);
2306 str_array[n--] = NULL;
2307 for (slist = string_list; slist; slist = slist->next)
2308 str_array[n--] = slist->data;
2310 g_slist_free (string_list);
2317 * @string: The string to be tokenized
2318 * @delimiters: A nul-terminated string containing bytes that are used
2319 * to split the string.
2320 * @max_tokens: The maximum number of tokens to split @string into.
2321 * If this is less than 1, the string is split completely
2323 * Splits @string into a number of tokens not containing any of the characters
2324 * in @delimiters. A token is the (possibly empty) longest string that does not
2325 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2326 * remainder is appended to the last token.
2328 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2329 * %NULL-terminated vector containing the three strings "abc", "def",
2332 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2333 * vector containing the four strings "", "def", "ghi", and "".
2335 * As a special case, the result of splitting the empty string "" is an empty
2336 * vector, not a vector containing a single string. The reason for this
2337 * special case is that being able to represent an empty vector is typically
2338 * more useful than consistent handling of empty elements. If you do need
2339 * to represent empty elements, you'll need to check for the empty string
2340 * before calling g_strsplit_set().
2342 * Note that this function works on bytes not characters, so it can't be used
2343 * to delimit UTF-8 strings for anything but ASCII characters.
2345 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2346 * g_strfreev() to free it.
2351 g_strsplit_set (const gchar *string,
2352 const gchar *delimiters,
2355 gboolean delim_table[256];
2356 GSList *tokens, *list;
2359 const gchar *current;
2363 g_return_val_if_fail (string != NULL, NULL);
2364 g_return_val_if_fail (delimiters != NULL, NULL);
2367 max_tokens = G_MAXINT;
2369 if (*string == '\0')
2371 result = g_new (char *, 1);
2376 memset (delim_table, FALSE, sizeof (delim_table));
2377 for (s = delimiters; *s != '\0'; ++s)
2378 delim_table[*(guchar *)s] = TRUE;
2383 s = current = string;
2386 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2388 token = g_strndup (current, s - current);
2389 tokens = g_slist_prepend (tokens, token);
2398 token = g_strndup (current, s - current);
2399 tokens = g_slist_prepend (tokens, token);
2402 result = g_new (gchar *, n_tokens + 1);
2404 result[n_tokens] = NULL;
2405 for (list = tokens; list != NULL; list = list->next)
2406 result[--n_tokens] = list->data;
2408 g_slist_free (tokens);
2415 * @str_array: a %NULL-terminated array of strings to free
2417 * Frees a %NULL-terminated array of strings, and the array itself.
2418 * If called on a %NULL value, g_strfreev() simply returns.
2421 g_strfreev (gchar **str_array)
2427 for (i = 0; str_array[i] != NULL; i++)
2428 g_free (str_array[i]);
2436 * @str_array: a %NULL-terminated array of strings
2438 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2439 * the new array should be freed by first freeing each string, then
2440 * the array itself. g_strfreev() does this for you. If called
2441 * on a %NULL value, g_strdupv() simply returns %NULL.
2443 * Return value: a new %NULL-terminated array of strings.
2446 g_strdupv (gchar **str_array)
2454 while (str_array[i])
2457 retval = g_new (gchar*, i + 1);
2460 while (str_array[i])
2462 retval[i] = g_strdup (str_array[i]);
2475 * @separator: (allow-none): a string to insert between each of the
2477 * @str_array: a %NULL-terminated array of strings to join
2479 * Joins a number of strings together to form one long string, with the
2480 * optional @separator inserted between each of them. The returned string
2481 * should be freed with g_free().
2483 * Returns: a newly-allocated string containing all of the strings joined
2484 * together, with @separator between them
2487 g_strjoinv (const gchar *separator,
2493 g_return_val_if_fail (str_array != NULL, NULL);
2495 if (separator == NULL)
2502 gsize separator_len;
2504 separator_len = strlen (separator);
2505 /* First part, getting length */
2506 len = 1 + strlen (str_array[0]);
2507 for (i = 1; str_array[i] != NULL; i++)
2508 len += strlen (str_array[i]);
2509 len += separator_len * (i - 1);
2511 /* Second part, building string */
2512 string = g_new (gchar, len);
2513 ptr = g_stpcpy (string, *str_array);
2514 for (i = 1; str_array[i] != NULL; i++)
2516 ptr = g_stpcpy (ptr, separator);
2517 ptr = g_stpcpy (ptr, str_array[i]);
2521 string = g_strdup ("");
2528 * @separator: (allow-none): a string to insert between each of the
2530 * @...: a %NULL-terminated list of strings to join
2532 * Joins a number of strings together to form one long string, with the
2533 * optional @separator inserted between each of them. The returned string
2534 * should be freed with g_free().
2536 * Returns: a newly-allocated string containing all of the strings joined
2537 * together, with @separator between them
2540 g_strjoin (const gchar *separator,
2546 gsize separator_len;
2549 if (separator == NULL)
2552 separator_len = strlen (separator);
2554 va_start (args, separator);
2556 s = va_arg (args, gchar*);
2560 /* First part, getting length */
2561 len = 1 + strlen (s);
2563 s = va_arg (args, gchar*);
2566 len += separator_len + strlen (s);
2567 s = va_arg (args, gchar*);
2571 /* Second part, building string */
2572 string = g_new (gchar, len);
2574 va_start (args, separator);
2576 s = va_arg (args, gchar*);
2577 ptr = g_stpcpy (string, s);
2579 s = va_arg (args, gchar*);
2582 ptr = g_stpcpy (ptr, separator);
2583 ptr = g_stpcpy (ptr, s);
2584 s = va_arg (args, gchar*);
2588 string = g_strdup ("");
2598 * @haystack: a string
2599 * @haystack_len: the maximum length of @haystack. Note that -1 is
2600 * a valid length, if @haystack is nul-terminated, meaning it will
2601 * search through the whole string.
2602 * @needle: the string to search for
2604 * Searches the string @haystack for the first occurrence
2605 * of the string @needle, limiting the length of the search
2608 * Return value: a pointer to the found occurrence, or
2609 * %NULL if not found.
2612 g_strstr_len (const gchar *haystack,
2613 gssize haystack_len,
2614 const gchar *needle)
2616 g_return_val_if_fail (haystack != NULL, NULL);
2617 g_return_val_if_fail (needle != NULL, NULL);
2619 if (haystack_len < 0)
2620 return strstr (haystack, needle);
2623 const gchar *p = haystack;
2624 gsize needle_len = strlen (needle);
2628 if (needle_len == 0)
2629 return (gchar *)haystack;
2631 if (haystack_len < needle_len)
2634 end = haystack + haystack_len - needle_len;
2636 while (p <= end && *p)
2638 for (i = 0; i < needle_len; i++)
2639 if (p[i] != needle[i])
2654 * @haystack: a nul-terminated string
2655 * @needle: the nul-terminated string to search for
2657 * Searches the string @haystack for the last occurrence
2658 * of the string @needle.
2660 * Return value: a pointer to the found occurrence, or
2661 * %NULL if not found.
2664 g_strrstr (const gchar *haystack,
2665 const gchar *needle)
2672 g_return_val_if_fail (haystack != NULL, NULL);
2673 g_return_val_if_fail (needle != NULL, NULL);
2675 needle_len = strlen (needle);
2676 haystack_len = strlen (haystack);
2678 if (needle_len == 0)
2679 return (gchar *)haystack;
2681 if (haystack_len < needle_len)
2684 p = haystack + haystack_len - needle_len;
2686 while (p >= haystack)
2688 for (i = 0; i < needle_len; i++)
2689 if (p[i] != needle[i])
2703 * @haystack: a nul-terminated string
2704 * @haystack_len: the maximum length of @haystack
2705 * @needle: the nul-terminated string to search for
2707 * Searches the string @haystack for the last occurrence
2708 * of the string @needle, limiting the length of the search
2711 * Return value: a pointer to the found occurrence, or
2712 * %NULL if not found.
2715 g_strrstr_len (const gchar *haystack,
2716 gssize haystack_len,
2717 const gchar *needle)
2719 g_return_val_if_fail (haystack != NULL, NULL);
2720 g_return_val_if_fail (needle != NULL, NULL);
2722 if (haystack_len < 0)
2723 return g_strrstr (haystack, needle);
2726 gsize needle_len = strlen (needle);
2727 const gchar *haystack_max = haystack + haystack_len;
2728 const gchar *p = haystack;
2731 while (p < haystack_max && *p)
2734 if (p < haystack + needle_len)
2739 while (p >= haystack)
2741 for (i = 0; i < needle_len; i++)
2742 if (p[i] != needle[i])
2758 * @str: a nul-terminated string
2759 * @suffix: the nul-terminated suffix to look for
2761 * Looks whether the string @str ends with @suffix.
2763 * Return value: %TRUE if @str ends with @suffix, %FALSE otherwise.
2768 g_str_has_suffix (const gchar *str,
2769 const gchar *suffix)
2774 g_return_val_if_fail (str != NULL, FALSE);
2775 g_return_val_if_fail (suffix != NULL, FALSE);
2777 str_len = strlen (str);
2778 suffix_len = strlen (suffix);
2780 if (str_len < suffix_len)
2783 return strcmp (str + str_len - suffix_len, suffix) == 0;
2788 * @str: a nul-terminated string
2789 * @prefix: the nul-terminated prefix to look for
2791 * Looks whether the string @str begins with @prefix.
2793 * Return value: %TRUE if @str begins with @prefix, %FALSE otherwise.
2798 g_str_has_prefix (const gchar *str,
2799 const gchar *prefix)
2804 g_return_val_if_fail (str != NULL, FALSE);
2805 g_return_val_if_fail (prefix != NULL, FALSE);
2807 str_len = strlen (str);
2808 prefix_len = strlen (prefix);
2810 if (str_len < prefix_len)
2813 return strncmp (str, prefix, prefix_len) == 0;
2818 * @str_array: a %NULL-terminated array of strings
2820 * Returns the length of the given %NULL-terminated
2821 * string array @str_array.
2823 * Return value: length of @str_array.
2828 g_strv_length (gchar **str_array)
2832 g_return_val_if_fail (str_array != NULL, 0);
2834 while (str_array[i])
2841 index_add_folded (GPtrArray *array,
2847 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2849 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2850 if (strstr (normal, "ı") || strstr (normal, "İ"))
2855 tmp = g_string_new (NULL);
2861 i = strstr (s, "ı");
2862 I = strstr (s, "İ");
2875 g_string_append_len (tmp, s, e - s);
2876 g_string_append_c (tmp, 'i');
2877 s = g_utf8_next_char (e);
2880 g_string_append (tmp, s);
2882 normal = g_string_free (tmp, FALSE);
2885 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
2890 split_words (const gchar *value)
2892 const gchar *start = NULL;
2896 result = g_ptr_array_new ();
2898 for (s = value; *s; s = g_utf8_next_char (s))
2900 gunichar c = g_utf8_get_char (s);
2904 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
2909 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2911 index_add_folded (result, start, s);
2918 index_add_folded (result, start, s);
2920 g_ptr_array_add (result, NULL);
2922 return (gchar **) g_ptr_array_free (result, FALSE);
2926 * g_str_tokenize_and_fold:
2928 * @translit_locale: (allow-none): the language code (like 'de' or
2929 * 'en_GB') from which @string originates
2930 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2931 * return location for ASCII alternates
2933 * Tokenises @string and performs folding on each token.
2935 * A token is a non-empty sequence of alphanumeric characters in the
2936 * source string, separated by non-alphanumeric characters. An
2937 * "alphanumeric" character for this purpose is one that matches
2938 * g_unichar_isalnum() or g_unichar_ismark().
2940 * Each token is then (Unicode) normalised and case-folded. If
2941 * @ascii_alternates is non-%NULL and some of the returned tokens
2942 * contain non-ASCII characters, ASCII alternatives will be generated.
2944 * The number of ASCII alternatives that are generated and the method
2945 * for doing so is unspecified, but @translit_locale (if specified) may
2946 * improve the transliteration if the language of the source string is
2949 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
2954 g_str_tokenize_and_fold (const gchar *string,
2955 const gchar *translit_locale,
2956 gchar ***ascii_alternates)
2960 if (ascii_alternates && g_str_is_ascii (string))
2962 *ascii_alternates = g_new0 (gchar *, 0 + 1);
2963 ascii_alternates = NULL;
2966 result = split_words (string);
2968 /* TODO: proper iconv transliteration (locale-dependent) */
2969 if (ascii_alternates)
2973 n = g_strv_length (result);
2974 *ascii_alternates = g_new (gchar *, n + 1);
2977 for (i = 0; i < n; i++)
2979 if (!g_str_is_ascii (result[i]))
2986 decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL);
2987 ascii = g_malloc (strlen (decomposed) + 1);
2989 for (k = 0; decomposed[k]; k++)
2990 if (~decomposed[k] & 0x80)
2991 ascii[l++] = decomposed[k];
2994 (*ascii_alternates)[j++] = ascii;
2995 g_free (decomposed);
2999 (*ascii_alternates)[j] = NULL;
3006 * g_str_match_string:
3007 * @search_term: the search term from the user
3008 * @potential_hit: the text that may be a hit
3009 * @accept_alternates: %TRUE to accept ASCII alternates
3011 * Checks if a search conducted for @search_term should match
3014 * This function calls g_str_tokenize_and_fold() on both
3015 * @search_term and @potential_hit. ASCII alternates are never taken
3016 * for @search_term but will be taken for @potential_hit according to
3017 * the value of @accept_alternates.
3019 * A hit occurs when each folded token in @search_term is a prefix of a
3020 * folded token from @potential_hit.
3022 * Depending on how you're performing the search, it will typically be
3023 * faster to call g_str_tokenize_and_fold() on each string in
3024 * your corpus and build an index on the returned folded tokens, then
3025 * call g_str_tokenize_and_fold() on the search term and
3026 * perform lookups into that index.
3028 * As some examples, searching for "fred" would match the potential hit
3029 * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match
3030 * "Frédéric" but not "Frederic" (due to the one-directional nature of
3031 * accent matching). Searching "fo" would match "Foo" and "Bar Foo
3032 * Baz", but not "SFO" (because no word has "fo" as a prefix).
3034 * Returns: %TRUE if @potential_hit is a hit
3039 g_str_match_string (const gchar *search_term,
3040 const gchar *potential_hit,
3041 gboolean accept_alternates)
3043 gchar **alternates = NULL;
3044 gchar **term_tokens;
3049 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3050 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3054 for (i = 0; term_tokens[i]; i++)
3056 for (j = 0; hit_tokens[j]; j++)
3057 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3060 if (accept_alternates)
3061 for (j = 0; alternates[j]; j++)
3062 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3072 g_strfreev (term_tokens);
3073 g_strfreev (hit_tokens);
3074 g_strfreev (alternates);