1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
38 #include <ctype.h> /* For tolower() */
41 /* Needed on BSD/OS X for e.g. strtod_l */
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51 #include "gstrfuncs.h"
54 #include "gprintfint.h"
59 * SECTION:string_utils
60 * @title: String Utility Functions
61 * @short_description: various string-related functions
63 * This section describes a number of utility functions for creating,
64 * duplicating, and manipulating strings.
66 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
67 * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
68 * are declared in the header `gprintf.h` which is not included in `glib.h`
69 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
70 * explicitly include `<glib/gprintf.h>` in order to use the GLib
73 * <para id="string-precision">
74 * While you may use the printf() functions to format UTF-8 strings,
75 * notice that the precision of a %Ns parameter is interpreted
76 * as the number of bytes, not characters to print. On top of that,
77 * the GNU libc implementation of the printf() functions has the
78 * "feature" that it checks that the string given for the %Ns
79 * parameter consists of a whole number of characters in the current
80 * encoding. So, unless you are sure you are always going to be in a
81 * UTF-8 locale or you know your text is restricted to ASCII, avoid
82 * using %Ns. If your intention is to format strings for a
83 * certain number of columns, then %Ns is not a correct solution
84 * anyway, since it fails to take wide characters (see g_unichar_iswide())
93 * Determines whether a character is alphanumeric.
95 * Unlike the standard C library isalnum() function, this only
96 * recognizes standard ASCII letters and ignores the locale,
97 * returning %FALSE for all non-ASCII characters. Also, unlike
98 * the standard library function, this takes a char, not an int,
99 * so don't call it on %EOF, but no need to cast to #guchar before
100 * passing a possibly non-ASCII character in.
102 * Returns: %TRUE if @c is an ASCII alphanumeric character
109 * Determines whether a character is alphabetic (i.e. a letter).
111 * Unlike the standard C library isalpha() function, this only
112 * recognizes standard ASCII letters and ignores the locale,
113 * returning %FALSE for all non-ASCII characters. Also, unlike
114 * the standard library function, this takes a char, not an int,
115 * so don't call it on %EOF, but no need to cast to #guchar before
116 * passing a possibly non-ASCII character in.
118 * Returns: %TRUE if @c is an ASCII alphabetic character
125 * Determines whether a character is a control character.
127 * Unlike the standard C library iscntrl() function, this only
128 * recognizes standard ASCII control characters and ignores the
129 * locale, returning %FALSE for all non-ASCII characters. Also,
130 * unlike the standard library function, this takes a char, not
131 * an int, so don't call it on %EOF, but no need to cast to #guchar
132 * before passing a possibly non-ASCII character in.
134 * Returns: %TRUE if @c is an ASCII control character.
141 * Determines whether a character is a digit (0-9).
143 * Unlike the standard C library isdigit() function, this takes
144 * a char, not an int, so don't call it on %EOF, but no need to
145 * cast to #guchar before passing a possibly non-ASCII character in.
147 * Returns: %TRUE if @c is an ASCII digit.
154 * Determines whether a character is a printing character and not a space.
156 * Unlike the standard C library isgraph() function, this only
157 * recognizes standard ASCII characters and ignores the locale,
158 * returning %FALSE for all non-ASCII characters. Also, unlike
159 * the standard library function, this takes a char, not an int,
160 * so don't call it on %EOF, but no need to cast to #guchar before
161 * passing a possibly non-ASCII character in.
163 * Returns: %TRUE if @c is an ASCII printing character other than space.
170 * Determines whether a character is an ASCII lower case letter.
172 * Unlike the standard C library islower() function, this only
173 * recognizes standard ASCII letters and ignores the locale,
174 * returning %FALSE for all non-ASCII characters. Also, unlike
175 * the standard library function, this takes a char, not an int,
176 * so don't call it on %EOF, but no need to worry about casting
177 * to #guchar before passing a possibly non-ASCII character in.
179 * Returns: %TRUE if @c is an ASCII lower case letter
186 * Determines whether a character is a printing character.
188 * Unlike the standard C library isprint() function, this only
189 * recognizes standard ASCII characters and ignores the locale,
190 * returning %FALSE for all non-ASCII characters. Also, unlike
191 * the standard library function, this takes a char, not an int,
192 * so don't call it on %EOF, but no need to cast to #guchar before
193 * passing a possibly non-ASCII character in.
195 * Returns: %TRUE if @c is an ASCII printing character.
202 * Determines whether a character is a punctuation character.
204 * Unlike the standard C library ispunct() function, this only
205 * recognizes standard ASCII punctuation and ignores the locale,
206 * returning %FALSE for all non-ASCII characters. Also, unlike
207 * the standard library function, this takes a char, not an int,
208 * so don't call it on %EOF, but no need to cast to #guchar before
209 * passing a possibly non-ASCII character in.
211 * Returns: %TRUE if @c is an ASCII punctuation character.
218 * Determines whether a character is a white-space character.
220 * Unlike the standard C library isspace() function, this only
221 * recognizes standard ASCII white-space and ignores the locale,
222 * returning %FALSE for all non-ASCII characters. Also, unlike
223 * the standard library function, this takes a char, not an int,
224 * so don't call it on %EOF, but no need to cast to #guchar before
225 * passing a possibly non-ASCII character in.
227 * Returns: %TRUE if @c is an ASCII white-space character
234 * Determines whether a character is an ASCII upper case letter.
236 * Unlike the standard C library isupper() function, this only
237 * recognizes standard ASCII letters and ignores the locale,
238 * returning %FALSE for all non-ASCII characters. Also, unlike
239 * the standard library function, this takes a char, not an int,
240 * so don't call it on %EOF, but no need to worry about casting
241 * to #guchar before passing a possibly non-ASCII character in.
243 * Returns: %TRUE if @c is an ASCII upper case letter
250 * Determines whether a character is a hexadecimal-digit character.
252 * Unlike the standard C library isxdigit() function, this takes
253 * a char, not an int, so don't call it on %EOF, but no need to
254 * cast to #guchar before passing a possibly non-ASCII character in.
256 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
260 * G_ASCII_DTOSTR_BUF_SIZE:
262 * A good size for a buffer to be passed into g_ascii_dtostr().
263 * It is guaranteed to be enough for all output of that function
264 * on systems with 64bit IEEE-compatible doubles.
266 * The typical usage would be something like:
267 * |[<!-- language="C" -->
268 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
270 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
276 * @string: a string to remove the leading and trailing whitespace from
278 * Removes leading and trailing whitespace from a string.
279 * See g_strchomp() and g_strchug().
287 * The standard delimiters, used in g_strdelimit().
290 static const guint16 ascii_table_data[256] = {
/* One 16-bit flag word per byte value. Only the first 128 entries (16 rows
 * of 8) are written out; the rest of the 256-element array is left to
 * zero-initialization, as the trailing note says.
 * NOTE(review): the meaning of the individual bits is not defined in this
 * excerpt -- presumably the GAsciiType flags from gstrfuncs.h; confirm. */
291 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
292 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
293 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
294 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
295 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
296 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
297 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
298 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
299 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
300 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
301 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
302 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
303 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
304 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
305 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
306 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
307 /* the upper 128 are all zeroes */
/* Non-static, read-only pointer through which the table is exposed. */
310 const guint16 * const g_ascii_table = ascii_table_data;
/* USE_XLOCALE is only defined when every locale-aware libc function tested
 * below (newlocale, uselocale, strtod_l, strtoull_l, strtoll_l) is
 * available. */
312 #if defined (HAVE_NEWLOCALE) && \
313 defined (HAVE_USELOCALE) && \
314 defined (HAVE_STRTOD_L) && \
315 defined (HAVE_STRTOULL_L) && \
316 defined (HAVE_STRTOLL_L)
317 #define USE_XLOCALE 1
/* Creates the "C" locale object on demand. The newlocale() call is bracketed
 * by g_once_init_enter()/g_once_init_leave() on `initialized`, and the result
 * is cached in the function-static C_locale.
 * NOTE(review): the value returned by newlocale() is stored without a
 * visible NULL check. */
324 static gsize initialized = FALSE;
325 static locale_t C_locale = NULL;
327 if (g_once_init_enter (&initialized))
329 C_locale = newlocale (LC_ALL_MASK, "C", NULL);
330 g_once_init_leave (&initialized, TRUE);
339 * @str: the string to duplicate
341 * Duplicates a string. If @str is %NULL it returns %NULL.
342 * The returned string should be freed with g_free()
343 * when no longer needed.
345 * Returns: a newly-allocated copy of @str
348 g_strdup (const gchar *str)
/* length counts the terminating nul, so the single memcpy() below copies
 * the terminator together with the text into the new allocation. */
355 length = strlen (str) + 1;
356 new_str = g_new (char, length);
357 memcpy (new_str, str, length);
367 * @mem: the memory to copy.
368 * @byte_size: the number of bytes to copy.
370 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
371 * from @mem. If @mem is %NULL it returns %NULL.
373 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem is %NULL
377 g_memdup (gconstpointer mem,
/* The same byte_size value sizes both the allocation and the copy.
 * NOTE(review): byte_size is declared on a line not shown in this excerpt;
 * if its type is narrower than gsize, a large caller-side size would be
 * truncated before it reaches g_malloc() -- confirm. */
384 new_mem = g_malloc (byte_size);
385 memcpy (new_mem, mem, byte_size);
395 * @str: the string to duplicate
396 * @n: the maximum number of bytes to copy from @str
398 * Duplicates the first @n bytes of a string, returning a newly-allocated
399 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
400 * is less than @n bytes long the buffer is padded with nuls. If @str is
401 * %NULL it returns %NULL. The returned value should be freed when no longer needed.
404 * To copy a number of characters from a UTF-8 encoded string, use
405 * g_utf8_strncpy() instead.
407 * Returns: a newly-allocated buffer containing the first @n bytes
408 * of @str, nul-terminated
411 g_strndup (const gchar *str,
/* n + 1 bytes: room for up to n copied bytes plus the terminating nul.
 * strncpy() copies at most n bytes and zero-fills the remainder when str is
 * shorter, which produces the nul padding. It never writes byte n itself,
 * so that byte has to be set separately (on a line not shown in this
 * excerpt). */
418 new_str = g_new (gchar, n + 1);
419 strncpy (new_str, str, n);
430 * @length: the length of the new string
431 * @fill_char: the byte to fill the string with
433 * Creates a new string @length bytes long filled with @fill_char.
434 * The returned string should be freed when no longer needed.
436 * Returns: a newly-allocated string filled with @fill_char
439 g_strnfill (gsize length,
/* One byte more than the `length` fill bytes is allocated (room for a
 * terminating nul). fill_char is converted to guchar before being widened
 * to the int that memset() takes. */
444 str = g_new (gchar, length + 1);
445 memset (str, (guchar)fill_char, length);
453 * @dest: destination buffer.
454 * @src: source string.
456 * Copies a nul-terminated string into the dest buffer, including the
457 * trailing nul, and returns a pointer to the trailing nul byte.
458 * This is useful for concatenating multiple strings together
459 * without having to repeatedly scan for the end.
461 * Return value: a pointer to trailing nul byte.
464 g_stpcpy (gchar *dest,
/* First variant: check both pointers, then defer to the C library's
 * stpcpy(). */
468 g_return_val_if_fail (dest != NULL, NULL);
469 g_return_val_if_fail (src != NULL, NULL);
470 return stpcpy (dest, src);
/* Second variant: hand-written copy using the local cursors d and s.
 * NOTE(review): the preprocessor test that selects between the two variants
 * is not visible in this excerpt. */
472 register gchar *d = dest;
473 register const gchar *s = src;
475 g_return_val_if_fail (dest != NULL, NULL);
476 g_return_val_if_fail (src != NULL, NULL);
/* Loop condition: continue until the nul byte itself has been consumed from
 * s (the statements it controls are on lines not shown here). */
479 while (*s++ != '\0');
487 * @format: a standard printf() format string, but notice
488 * <link linkend="string-precision">string precision pitfalls</link>
489 * @args: the list of parameters to insert into the format string
491 * Similar to the standard C vsprintf() function but safer, since it
492 * calculates the maximum space required and allocates memory to hold
493 * the result. The returned string should be freed with g_free() when no longer needed.
496 * See also g_vasprintf(), which offers the same functionality, but
497 * additionally returns the length of the allocated string.
499 * Returns: a newly-allocated string holding the result
502 g_strdup_vprintf (const gchar *format,
/* string is pre-set to NULL and its address is handed to g_vasprintf(),
 * which does the formatting; the value g_vasprintf() returns is not used
 * here. */
505 gchar *string = NULL;
507 g_vasprintf (&string, format, args);
514 * @format: a standard printf() format string, but notice
515 * <link linkend="string-precision">string precision pitfalls</link>
516 * @...: the parameters to insert into the format string
518 * Similar to the standard C sprintf() function but safer, since it
519 * calculates the maximum space required and allocates memory to hold
520 * the result. The returned string should be freed with g_free() when no longer needed.
523 * Returns: a newly-allocated string holding the result
526 g_strdup_printf (const gchar *format,
/* Varargs front end: gather the arguments into a va_list and delegate the
 * formatting to g_strdup_vprintf(). */
532 va_start (args, format);
533 buffer = g_strdup_vprintf (format, args);
541 * @string1: the first string to add, which must not be %NULL
542 * @...: a %NULL-terminated list of strings to append to the string
544 * Concatenates all of the given strings into one long string. The
545 * returned string should be freed with g_free() when no longer needed.
547 * The variable argument list must end with %NULL. If you forget the %NULL,
548 * g_strconcat() will start appending random memory junk to your string.
550 * Note that this function is usually not the right function to use to
551 * assemble a translated message from pieces, since proper translation
552 * often requires the pieces to be reordered.
554 * Returns: a newly-allocated string containing all the string arguments
557 g_strconcat (const gchar *string1, ...)
/* Pass 1 over the arguments: l starts as strlen (string1) plus one byte for
 * the terminating nul; the remaining arguments are fetched one by one with
 * va_arg(). */
568 l = 1 + strlen (string1);
569 va_start (args, string1);
570 s = va_arg (args, gchar*);
574 s = va_arg (args, gchar*);
/* Single allocation of l bytes. */
578 concat = g_new (gchar, l);
/* Pass 2: the argument list is restarted and each piece is appended with
 * g_stpcpy(), whose return value becomes the next write position, so the
 * output is never rescanned. */
581 ptr = g_stpcpy (ptr, string1);
582 va_start (args, string1);
583 s = va_arg (args, gchar*);
586 ptr = g_stpcpy (ptr, s);
587 s = va_arg (args, gchar*);
596 * @nptr: the string to convert to a numeric value.
597 * @endptr: if non-%NULL, it returns the character after
598 * the last character used in the conversion.
600 * Converts a string to a #gdouble value.
601 * It calls the standard strtod() function to handle the conversion, but
602 * if the string is not completely converted it attempts the conversion
603 * again with g_ascii_strtod(), and returns the best match.
605 * This function should seldom be used. The normal situation when reading
606 * numbers not for human consumption is to use g_ascii_strtod(). Only when
607 * you know that you must expect both locale formatted and C formatted numbers
608 * should you use this. Make sure that you don't pass strings such as comma
609 * separated lists of values, since the commas may be interpreted as a decimal
610 * point in some locales, causing unexpected results.
612 * Return value: the #gdouble value.
615 g_strtod (const gchar *nptr,
623 g_return_val_if_fail (nptr != NULL, 0);
/* First attempt: the C library's strtod(). */
628 val_1 = strtod (nptr, &fail_pos_1);
/* g_ascii_strtod() is only tried when strtod() stopped before the end of
 * the string. */
630 if (fail_pos_1 && fail_pos_1[0] != 0)
631 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
/* The strtod() stop position is reported when strtod() consumed the whole
 * string or got at least as far as the second parse; the other branch
 * reports fail_pos_2. */
633 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
636 *endptr = fail_pos_1;
642 *endptr = fail_pos_2;
649 * @nptr: the string to convert to a numeric value.
650 * @endptr: if non-%NULL, it returns the character after
651 * the last character used in the conversion.
653 * Converts a string to a #gdouble value.
655 * This function behaves like the standard strtod() function
656 * does in the C locale. It does this without actually changing
657 * the current locale, since that would not be thread-safe.
658 * A limitation of the implementation is that this function
659 * will still accept localized versions of infinities and NANs.
661 * This function is typically used when reading configuration
662 * files or other non-user input that should be locale independent.
663 * To handle input from the user you should normally use the
664 * locale-sensitive system strtod() function.
666 * To convert from a #gdouble to a string in a locale-insensitive
667 * way, use g_ascii_dtostr().
669 * If the correct value would cause overflow, plus or minus %HUGE_VAL
670 * is returned (according to the sign of the value), and %ERANGE is
671 * stored in %errno. If the correct value would cause underflow,
672 * zero is returned and %ERANGE is stored in %errno.
674 * This function resets %errno before calling strtod() so that
675 * you can reliably detect overflow and underflow.
677 * Return value: the #gdouble value.
680 g_ascii_strtod (const gchar *nptr,
685 g_return_val_if_fail (nptr != NULL, 0);
/* Variant 1: parse directly against the "C" locale object with strtod_l(). */
689 return strtod_l (nptr, endptr, get_C_locale ());
/* Variant 2: plain strtod() only. The number is scanned by hand first; when
 * the current locale's decimal point is not ".", strtod() is given a copy in
 * which the '.' has been replaced by the locale's decimal point. */
696 struct lconv *locale_data;
698 const char *decimal_point;
699 int decimal_point_len;
700 const char *p, *decimal_point_pos;
701 const char *end = NULL; /* Silence gcc */
704 g_return_val_if_fail (nptr != NULL, 0);
/* Look up the decimal point string of the current locale. */
709 locale_data = localeconv ();
710 decimal_point = locale_data->decimal_point;
711 decimal_point_len = strlen (decimal_point);
714 decimal_point_len = 1;
717 g_assert (decimal_point_len != 0);
719 decimal_point_pos = NULL;
/* The hand scan is only needed when the locale's decimal point differs from
 * a single '.'. */
722 if (decimal_point[0] != '.' ||
723 decimal_point[1] != 0)
726 /* Skip leading space */
727 while (g_ascii_isspace (*p))
730 /* Skip leading optional sign */
731 if (*p == '+' || *p == '-')
735 (p[1] == 'x' || p[1] == 'X'))
738 /* HEX - find the (optional) decimal point */
740 while (g_ascii_isxdigit (*p))
744 decimal_point_pos = p++;
746 while (g_ascii_isxdigit (*p))
749 if (*p == 'p' || *p == 'P')
751 if (*p == '+' || *p == '-')
753 while (g_ascii_isdigit (*p))
758 else if (g_ascii_isdigit (*p) || *p == '.')
760 while (g_ascii_isdigit (*p))
764 decimal_point_pos = p++;
766 while (g_ascii_isdigit (*p))
769 if (*p == 'e' || *p == 'E')
771 if (*p == '+' || *p == '-')
773 while (g_ascii_isdigit (*p))
778 /* For the other cases, we need not convert the decimal point */
781 if (decimal_point_pos)
785 /* We need to convert the '.' to the locale specific decimal point */
/* Buffer large enough for the scanned span with the locale's decimal point
 * substituted, plus the terminating nul. */
786 copy = g_malloc (end - nptr + 1 + decimal_point_len);
/* Three-part copy: text before the '.', the locale's decimal point, then
 * the text after the '.' up to end. */
789 memcpy (c, nptr, decimal_point_pos - nptr);
790 c += decimal_point_pos - nptr;
791 memcpy (c, decimal_point, decimal_point_len);
792 c += decimal_point_len;
793 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
794 c += end - (decimal_point_pos + 1);
/* errno is captured right after strtod() and written back at the end of the
 * function. */
798 val = strtod (copy, &fail_pos);
799 strtod_errno = errno;
/* Map the stop position from the copy back into nptr: beyond the decimal
 * point the copy is (decimal_point_len - 1) bytes longer than the input. */
803 if (fail_pos - copy > decimal_point_pos - nptr)
804 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
806 fail_pos = (char *)nptr + (fail_pos - copy);
/* No '.' to replace: parse a nul-terminated copy limited to the scanned
 * span from nptr to end. */
816 copy = g_malloc (end - (char *)nptr + 1);
817 memcpy (copy, nptr, end - nptr);
818 *(copy + (end - (char *)nptr)) = 0;
821 val = strtod (copy, &fail_pos);
822 strtod_errno = errno;
826 fail_pos = (char *)nptr + (fail_pos - copy);
/* Remaining case: nptr is handed to strtod() unchanged. */
834 val = strtod (nptr, &fail_pos);
835 strtod_errno = errno;
/* Put back the errno value observed right after the strtod() call. */
841 errno = strtod_errno;
850 * @buffer: A buffer to place the resulting string in
851 * @buf_len: The length of the buffer.
852 * @d: The #gdouble to convert
854 * Converts a #gdouble to a string, using the '.' as decimal point.
857 * This function generates enough precision that converting
858 * the string back using g_ascii_strtod() gives the same machine-number
859 * (on machines with IEEE compatible 64bit doubles). It is
860 * guaranteed that the size of the resulting string will never
861 * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes.
863 * Return value: The pointer to the buffer with the converted string.
866 g_ascii_dtostr (gchar *buffer,
/* Fixed format: %g style with 17 significant digits, delegated to
 * g_ascii_formatd(). */
870 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
/* g_ascii_formatd() hands its caller-supplied format string on to
 * _g_snprintf(), which is what -Wformat-nonliteral reports; the warning is
 * switched off from here until the matching "diagnostic pop". */
873 #pragma GCC diagnostic push
874 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
878 * @buffer: A buffer to place the resulting string in
879 * @buf_len: The length of the buffer.
880 * @format: The printf()-style format to use for
881 * converting the value.
882 * @d: The #gdouble to convert
884 * Converts a #gdouble to a string, using the '.' as
885 * decimal point. To format the number you pass in
886 * a printf()-style format string. Allowed conversion
887 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
889 * If you just want to serialize the value into a
890 * string, use g_ascii_dtostr().
892 * Return value: The pointer to the buffer with the converted string.
895 g_ascii_formatd (gchar *buffer,
/* Variant 1: install the "C" locale with uselocale(), format the value,
 * then put the previous locale back. */
903 old_locale = uselocale (get_C_locale ());
904 _g_snprintf (buffer, buf_len, format, d);
905 uselocale (old_locale);
/* Variant 2: format under the current locale, then rewrite the locale's
 * decimal point in the output buffer. */
910 struct lconv *locale_data;
912 const char *decimal_point;
913 int decimal_point_len;
/* Format validation: it must start with '%', must not contain ', l or a
 * further % after the first character, and must end in one of
 * e, E, f, F, g, G. */
918 g_return_val_if_fail (buffer != NULL, NULL);
919 g_return_val_if_fail (format[0] == '%', NULL);
920 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
922 format_char = format[strlen (format) - 1];
924 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
925 format_char == 'f' || format_char == 'F' ||
926 format_char == 'g' || format_char == 'G',
/* The same three conditions are tested again with plain if statements.
 * NOTE(review): presumably so the function still bails out when the
 * g_return_val_if_fail() checks are compiled out -- the statements these
 * ifs guard are not visible in this excerpt; confirm. */
929 if (format[0] != '%')
932 if (strpbrk (format + 1, "'l%"))
935 if (!(format_char == 'e' || format_char == 'E' ||
936 format_char == 'f' || format_char == 'F' ||
937 format_char == 'g' || format_char == 'G'))
940 _g_snprintf (buffer, buf_len, format, d);
/* Look up the decimal point string of the current locale. */
943 locale_data = localeconv ();
944 decimal_point = locale_data->decimal_point;
945 decimal_point_len = strlen (decimal_point);
948 decimal_point_len = 1;
951 g_assert (decimal_point_len != 0);
/* Post-processing is only needed when that decimal point is not a single
 * '.'. The scan below steps over white space, an optional sign and the
 * integer digits to reach the place where the decimal point would be. */
953 if (decimal_point[0] != '.' ||
954 decimal_point[1] != 0)
958 while (g_ascii_isspace (*p))
961 if (*p == '+' || *p == '-')
/* The guchar cast keeps a negative char value from being passed to
 * isdigit(). */
964 while (isdigit ((guchar)*p))
967 if (strncmp (p, decimal_point, decimal_point_len) == 0)
/* A multi-byte decimal point occupies more room than '.': the rest of the
 * string is shifted left by (decimal_point_len - 1) bytes. */
971 if (decimal_point_len > 1)
973 rest_len = strlen (p + (decimal_point_len-1));
974 memmove (p, p + (decimal_point_len-1), rest_len);
983 #pragma GCC diagnostic pop
/* Character tests and case mapping written as plain comparisons against
 * character constants, with no calls into <ctype.h>.
 * Every macro may evaluate its argument more than once, so the argument must
 * not have side effects. */
985 #define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
986 (c) == '\r' || (c) == '\t' || (c) == '\v')
987 #define ISUPPER(c) ((c) >= 'A' && (c) <= 'Z')
988 #define ISLOWER(c) ((c) >= 'a' && (c) <= 'z')
989 #define ISALPHA(c) (ISUPPER (c) || ISLOWER (c))
/* Case mapping shifts by the distance between 'a' and 'A'; any other value
 * is returned unchanged. */
990 #define TOUPPER(c) (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
991 #define TOLOWER(c) (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
996 g_parse_long_long (const gchar *nptr,
997 const gchar **endptr,
1001 /* this code is based on the strtol(3) code from GNU libc released under
1002 * the GNU Lesser General Public License.
1004 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1005 * Free Software Foundation, Inc.
1011 const gchar *s, *save;
1014 g_return_val_if_fail (nptr != NULL, 0);
1017 if (base == 1 || base > 36)
1027 /* Skip white space. */
1028 while (ISSPACE (*s))
1031 if (G_UNLIKELY (!*s))
1034 /* Check for a sign. */
1043 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
/* An 'x' or 'X' in second position is only treated as a hex prefix when the
 * base is 0 (auto-detect) or 16. */
1046 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1057 /* Save the pointer so we can check later if anything happened. */
/* cutoff is the largest accumulator value that can still be multiplied by
 * base, and cutlim the largest digit that may then be added, without
 * exceeding G_MAXUINT64; both feed the overflow test below. */
1059 cutoff = G_MAXUINT64 / base;
1060 cutlim = G_MAXUINT64 % base;
1067 if (c >= '0' && c <= '9')
1069 else if (ISALPHA (c))
/* Letters become digit values starting at 10 for 'A'/'a'. */
1070 c = TOUPPER (c) - 'A' + 10;
1075 /* Check for overflow. */
1076 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1085 /* Check if anything actually happened. */
1089 /* Store in ENDPTR the address of one character
1090 past the last character we converted. */
1094 if (G_UNLIKELY (overflow))
1103 /* We must handle a special case here: the base is 0 or 16 and the
1104 first two characters are '0' and 'x', but the rest are not
1105 hexadecimal digits. This is not an error case. We return 0 and
1106 ENDPTR points to the `x`. */
1109 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1111 *endptr = &save[-1];
1113 /* There was no number to convert. */
1118 #endif /* !USE_XLOCALE */
1122 * @nptr: the string to convert to a numeric value.
1123 * @endptr: if non-%NULL, it returns the character after
1124 * the last character used in the conversion.
1125 * @base: to be used for the conversion, 2..36 or 0
1127 * Converts a string to a #guint64 value.
1128 * This function behaves like the standard strtoull() function
1129 * does in the C locale. It does this without actually
1130 * changing the current locale, since that would not be thread-safe.
1133 * This function is typically used when reading configuration
1134 * files or other non-user input that should be locale independent.
1135 * To handle input from the user you should normally use the
1136 * locale-sensitive system strtoull() function.
1138 * If the correct value would cause overflow, %G_MAXUINT64
1139 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1140 * If the base is outside the valid range, zero is returned, and
1141 * <literal>EINVAL</literal> is stored in <literal>errno</literal>.
1142 * If the string conversion fails, zero is returned, and @endptr returns
1143 * @nptr (if @endptr is non-%NULL).
1145 * Return value: the #guint64 value or zero on error.
1150 g_ascii_strtoull (const gchar *nptr,
/* Variant 1: parse against the "C" locale object with strtoull_l(). */
1155 return strtoull_l (nptr, endptr, base, get_C_locale ());
/* Variant 2: use the parser defined in this file, which hands back a value
 * and reports separately, through `negative`, whether a minus sign was
 * seen. */
1160 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1162 /* Return the result of the appropriate sign. */
/* NOTE(review): result is declared on a line not shown here; if it is an
 * unsigned 64-bit type, the negation wraps modulo 2^64 -- confirm. */
1163 return negative ? -result : result;
1169 * @nptr: the string to convert to a numeric value.
1170 * @endptr: if non-%NULL, it returns the character after
1171 * the last character used in the conversion.
1172 * @base: to be used for the conversion, 2..36 or 0
1174 * Converts a string to a #gint64 value.
1175 * This function behaves like the standard strtoll() function
1176 * does in the C locale. It does this without actually
1177 * changing the current locale, since that would not be thread-safe.
1180 * This function is typically used when reading configuration
1181 * files or other non-user input that should be locale independent.
1182 * To handle input from the user you should normally use the
1183 * locale-sensitive system strtoll() function.
1185 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1186 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1187 * If the base is outside the valid range, zero is returned, and
1188 * <literal>EINVAL</literal> is stored in <literal>errno</literal>. If the
1189 * string conversion fails, zero is returned, and @endptr returns @nptr
1190 * (if @endptr is non-%NULL).
1192 * Return value: the #gint64 value or zero on error.
1197 g_ascii_strtoll (const gchar *nptr,
/* Variant 1: parse against the "C" locale object with strtoll_l(). */
1202 return strtoll_l (nptr, endptr, base, get_C_locale ());
/* Variant 2: use the parser defined in this file, then range-check the
 * parsed magnitude for the signed result type. */
1207 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
/* With a minus sign the magnitude may be as large as G_MININT64 converted
 * to guint64; without one only as large as G_MAXINT64. What happens inside
 * these two branches is on lines not shown in this excerpt. */
1209 if (negative && result > (guint64) G_MININT64)
1214 else if (!negative && result > (guint64) G_MAXINT64)
1220 return - (gint64) result;
1222 return (gint64) result;
1228 * @errnum: the system error number. See the standard C %errno
1231 * Returns a string corresponding to the given error code, e.g.
1232 * "no such process". You should use this function in preference to
1233 * strerror(), because it returns a string in UTF-8 encoding, and since
1234 * not all platforms support the strerror() function.
1236 * Returns: a UTF-8 string describing the error code. If the error code
1237 * is unknown, it returns "unknown error (<code>)".
1240 g_strerror (gint errnum)
1243 gchar *tofree = NULL;
/* errno is saved up front and written back at the end, so the calls in
 * between leave the caller's errno as it was. */
1245 gint saved_errno = errno;
1247 msg = strerror (errnum);
/* When g_get_charset() returns FALSE the message is converted with
 * g_locale_to_utf8(); tofree remembers the converted buffer.
 * NOTE(review): presumably FALSE means the locale charset is not UTF-8 --
 * confirm. If the conversion fails, msg would be NULL when it reaches
 * g_intern_string(); no check is visible in this excerpt. */
1248 if (!g_get_charset (NULL))
1249 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* The message is interned. NOTE(review): presumably ret is what the
 * function returns; the return statement is not shown. */
1251 ret = g_intern_string (msg);
1253 errno = saved_errno;
1259 * @signum: the signal number. See the <literal>signal</literal>
1262 * Returns a string describing the given signal, e.g. "Segmentation fault".
1263 * You should use this function in preference to strsignal(), because it
1264 * returns a string in UTF-8 encoding, and since not all platforms support
1265 * the strsignal() function.
1267 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1268 * it returns "unknown signal (<signum>)".
1271 g_strsignal (gint signum)
1277 msg = tofree = NULL;
/* The system description is only requested when strsignal() is available;
 * as in g_strerror(), it is converted with g_locale_to_utf8() when
 * g_get_charset() returns FALSE. */
1279 #ifdef HAVE_STRSIGNAL
1280 msg = strsignal (signum);
1281 if (!g_get_charset (NULL))
1282 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* Generic fallback text.
 * NOTE(review): the condition guarding this assignment is on a line not
 * shown here -- presumably it applies when msg is still NULL; confirm. */
1286 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1287 ret = g_intern_string (msg);
1293 /* Functions g_strlcpy and g_strlcat were originally developed by
1294 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1295 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1296 * for more information.
1300 /* Use the native ones, if available; they might be implemented in assembly */
1302 g_strlcpy (gchar *dest,
/* Thin wrapper: reject NULL pointers (returning 0), then let the platform
 * strlcpy() do the copy and supply the return value. */
1306 g_return_val_if_fail (dest != NULL, 0);
1307 g_return_val_if_fail (src != NULL, 0);
1309 return strlcpy (dest, src, dest_size);
1313 g_strlcat (gchar *dest,
/* Thin wrapper: reject NULL pointers (returning 0), then let the platform
 * strlcat() do the append and supply the return value. */
1317 g_return_val_if_fail (dest != NULL, 0);
1318 g_return_val_if_fail (src != NULL, 0);
1320 return strlcat (dest, src, dest_size);
1323 #else /* ! HAVE_STRLCPY */
1326 * @dest: destination buffer
1327 * @src: source buffer
1328 * @dest_size: length of @dest in bytes
1330 * Portability wrapper that calls strlcpy() on systems which have it,
1331 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1332 * guaranteed to be nul-terminated; @src must be nul-terminated;
1333 * @dest_size is the buffer size, not the number of bytes to copy.
1335 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1336 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1337 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1338 * returns the size of the attempted result, strlen (src), so if
1339 * @retval >= @dest_size, truncation occurred.
1341 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1342 * but if you really want to avoid screwups, g_strdup() is an even better
1345 * Returns: length of @src
1348 g_strlcpy (gchar *dest,
/* Emulation: d and s are the write and read cursors, n the remaining room
 * in dest. */
1352 register gchar *d = dest;
1353 register const gchar *s = src;
1354 register gsize n = dest_size;
1356 g_return_val_if_fail (dest != NULL, 0);
1357 g_return_val_if_fail (src != NULL, 0);
1359 /* Copy as many bytes as will fit */
/* The pre-decrement sets one byte of dest aside for the terminator; the
 * copy is skipped altogether when dest_size is 0 or 1. */
1360 if (n != 0 && --n != 0)
1363 register gchar c = *s++;
1371 /* If not enough room in dest, add NUL and traverse rest of src */
/* s has been advanced past the nul of src by this point, hence the -1. */
1380 return s - src - 1; /* count does not include NUL */
1385 * @dest: destination buffer, already containing one nul-terminated string
1386 * @src: source buffer
1387 * @dest_size: length of @dest buffer in bytes (not length of existing string
1390 * Portability wrapper that calls strlcat() on systems which have it,
1391 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1392 * guaranteeing nul-termination for @dest. The total size of @dest won't
1393 * exceed @dest_size.
1395 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1396 * @dest_size is the full size of dest, not the space left over. This
1397 * function does not allocate memory. It always nul-terminates (unless
1398 * @dest_size == 0 or there were no nul characters in the @dest_size
1399 * characters of dest to start with).
1401 * Caveat: this is supposedly a more secure alternative to strcat() or
1402 * strncat(), but for real security g_strconcat() is harder to mess up.
1404 * Returns: size of attempted result, which is MIN (dest_size, strlen
1405 * (original dest)) + strlen (src), so if retval >= dest_size,
1406 * truncation occurred.
1409 g_strlcat (gchar *dest,
1413 register gchar *d = dest;
1414 register const gchar *s = src;
1415 register gsize bytes_left = dest_size;
1416 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1418 g_return_val_if_fail (dest != NULL, 0);
1419 g_return_val_if_fail (src != NULL, 0);
1421 /* Find the end of dst and adjust bytes left but don't go past end */
1422 while (*d != 0 && bytes_left-- != 0)
1425 bytes_left = dest_size - dlength;
1427 if (bytes_left == 0)
1428 return dlength + strlen (s);
1432 if (bytes_left != 1)
1441 return dlength + (s - src); /* count does not include NUL */
1443 #endif /* ! HAVE_STRLCPY */
1448 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1450 * Converts all upper case ASCII letters to lower case ASCII letters.
1452 * Return value: a newly-allocated string, with all the upper case
1453 * characters in @str converted to lower case, with semantics that
1454 * exactly match g_ascii_tolower(). (Note that this is unlike the
1455 * old g_strdown(), which modified the string in place.)
1458 g_ascii_strdown (const gchar *str,
1463 g_return_val_if_fail (str != NULL, NULL);
1468 result = g_strndup (str, len);
1469 for (s = result; *s; s++)
1470 *s = g_ascii_tolower (*s);
1478 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1480 * Converts all lower case ASCII letters to upper case ASCII letters.
1482 * Return value: a newly allocated string, with all the lower case
1483 * characters in @str converted to upper case, with semantics that
1484 * exactly match g_ascii_toupper(). (Note that this is unlike the
1485 * old g_strup(), which modified the string in place.)
1488 g_ascii_strup (const gchar *str,
1493 g_return_val_if_fail (str != NULL, NULL);
1498 result = g_strndup (str, len);
1499 for (s = result; *s; s++)
1500 *s = g_ascii_toupper (*s);
1509 * Determines if a string is pure ASCII. A string is pure ASCII if it
1510 * contains no bytes with the high bit set.
1512 * Returns: %TRUE if @str is ASCII
1517 g_str_is_ascii (const gchar *str)
1521 for (i = 0; str[i]; i++)
1530 * @string: the string to convert.
1532 * Converts a string to lower case.
1534 * Return value: the string
1536 * Deprecated:2.2: This function is totally broken for the reasons discussed
1537 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1541 g_strdown (gchar *string)
1545 g_return_val_if_fail (string != NULL, NULL);
1547 s = (guchar *) string;
1556 return (gchar *) string;
1561 * @string: the string to convert
1563 * Converts a string to upper case.
1565 * Return value: the string
1567 * Deprecated:2.2: This function is totally broken for the reasons
1568 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1569 * or g_utf8_strup() instead.
1572 g_strup (gchar *string)
1576 g_return_val_if_fail (string != NULL, NULL);
1578 s = (guchar *) string;
1587 return (gchar *) string;
1592 * @string: the string to reverse
1594 * Reverses all of the bytes in a string. For example,
1595 * <literal>g_strreverse ("abcdef")</literal> will result
1598 * Note that g_strreverse() doesn't work on UTF-8 strings
1599 * containing multibyte characters. For that purpose, use
1600 * g_utf8_strreverse().
1602 * Returns: the same pointer passed in as @string
1605 g_strreverse (gchar *string)
1607 g_return_val_if_fail (string != NULL, NULL);
1611 register gchar *h, *t;
1614 t = string + strlen (string) - 1;
1635 * Convert a character to ASCII lower case.
1637 * Unlike the standard C library tolower() function, this only
1638 * recognizes standard ASCII letters and ignores the locale, returning
1639 * all non-ASCII characters unchanged, even if they are lower case
1640 * letters in a particular character set. Also unlike the standard
1641 * library function, this takes and returns a char, not an int, so
1642 * don't call it on %EOF but no need to worry about casting to #guchar
1643 * before passing a possibly non-ASCII character in.
1645 * Return value: the result of converting @c to lower case. If @c is
1646 * not an ASCII upper case letter, @c is returned unchanged.
1649 g_ascii_tolower (gchar c)
1651 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1658 * Convert a character to ASCII upper case.
1660 * Unlike the standard C library toupper() function, this only
1661 * recognizes standard ASCII letters and ignores the locale, returning
1662 * all non-ASCII characters unchanged, even if they are upper case
1663 * letters in a particular character set. Also unlike the standard
1664 * library function, this takes and returns a char, not an int, so
1665 * don't call it on %EOF but no need to worry about casting to #guchar
1666 * before passing a possibly non-ASCII character in.
1668 * Return value: the result of converting @c to upper case. If @c is not
1669 * an ASCII lower case letter, @c is returned unchanged.
1672 g_ascii_toupper (gchar c)
1674 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1678 * g_ascii_digit_value:
1679 * @c: an ASCII character
1681 * Determines the numeric value of a character as a decimal digit.
1682 * Differs from g_unichar_digit_value() because it takes a char, so
1683 * there's no worry about sign extension if characters are signed.
1685 * Return value: If @c is a decimal digit (according to g_ascii_isdigit()),
1686 * its numeric value. Otherwise, -1.
1689 g_ascii_digit_value (gchar c)
1691 if (g_ascii_isdigit (c))
1697 * g_ascii_xdigit_value:
1698 * @c: an ASCII character.
1700 * Determines the numeric value of a character as a hexadecimal
1701 * digit. Differs from g_unichar_xdigit_value() because it takes
1702 * a char, so there's no worry about sign extension if characters
1705 * Return value: If @c is a hex digit (according to g_ascii_isxdigit()),
1706 * its numeric value. Otherwise, -1.
1709 g_ascii_xdigit_value (gchar c)
1711 if (c >= 'A' && c <= 'F')
1712 return c - 'A' + 10;
1713 if (c >= 'a' && c <= 'f')
1714 return c - 'a' + 10;
1715 return g_ascii_digit_value (c);
1719 * g_ascii_strcasecmp:
1720 * @s1: string to compare with @s2
1721 * @s2: string to compare with @s1
1723 * Compare two strings, ignoring the case of ASCII characters.
1725 * Unlike the BSD strcasecmp() function, this only recognizes standard
1726 * ASCII letters and ignores the locale, treating all non-ASCII
1727 * bytes as if they are not letters.
1729 * This function should be used only on strings that are known to be
1730 * in encodings where the bytes corresponding to ASCII letters always
1731 * represent themselves. This includes UTF-8 and the ISO-8859-*
1732 * charsets, but not for instance double-byte encodings like the
1733 * Windows Codepage 932, where the trailing bytes of double-byte
1734 * characters include all ASCII letters. If you compare two CP932
1735 * strings using this function, you will get false matches.
1737 * Both @s1 and @s2 must be non-%NULL.
1739 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1740 * or a positive value if @s1 > @s2.
1743 g_ascii_strcasecmp (const gchar *s1,
1748 g_return_val_if_fail (s1 != NULL, 0);
1749 g_return_val_if_fail (s2 != NULL, 0);
1753 c1 = (gint)(guchar) TOLOWER (*s1);
1754 c2 = (gint)(guchar) TOLOWER (*s2);
1760 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1764 * g_ascii_strncasecmp:
1765 * @s1: string to compare with @s2
1766 * @s2: string to compare with @s1
1767 * @n: number of characters to compare
1769 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1770 * characters after the first @n in each string.
1772 * Unlike the BSD strncasecmp() function, this only recognizes standard
1773 * ASCII letters and ignores the locale, treating all non-ASCII
1774 * characters as if they are not letters.
1776 * The same warning as in g_ascii_strcasecmp() applies: Use this
1777 * function only on strings known to be in encodings where bytes
1778 * corresponding to ASCII letters always represent themselves.
1780 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1781 * or a positive value if @s1 > @s2.
1784 g_ascii_strncasecmp (const gchar *s1,
1790 g_return_val_if_fail (s1 != NULL, 0);
1791 g_return_val_if_fail (s2 != NULL, 0);
1793 while (n && *s1 && *s2)
1796 c1 = (gint)(guchar) TOLOWER (*s1);
1797 c2 = (gint)(guchar) TOLOWER (*s2);
1804 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1812 * @s2: a string to compare with @s1
1814 * A case-insensitive string comparison, corresponding to the standard
1815 * strcasecmp() function on platforms which support it.
1817 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1818 * or a positive value if @s1 > @s2.
1820 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1821 * function is deprecated and how to replace it.
1824 g_strcasecmp (const gchar *s1,
1827 #ifdef HAVE_STRCASECMP
1828 g_return_val_if_fail (s1 != NULL, 0);
1829 g_return_val_if_fail (s2 != NULL, 0);
1831 return strcasecmp (s1, s2);
1835 g_return_val_if_fail (s1 != NULL, 0);
1836 g_return_val_if_fail (s2 != NULL, 0);
1840 /* According to A. Cox, some platforms have islower's that
1841 * don't work right on non-uppercase
1843 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1844 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1850 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1857 * @s2: a string to compare with @s1
1858 * @n: the maximum number of characters to compare
1860 * A case-insensitive string comparison, corresponding to the standard
1861 * strncasecmp() function on platforms which support it. It is similar
1862 * to g_strcasecmp() except it only compares the first @n characters of
1865 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1866 * or a positive value if @s1 > @s2.
1868 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1869 * the comparison by calling toupper()/tolower(). These functions
1870 * are locale-specific and operate on single bytes. However, it is
1871 * impossible to handle things correctly from an internationalization
1872 * standpoint by operating on bytes, since characters may be multibyte.
1873 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1874 * ASCII, since it is locale-sensitive, and it's broken if your string
1875 * is localized, since it doesn't work on many encodings at all,
1876 * including UTF-8, EUC-JP, etc.
1878 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1879 * which only works on ASCII and is not locale-sensitive, and
1880 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1881 * which is good for case-insensitive sorting of UTF-8.
1884 g_strncasecmp (const gchar *s1,
1888 #ifdef HAVE_STRNCASECMP
1889 return strncasecmp (s1, s2, n);
1893 g_return_val_if_fail (s1 != NULL, 0);
1894 g_return_val_if_fail (s2 != NULL, 0);
1896 while (n && *s1 && *s2)
1899 /* According to A. Cox, some platforms have islower's that
1900 * don't work right on non-uppercase
1902 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1903 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1910 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1918 * @string: the string to convert
1919 * @delimiters: (allow-none): a string containing the current delimiters,
1920 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1921 * @new_delimiter: the new delimiter character
1923 * Converts any delimiter characters in @string to @new_delimiter.
1924 * Any characters in @string which are found in @delimiters are
1925 * changed to the @new_delimiter character. Modifies @string in place,
1926 * and returns @string itself, not a copy. The return value is to
1927 * allow nesting such as
1928 * |[<!-- language="C" -->
1929 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
1935 g_strdelimit (gchar *string,
1936 const gchar *delimiters,
1941 g_return_val_if_fail (string != NULL, NULL);
1944 delimiters = G_STR_DELIMITERS;
1946 for (c = string; *c; c++)
1948 if (strchr (delimiters, *c))
1957 * @string: a nul-terminated array of bytes
1958 * @valid_chars: bytes permitted in @string
1959 * @substitutor: replacement character for disallowed bytes
1961 * For each character in @string, if the character is not in @valid_chars,
1962 * replaces the character with @substitutor. Modifies @string in place,
1963 * and returns @string itself, not a copy. The return value is to allow
1965 * |[<!-- language="C" -->
1966 * g_ascii_strup (g_strcanon (str, "abc", '?'))
1972 g_strcanon (gchar *string,
1973 const gchar *valid_chars,
1978 g_return_val_if_fail (string != NULL, NULL);
1979 g_return_val_if_fail (valid_chars != NULL, NULL);
1981 for (c = string; *c; c++)
1983 if (!strchr (valid_chars, *c))
1992 * @source: a string to compress
1994 * Replaces all escaped characters with their one byte equivalent.
1996 * This function does the reverse conversion of g_strescape().
1998 * Returns: a newly-allocated copy of @source with all escaped
1999 * characters compressed
2002 g_strcompress (const gchar *source)
2004 const gchar *p = source, *octal;
2008 g_return_val_if_fail (source != NULL, NULL);
2010 dest = g_malloc (strlen (source) + 1);
2021 g_warning ("g_strcompress: trailing \\");
2023 case '0': case '1': case '2': case '3': case '4':
2024 case '5': case '6': case '7':
2027 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2029 *q = (*q * 8) + (*p - '0');
2053 default: /* Also handles \" and \\ */
2070 * @source: a string to escape
2071 * @exceptions: a string of characters not to escape in @source
2073 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2074 * and '"' in the string @source by inserting a '\' before
2075 * them. Additionally all characters in the range 0x01-0x1F (everything
2076 * below SPACE) and in the range 0x7F-0xFF (DEL and all non-ASCII chars) are
2077 * replaced with a '\' followed by their octal representation.
2078 * Characters supplied in @exceptions are not escaped.
2080 * g_strcompress() does the reverse conversion.
2082 * Returns: a newly-allocated copy of @source with certain
2083 * characters escaped. See above.
2086 g_strescape (const gchar *source,
2087 const gchar *exceptions)
2094 g_return_val_if_fail (source != NULL, NULL);
2096 p = (guchar *) source;
2097 /* Each source byte needs maximally four destination chars (\777) */
2098 q = dest = g_malloc (strlen (source) * 4 + 1);
2100 memset (excmap, 0, 256);
2103 guchar *e = (guchar *) exceptions;
2153 if ((*p < ' ') || (*p >= 0177))
2156 *q++ = '0' + (((*p) >> 6) & 07);
2157 *q++ = '0' + (((*p) >> 3) & 07);
2158 *q++ = '0' + ((*p) & 07);
2173 * @string: a string to remove the leading whitespace from
2175 * Removes leading whitespace from a string, by moving the rest
2176 * of the characters forward.
2178 * This function doesn't allocate or reallocate any memory;
2179 * it modifies @string in place. Therefore, it cannot be used on
2180 * statically allocated strings.
2182 * The pointer to @string is returned to allow the nesting of functions.
2184 * Also see g_strchomp() and g_strstrip().
2189 g_strchug (gchar *string)
2193 g_return_val_if_fail (string != NULL, NULL);
2195 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2198 memmove (string, start, strlen ((gchar *) start) + 1);
2205 * @string: a string to remove the trailing whitespace from
2207 * Removes trailing whitespace from a string.
2209 * This function doesn't allocate or reallocate any memory;
2210 * it modifies @string in place. Therefore, it cannot be used
2211 * on statically allocated strings.
2213 * The pointer to @string is returned to allow the nesting of functions.
2215 * Also see g_strchug() and g_strstrip().
2220 g_strchomp (gchar *string)
2224 g_return_val_if_fail (string != NULL, NULL);
2226 len = strlen (string);
2229 if (g_ascii_isspace ((guchar) string[len]))
2240 * @string: a string to split
2241 * @delimiter: a string which specifies the places at which to split
2242 * the string. The delimiter is not included in any of the resulting
2243 * strings, unless @max_tokens is reached.
2244 * @max_tokens: the maximum number of pieces to split @string into.
2245 * If this is less than 1, the string is split completely.
2247 * Splits a string into a maximum of @max_tokens pieces, using the given
2248 * @delimiter. If @max_tokens is reached, the remainder of @string is
2249 * appended to the last token.
2251 * As a special case, the result of splitting the empty string "" is an empty
2252 * vector, not a vector containing a single string. The reason for this
2253 * special case is that being able to represent an empty vector is typically
2254 * more useful than consistent handling of empty elements. If you do need
2255 * to represent empty elements, you'll need to check for the empty string
2256 * before calling g_strsplit().
2258 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2259 * g_strfreev() to free it.
2262 g_strsplit (const gchar *string,
2263 const gchar *delimiter,
2266 GSList *string_list = NULL, *slist;
2267 gchar **str_array, *s;
2269 const gchar *remainder;
2271 g_return_val_if_fail (string != NULL, NULL);
2272 g_return_val_if_fail (delimiter != NULL, NULL);
2273 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2276 max_tokens = G_MAXINT;
2279 s = strstr (remainder, delimiter);
2282 gsize delimiter_len = strlen (delimiter);
2284 while (--max_tokens && s)
2288 len = s - remainder;
2289 string_list = g_slist_prepend (string_list,
2290 g_strndup (remainder, len));
2292 remainder = s + delimiter_len;
2293 s = strstr (remainder, delimiter);
2299 string_list = g_slist_prepend (string_list, g_strdup (remainder));
2302 str_array = g_new (gchar*, n + 1);
2304 str_array[n--] = NULL;
2305 for (slist = string_list; slist; slist = slist->next)
2306 str_array[n--] = slist->data;
2308 g_slist_free (string_list);
2315 * @string: The string to be tokenized
2316 * @delimiters: A nul-terminated string containing bytes that are used
2317 * to split the string.
2318 * @max_tokens: The maximum number of tokens to split @string into.
2319 * If this is less than 1, the string is split completely
2321 * Splits @string into a number of tokens not containing any of the characters
2322 * in @delimiters. A token is the (possibly empty) longest string that does not
2323 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2324 * remainder is appended to the last token.
2326 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2327 * %NULL-terminated vector containing the three strings "abc", "def",
2330 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2331 * vector containing the four strings "", "def", "ghi", and "".
2333 * As a special case, the result of splitting the empty string "" is an empty
2334 * vector, not a vector containing a single string. The reason for this
2335 * special case is that being able to represent an empty vector is typically
2336 * more useful than consistent handling of empty elements. If you do need
2337 * to represent empty elements, you'll need to check for the empty string
2338 * before calling g_strsplit_set().
2340 * Note that this function works on bytes not characters, so it can't be used
2341 * to delimit UTF-8 strings for anything but ASCII characters.
2343 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2344 * g_strfreev() to free it.
2349 g_strsplit_set (const gchar *string,
2350 const gchar *delimiters,
2353 gboolean delim_table[256];
2354 GSList *tokens, *list;
2357 const gchar *current;
2361 g_return_val_if_fail (string != NULL, NULL);
2362 g_return_val_if_fail (delimiters != NULL, NULL);
2365 max_tokens = G_MAXINT;
2367 if (*string == '\0')
2369 result = g_new (char *, 1);
2374 memset (delim_table, FALSE, sizeof (delim_table));
2375 for (s = delimiters; *s != '\0'; ++s)
2376 delim_table[*(guchar *)s] = TRUE;
2381 s = current = string;
2384 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2386 token = g_strndup (current, s - current);
2387 tokens = g_slist_prepend (tokens, token);
2396 token = g_strndup (current, s - current);
2397 tokens = g_slist_prepend (tokens, token);
2400 result = g_new (gchar *, n_tokens + 1);
2402 result[n_tokens] = NULL;
2403 for (list = tokens; list != NULL; list = list->next)
2404 result[--n_tokens] = list->data;
2406 g_slist_free (tokens);
2413 * @str_array: a %NULL-terminated array of strings to free
2415 * Frees a %NULL-terminated array of strings, and the array itself.
2416 * If called on a %NULL value, g_strfreev() simply returns.
2419 g_strfreev (gchar **str_array)
2425 for (i = 0; str_array[i] != NULL; i++)
2426 g_free (str_array[i]);
2434 * @str_array: a %NULL-terminated array of strings
2436 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2437 * the new array should be freed by first freeing each string, then
2438 * the array itself. g_strfreev() does this for you. If called
2439 * on a %NULL value, g_strdupv() simply returns %NULL.
2441 * Return value: a new %NULL-terminated array of strings.
2444 g_strdupv (gchar **str_array)
2452 while (str_array[i])
2455 retval = g_new (gchar*, i + 1);
2458 while (str_array[i])
2460 retval[i] = g_strdup (str_array[i]);
2473 * @separator: (allow-none): a string to insert between each of the
2475 * @str_array: a %NULL-terminated array of strings to join
2477 * Joins a number of strings together to form one long string, with the
2478 * optional @separator inserted between each of them. The returned string
2479 * should be freed with g_free().
2481 * Returns: a newly-allocated string containing all of the strings joined
2482 * together, with @separator between them
2485 g_strjoinv (const gchar *separator,
2491 g_return_val_if_fail (str_array != NULL, NULL);
2493 if (separator == NULL)
2500 gsize separator_len;
2502 separator_len = strlen (separator);
2503 /* First part, getting length */
2504 len = 1 + strlen (str_array[0]);
2505 for (i = 1; str_array[i] != NULL; i++)
2506 len += strlen (str_array[i]);
2507 len += separator_len * (i - 1);
2509 /* Second part, building string */
2510 string = g_new (gchar, len);
2511 ptr = g_stpcpy (string, *str_array);
2512 for (i = 1; str_array[i] != NULL; i++)
2514 ptr = g_stpcpy (ptr, separator);
2515 ptr = g_stpcpy (ptr, str_array[i]);
2519 string = g_strdup ("");
2526 * @separator: (allow-none): a string to insert between each of the
2528 * @...: a %NULL-terminated list of strings to join
2530 * Joins a number of strings together to form one long string, with the
2531 * optional @separator inserted between each of them. The returned string
2532 * should be freed with g_free().
2534 * Returns: a newly-allocated string containing all of the strings joined
2535 * together, with @separator between them
2538 g_strjoin (const gchar *separator,
2544 gsize separator_len;
2547 if (separator == NULL)
2550 separator_len = strlen (separator);
2552 va_start (args, separator);
2554 s = va_arg (args, gchar*);
2558 /* First part, getting length */
2559 len = 1 + strlen (s);
2561 s = va_arg (args, gchar*);
2564 len += separator_len + strlen (s);
2565 s = va_arg (args, gchar*);
2569 /* Second part, building string */
2570 string = g_new (gchar, len);
2572 va_start (args, separator);
2574 s = va_arg (args, gchar*);
2575 ptr = g_stpcpy (string, s);
2577 s = va_arg (args, gchar*);
2580 ptr = g_stpcpy (ptr, separator);
2581 ptr = g_stpcpy (ptr, s);
2582 s = va_arg (args, gchar*);
2586 string = g_strdup ("");
2596 * @haystack: a string
2597 * @haystack_len: the maximum length of @haystack. Note that -1 is
2598 * a valid length, if @haystack is nul-terminated, meaning it will
2599 * search through the whole string.
2600 * @needle: the string to search for
2602 * Searches the string @haystack for the first occurrence
2603 * of the string @needle, limiting the length of the search
2606 * Return value: a pointer to the found occurrence, or
2607 * %NULL if not found.
2610 g_strstr_len (const gchar *haystack,
2611 gssize haystack_len,
2612 const gchar *needle)
2614 g_return_val_if_fail (haystack != NULL, NULL);
2615 g_return_val_if_fail (needle != NULL, NULL);
2617 if (haystack_len < 0)
2618 return strstr (haystack, needle);
2621 const gchar *p = haystack;
2622 gsize needle_len = strlen (needle);
2626 if (needle_len == 0)
2627 return (gchar *)haystack;
2629 if (haystack_len < needle_len)
2632 end = haystack + haystack_len - needle_len;
2634 while (p <= end && *p)
2636 for (i = 0; i < needle_len; i++)
2637 if (p[i] != needle[i])
2652 * @haystack: a nul-terminated string
2653 * @needle: the nul-terminated string to search for
2655 * Searches the string @haystack for the last occurrence
2656 * of the string @needle.
2658 * Return value: a pointer to the found occurrence, or
2659 * %NULL if not found.
2662 g_strrstr (const gchar *haystack,
2663 const gchar *needle)
2670 g_return_val_if_fail (haystack != NULL, NULL);
2671 g_return_val_if_fail (needle != NULL, NULL);
2673 needle_len = strlen (needle);
2674 haystack_len = strlen (haystack);
2676 if (needle_len == 0)
2677 return (gchar *)haystack;
2679 if (haystack_len < needle_len)
2682 p = haystack + haystack_len - needle_len;
2684 while (p >= haystack)
2686 for (i = 0; i < needle_len; i++)
2687 if (p[i] != needle[i])
2701 * @haystack: a nul-terminated string
2702 * @haystack_len: the maximum length of @haystack
2703 * @needle: the nul-terminated string to search for
2705 * Searches the string @haystack for the last occurrence
2706 * of the string @needle, limiting the length of the search
2709 * Return value: a pointer to the found occurrence, or
2710 * %NULL if not found.
2713 g_strrstr_len (const gchar *haystack,
2714 gssize haystack_len,
2715 const gchar *needle)
2717 g_return_val_if_fail (haystack != NULL, NULL);
2718 g_return_val_if_fail (needle != NULL, NULL);
2720 if (haystack_len < 0)
2721 return g_strrstr (haystack, needle);
2724 gsize needle_len = strlen (needle);
2725 const gchar *haystack_max = haystack + haystack_len;
2726 const gchar *p = haystack;
2729 while (p < haystack_max && *p)
2732 if (p < haystack + needle_len)
2737 while (p >= haystack)
2739 for (i = 0; i < needle_len; i++)
2740 if (p[i] != needle[i])
2756 * @str: a nul-terminated string
2757 * @suffix: the nul-terminated suffix to look for
2759 * Looks whether the string @str ends with @suffix.
2761 * Return value: %TRUE if @str ends with @suffix, %FALSE otherwise.
2766 g_str_has_suffix (const gchar *str,
2767 const gchar *suffix)
2772 g_return_val_if_fail (str != NULL, FALSE);
2773 g_return_val_if_fail (suffix != NULL, FALSE);
2775 str_len = strlen (str);
2776 suffix_len = strlen (suffix);
2778 if (str_len < suffix_len)
2781 return strcmp (str + str_len - suffix_len, suffix) == 0;
2786 * @str: a nul-terminated string
2787 * @prefix: the nul-terminated prefix to look for
2789 * Looks whether the string @str begins with @prefix.
2791 * Return value: %TRUE if @str begins with @prefix, %FALSE otherwise.
2796 g_str_has_prefix (const gchar *str,
2797 const gchar *prefix)
2802 g_return_val_if_fail (str != NULL, FALSE);
2803 g_return_val_if_fail (prefix != NULL, FALSE);
2805 str_len = strlen (str);
2806 prefix_len = strlen (prefix);
2808 if (str_len < prefix_len)
2811 return strncmp (str, prefix, prefix_len) == 0;
2816 * @str_array: a %NULL-terminated array of strings
2818 * Returns the length of the given %NULL-terminated
2819 * string array @str_array.
2821 * Return value: length of @str_array.
2826 g_strv_length (gchar **str_array)
2830 g_return_val_if_fail (str_array != NULL, 0);
2832 while (str_array[i])
2839 index_add_folded (GPtrArray *array,
2845 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2847 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2848 if (strstr (normal, "ı") || strstr (normal, "İ"))
2853 tmp = g_string_new (NULL);
2859 i = strstr (s, "ı");
2860 I = strstr (s, "İ");
2873 g_string_append_len (tmp, s, e - s);
2874 g_string_append_c (tmp, 'i');
2875 s = g_utf8_next_char (e);
2878 g_string_append (tmp, s);
2880 normal = g_string_free (tmp, FALSE);
2883 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
2888 split_words (const gchar *value)
2890 const gchar *start = NULL;
2894 result = g_ptr_array_new ();
2896 for (s = value; *s; s = g_utf8_next_char (s))
2898 gunichar c = g_utf8_get_char (s);
2902 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
2907 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2909 index_add_folded (result, start, s);
2916 index_add_folded (result, start, s);
2918 g_ptr_array_add (result, NULL);
2920 return (gchar **) g_ptr_array_free (result, FALSE);
2924 * g_str_tokenize_and_fold:
2926 * @translit_locale: (allow-none): the language code (like 'de' or
2927 * 'en_GB') from which @string originates
2928 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2929 * return location for ASCII alternates
2931 * Tokenises @string and performs folding on each token.
2933 * A token is a non-empty sequence of alphanumeric characters in the
2934 * source string, separated by non-alphanumeric characters. An
2935 * "alphanumeric" character for this purpose is one that matches
2936 * g_unichar_isalnum() or g_unichar_ismark().
2938 * Each token is then (Unicode) normalised and case-folded. If
2939 * @ascii_alternates is non-%NULL and some of the returned tokens
2940 * contain non-ASCII characters, ASCII alternatives will be generated.
2942 * The number of ASCII alternatives that are generated and the method
2943 * for doing so is unspecified, but @translit_locale (if specified) may
2944 * improve the transliteration if the language of the source string is
2947 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
2952 g_str_tokenize_and_fold (const gchar *string,
2953 const gchar *translit_locale,
2954 gchar ***ascii_alternates)
2958 if (ascii_alternates && g_str_is_ascii (string))
2960 *ascii_alternates = g_new0 (gchar *, 0 + 1);
2961 ascii_alternates = NULL;
2964 result = split_words (string);
2966 /* TODO: proper iconv transliteration (locale-dependent) */
2967 if (ascii_alternates)
2971 n = g_strv_length (result);
2972 *ascii_alternates = g_new (gchar *, n + 1);
2975 for (i = 0; i < n; i++)
2977 if (!g_str_is_ascii (result[i]))
2984 decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL);
2985 ascii = g_malloc (strlen (decomposed) + 1);
2987 for (k = 0; decomposed[k]; k++)
2988 if (~decomposed[k] & 0x80)
2989 ascii[l++] = decomposed[k];
2992 (*ascii_alternates)[j++] = ascii;
2993 g_free (decomposed);
2997 (*ascii_alternates)[j] = NULL;
3004 * g_str_match_string:
3005 * @search_term: the search term from the user
3006 * @potential_hit: the text that may be a hit
3007 * @accept_alternates: %TRUE to accept ASCII alternates
3009 * Checks if a search conducted for @search_term should match
3012 * This function calls g_str_tokenize_and_fold() on both
3013 * @search_term and @potential_hit. ASCII alternates are never taken
3014 * for @search_term but will be taken for @potential_hit according to
3015 * the value of @accept_alternates.
3017 * A hit occurs when each folded token in @search_term is a prefix of a
3018 * folded token from @potential_hit.
3020 * Depending on how you're performing the search, it will typically be
3021 * faster to call g_str_tokenize_and_fold() on each string in
3022 * your corpus and build an index on the returned folded tokens, then
3023 * call g_str_tokenize_and_fold() on the search term and
3024 * perform lookups into that index.
3026 * As some examples, searching for "fred" would match the potential hit
3027 * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match
3028 * "Frédéric" but not "Frederic" (due to the one-directional nature of
3029 * accent matching). Searching "fo" would match "Foo" and "Bar Foo
3030 * Baz", but not "SFO" (because no word has "fo" as a prefix).
3032 * Returns: %TRUE if @potential_hit is a hit
3037 g_str_match_string (const gchar *search_term,
3038 const gchar *potential_hit,
3039 gboolean accept_alternates)
3041 gchar **alternates = NULL;
3042 gchar **term_tokens;
3047 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3048 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3052 for (i = 0; term_tokens[i]; i++)
3054 for (j = 0; hit_tokens[j]; j++)
3055 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3058 if (accept_alternates)
3059 for (j = 0; alternates[j]; j++)
3060 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3070 g_strfreev (term_tokens);
3071 g_strfreev (hit_tokens);
3072 g_strfreev (alternates);