1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
22 * file for a list of people on the GLib Team. See the ChangeLog
23 * files for a list of changes. These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
40 #include <ctype.h> /* For tolower() */
43 /* Needed on BSD/OS X for e.g. strtod_l */
51 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
53 #include "gstrfuncs.h"
56 #include "gprintfint.h"
61 * SECTION:string_utils
62 * @title: String Utility Functions
63 * @short_description: various string-related functions
65 * This section describes a number of utility functions for creating,
66 * duplicating, and manipulating strings.
68 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
69 * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
70 * are declared in the header <filename>gprintf.h</filename> which is
71 * <emphasis>not</emphasis> included in <filename>glib.h</filename>
72 * (otherwise using <filename>glib.h</filename> would drag in
73 * <filename>stdio.h</filename>), so you'll have to explicitly include
74 * <literal><glib/gprintf.h></literal> in order to use the GLib
77 * <para id="string-precision">While you may use the printf() functions
78 * to format UTF-8 strings, notice that the precision of a
79 * <literal>%Ns</literal> parameter is interpreted as the
80 * number of <emphasis>bytes</emphasis>, not <emphasis>characters</emphasis>
81 * to print. On top of that, the GNU libc implementation of the printf()
82 * functions has the "feature" that it checks that the string given for
83 * the <literal>%Ns</literal> parameter consists of a whole number
84 * of characters in the current encoding. So, unless you are sure you are
85 * always going to be in a UTF-8 locale or you know your text is restricted
86 * to ASCII, avoid using <literal>%Ns</literal>. If your intention is
87 * to format strings for a certain number of columns, then
88 * <literal>%Ns</literal> is not a correct solution anyway, since it
89 * fails to take wide characters (see g_unichar_iswide()) into account.
97 * Determines whether a character is alphanumeric.
99 * Unlike the standard C library isalnum() function, this only
100 * recognizes standard ASCII letters and ignores the locale,
101 * returning %FALSE for all non-ASCII characters. Also, unlike
102 * the standard library function, this takes a <type>char</type>,
103 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
104 * cast to #guchar before passing a possibly non-ASCII character in.
106 * Returns: %TRUE if @c is an ASCII alphanumeric character
113 * Determines whether a character is alphabetic (i.e. a letter).
115 * Unlike the standard C library isalpha() function, this only
116 * recognizes standard ASCII letters and ignores the locale,
117 * returning %FALSE for all non-ASCII characters. Also, unlike
118 * the standard library function, this takes a <type>char</type>,
119 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
120 * cast to #guchar before passing a possibly non-ASCII character in.
122 * Returns: %TRUE if @c is an ASCII alphabetic character
129 * Determines whether a character is a control character.
131 * Unlike the standard C library iscntrl() function, this only
132 * recognizes standard ASCII control characters and ignores the
133 * locale, returning %FALSE for all non-ASCII characters. Also,
134 * unlike the standard library function, this takes a <type>char</type>,
135 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
136 * cast to #guchar before passing a possibly non-ASCII character in.
138 * Returns: %TRUE if @c is an ASCII control character.
145 * Determines whether a character is a digit (0-9).
147 * Unlike the standard C library isdigit() function, this takes
148 * a <type>char</type>, not an <type>int</type>, so don't call it
149 * on <literal>EOF</literal>, but no need to cast to #guchar before passing a possibly
150 * non-ASCII character in.
152 * Returns: %TRUE if @c is an ASCII digit.
159 * Determines whether a character is a printing character and not a space.
161 * Unlike the standard C library isgraph() function, this only
162 * recognizes standard ASCII characters and ignores the locale,
163 * returning %FALSE for all non-ASCII characters. Also, unlike
164 * the standard library function, this takes a <type>char</type>,
165 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need
166 * to cast to #guchar before passing a possibly non-ASCII character in.
168 * Returns: %TRUE if @c is an ASCII printing character other than space.
175 * Determines whether a character is an ASCII lower case letter.
177 * Unlike the standard C library islower() function, this only
178 * recognizes standard ASCII letters and ignores the locale,
179 * returning %FALSE for all non-ASCII characters. Also, unlike
180 * the standard library function, this takes a <type>char</type>,
181 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need
182 * to worry about casting to #guchar before passing a possibly
183 * non-ASCII character in.
185 * Returns: %TRUE if @c is an ASCII lower case letter
192 * Determines whether a character is a printing character.
194 * Unlike the standard C library isprint() function, this only
195 * recognizes standard ASCII characters and ignores the locale,
196 * returning %FALSE for all non-ASCII characters. Also, unlike
197 * the standard library function, this takes a <type>char</type>,
198 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need
199 * to cast to #guchar before passing a possibly non-ASCII character in.
201 * Returns: %TRUE if @c is an ASCII printing character.
208 * Determines whether a character is a punctuation character.
210 * Unlike the standard C library ispunct() function, this only
211 * recognizes standard ASCII characters and ignores the locale,
212 * returning %FALSE for all non-ASCII characters. Also, unlike
213 * the standard library function, this takes a <type>char</type>,
214 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
215 * cast to #guchar before passing a possibly non-ASCII character in.
217 * Returns: %TRUE if @c is an ASCII punctuation character.
224 * Determines whether a character is a white-space character.
226 * Unlike the standard C library isspace() function, this only
227 * recognizes standard ASCII white-space and ignores the locale,
228 * returning %FALSE for all non-ASCII characters. Also, unlike
229 * the standard library function, this takes a <type>char</type>,
230 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
231 * cast to #guchar before passing a possibly non-ASCII character in.
233 * Returns: %TRUE if @c is an ASCII white-space character
240 * Determines whether a character is an ASCII upper case letter.
242 * Unlike the standard C library isupper() function, this only
243 * recognizes standard ASCII letters and ignores the locale,
244 * returning %FALSE for all non-ASCII characters. Also, unlike
245 * the standard library function, this takes a <type>char</type>,
246 * not an <type>int</type>, so don't call it on <literal>EOF</literal>, but no need to
247 * worry about casting to #guchar before passing a possibly non-ASCII
250 * Returns: %TRUE if @c is an ASCII upper case letter
257 * Determines whether a character is a hexadecimal-digit character.
259 * Unlike the standard C library isxdigit() function, this takes
260 * a <type>char</type>, not an <type>int</type>, so don't call it
261 * on <literal>EOF</literal>, but no need to cast to #guchar before passing a
262 * possibly non-ASCII character in.
264 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
268 * G_ASCII_DTOSTR_BUF_SIZE:
270 * A good size for a buffer to be passed into g_ascii_dtostr().
271 * It is guaranteed to be enough for all output of that function
272 * on systems with 64bit IEEE-compatible doubles.
274 * The typical usage would be something like:
276 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
278 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
284 * @string: a string to remove the leading and trailing whitespace from
286 * Removes leading and trailing whitespace from a string.
287 * See g_strchomp() and g_strchug().
295 * The standard delimiters, used in g_strdelimit().
/* Classification flags for every possible byte value, indexed by the
 * character code.  Only the 128 ASCII entries are spelled out below; the
 * rest of the 256-element array is left to zero-initialization, so bytes
 * above 127 carry no flags at all.
 * NOTE(review): the meaning of the individual flag bits is not defined in
 * this part of the file -- confirm against the header that declares
 * g_ascii_table. */
298 static const guint16 ascii_table_data[256] = {
299 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
300 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
301 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
302 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
303 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
304 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
305 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
306 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
307 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
308 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
309 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
310 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
311 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
312 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
313 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
314 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
315 /* the upper 128 are all zeroes */
/* Non-static, read-only pointer to the table above. */
318 const guint16 * const g_ascii_table = ascii_table_data;
/* USE_XLOCALE is defined only when every one of the five locale_t-based
 * facilities tested below is available; a single missing one leaves it
 * undefined.  The HAVE_* macros themselves are defined outside this file. */
320 #if defined (HAVE_NEWLOCALE) && \
321 defined (HAVE_USELOCALE) && \
322 defined (HAVE_STRTOD_L) && \
323 defined (HAVE_STRTOULL_L) && \
324 defined (HAVE_STRTOLL_L)
325 #define USE_XLOCALE 1
/* One-time initialisation state: `initialized` is the guard handed to
 * g_once_init_enter()/g_once_init_leave(), `C_locale` caches the locale
 * object created below. */
332 static gsize initialized = FALSE;
333 static locale_t C_locale = NULL;
/* The locale is created only inside the g_once_init_enter() branch. */
335 if (g_once_init_enter (&initialized))
/* All categories (LC_ALL_MASK) are taken from the "C" locale. */
337 C_locale = newlocale (LC_ALL_MASK, "C", NULL);
338 g_once_init_leave (&initialized, TRUE);
347 * @str: the string to duplicate
349 * Duplicates a string. If @str is %NULL it returns %NULL.
350 * The returned string should be freed with g_free()
351 * when no longer needed.
353 * Returns: a newly-allocated copy of @str
/* Copies @str, terminating nul included, into newly allocated memory. */
356 g_strdup (const gchar *str)
/* +1 so that the terminating nul is counted, allocated and copied too. */
363 length = strlen (str) + 1;
364 new_str = g_new (char, length);
/* The length is already known, so a plain memcpy() does the copy. */
365 memcpy (new_str, str, length);
375 * @mem: the memory to copy.
376 * @byte_size: the number of bytes to copy.
378 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
379 * from @mem. If @mem is %NULL it returns %NULL.
381 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
/* Duplicates a raw memory area of @byte_size bytes. */
385 g_memdup (gconstpointer mem,
/* Allocate exactly @byte_size bytes and fill them from @mem. */
392 new_mem = g_malloc (byte_size);
393 memcpy (new_mem, mem, byte_size);
403 * @str: the string to duplicate
404 * @n: the maximum number of bytes to copy from @str
406 * Duplicates the first @n bytes of a string, returning a newly-allocated
407 * buffer @n + 1 bytes long which will always be nul-terminated.
408 * If @str is less than @n bytes long the buffer is padded with nuls.
409 * If @str is %NULL it returns %NULL.
410 * The returned value should be freed when no longer needed.
413 * To copy a number of characters from a UTF-8 encoded string, use
414 * g_utf8_strncpy() instead.
417 * Returns: a newly-allocated buffer containing the first @n bytes
418 * of @str, nul-terminated
/* Duplicates at most @n bytes of @str into a buffer of @n + 1 bytes. */
421 g_strndup (const gchar *str,
/* One byte more than @n, leaving room for the terminating nul. */
428 new_str = g_new (gchar, n + 1);
/* strncpy() copies at most @n bytes and zero-fills the remainder when
 * @str is shorter, which is what pads the buffer with nuls. */
429 strncpy (new_str, str, n);
440 * @length: the length of the new string
441 * @fill_char: the byte to fill the string with
443 * Creates a new string @length bytes long filled with @fill_char.
444 * The returned string should be freed when no longer needed.
446 * Returns: a newly-allocated string filled with @fill_char
/* Builds a string consisting of @length copies of @fill_char. */
449 g_strnfill (gsize length,
/* One byte beyond @length is allocated -- presumably for the terminating
 * nul, which is not written on the lines visible here. */
454 str = g_new (gchar, length + 1);
/* The fill value is converted to guchar before being handed to memset(). */
455 memset (str, (guchar)fill_char, length);
463 * @dest: destination buffer.
464 * @src: source string.
466 * Copies a nul-terminated string into the dest buffer, including the
467 * trailing nul, and returns a pointer to the trailing nul byte.
468 * This is useful for concatenating multiple strings together
469 * without having to repeatedly scan for the end.
471 * Return value: a pointer to trailing nul byte.
/* Copies @src into @dest and returns a pointer to the nul written at the
 * end of @dest.  Two bodies appear below: one forwarding to the C
 * library's stpcpy() and one hand-written copy.
 * NOTE(review): the preprocessor lines selecting between them are not
 * visible in this listing. */
474 g_stpcpy (gchar *dest,
/* Variant 1: validate both pointers, then let stpcpy() do the work. */
478 g_return_val_if_fail (dest != NULL, NULL);
479 g_return_val_if_fail (src != NULL, NULL);
480 return stpcpy (dest, src);
/* Variant 2: manual copy through the cursors d and s. */
482 register gchar *d = dest;
483 register const gchar *s = src;
485 g_return_val_if_fail (dest != NULL, NULL);
486 g_return_val_if_fail (src != NULL, NULL);
/* The condition advances s past each byte it tests and ends once the nul
 * has been seen. */
489 while (*s++ != '\0');
497 * @format: a standard printf() format string, but notice
498 * <link linkend="string-precision">string precision pitfalls</link>
499 * @args: the list of parameters to insert into the format string
501 * Similar to the standard C vsprintf() function but safer, since it
502 * calculates the maximum space required and allocates memory to hold
503 * the result. The returned string should be freed with g_free() when
506 * See also g_vasprintf(), which offers the same functionality, but
507 * additionally returns the length of the allocated string.
509 * Returns: a newly-allocated string holding the result
/* va_list flavour of g_strdup_printf(); the formatting and allocation are
 * delegated to g_vasprintf(). */
512 g_strdup_vprintf (const gchar *format,
/* Starts out NULL; g_vasprintf() receives its address to hand back the
 * newly allocated result. */
515 gchar *string = NULL;
517 g_vasprintf (&string, format, args);
524 * @format: a standard printf() format string, but notice
525 * <link linkend="string-precision">string precision pitfalls</link>
526 * @...: the parameters to insert into the format string
528 * Similar to the standard C sprintf() function but safer, since it
529 * calculates the maximum space required and allocates memory to hold
530 * the result. The returned string should be freed with g_free() when no
533 * Returns: a newly-allocated string holding the result
/* Variadic front end: packs the arguments into a va_list and forwards to
 * g_strdup_vprintf(). */
536 g_strdup_printf (const gchar *format,
542 va_start (args, format);
543 buffer = g_strdup_vprintf (format, args);
551 * @string1: the first string to add, which must not be %NULL
552 * @...: a %NULL-terminated list of strings to append to the string
554 * Concatenates all of the given strings into one long string.
555 * The returned string should be freed with g_free() when no longer needed.
557 * Note that this function is usually not the right function to use to
558 * assemble a translated message from pieces, since proper translation
559 * often requires the pieces to be reordered.
561 * <warning><para>The variable argument list <emphasis>must</emphasis> end
562 * with %NULL. If you forget the %NULL, g_strconcat() will start appending
563 * random memory junk to your string.</para></warning>
565 * Returns: a newly-allocated string containing all the string arguments
/* Concatenates @string1 and the following arguments in two passes over
 * the argument list: the first sizes the result, the second copies. */
568 g_strconcat (const gchar *string1, ...)
/* Pass 1: the running total starts at 1 for the terminating nul plus the
 * length of the first string. */
579 l = 1 + strlen (string1);
580 va_start (args, string1);
581 s = va_arg (args, gchar*);
585 s = va_arg (args, gchar*);
/* A single allocation of the total size computed above. */
589 concat = g_new (gchar, l);
/* Pass 2: the argument list is restarted.  g_stpcpy() returns the new end
 * of the output, so each piece is appended without rescanning what has
 * already been written. */
592 ptr = g_stpcpy (ptr, string1);
593 va_start (args, string1);
594 s = va_arg (args, gchar*);
597 ptr = g_stpcpy (ptr, s);
598 s = va_arg (args, gchar*);
607 * @nptr: the string to convert to a numeric value.
608 * @endptr: if non-%NULL, it returns the character after
609 * the last character used in the conversion.
611 * Converts a string to a #gdouble value.
612 * It calls the standard strtod() function to handle the conversion, but
613 * if the string is not completely converted it attempts the conversion
614 * again with g_ascii_strtod(), and returns the best match.
616 * This function should seldom be used. The normal situation when reading
617 * numbers not for human consumption is to use g_ascii_strtod(). Only when
618 * you know that you must expect both locale formatted and C formatted numbers
619 * should you use this. Make sure that you don't pass strings such as comma
620 * separated lists of values, since the commas may be interpreted as a decimal
621 * point in some locales, causing unexpected results.
623 * Return value: the #gdouble value.
/* Parses a double with the locale-aware strtod() first and, if that leaves
 * text unconsumed, also with g_ascii_strtod(); the stop position of the
 * preferred parse is reported through @endptr. */
626 g_strtod (const gchar *nptr,
634 g_return_val_if_fail (nptr != NULL, 0);
/* Attempt 1: the C library's strtod(). */
639 val_1 = strtod (nptr, &fail_pos_1);
/* Attempt 2 only runs when attempt 1 stopped before the end of the string. */
641 if (fail_pos_1 && fail_pos_1[0] != 0)
642 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
/* Attempt 1 is preferred when it consumed the whole string or got at
 * least as far as attempt 2. */
644 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
647 *endptr = fail_pos_1;
/* Otherwise the stop position of attempt 2 is reported. */
653 *endptr = fail_pos_2;
660 * @nptr: the string to convert to a numeric value.
661 * @endptr: if non-%NULL, it returns the character after
662 * the last character used in the conversion.
664 * Converts a string to a #gdouble value.
666 * This function behaves like the standard strtod() function
667 * does in the C locale. It does this without actually changing
668 * the current locale, since that would not be thread-safe.
669 * A limitation of the implementation is that this function
670 * will still accept localized versions of infinities and NANs.
672 * This function is typically used when reading configuration
673 * files or other non-user input that should be locale independent.
674 * To handle input from the user you should normally use the
675 * locale-sensitive system strtod() function.
677 * To convert from a #gdouble to a string in a locale-insensitive
678 * way, use g_ascii_dtostr().
680 * If the correct value would cause overflow, plus or minus <literal>HUGE_VAL</literal>
681 * is returned (according to the sign of the value), and <literal>ERANGE</literal> is
682 * stored in <literal>errno</literal>. If the correct value would cause underflow,
683 * zero is returned and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
685 * This function resets <literal>errno</literal> before calling strtod() so that
686 * you can reliably detect overflow and underflow.
688 * Return value: the #gdouble value.
/* Parses a double using C-locale syntax ('.' as decimal point).  Two
 * bodies appear below: one calling strtod_l() with the cached C locale,
 * and a fallback that rewrites the number so that the plain strtod() of
 * the current locale accepts it.
 * NOTE(review): the preprocessor lines selecting between the two bodies
 * are not visible in this listing. */
691 g_ascii_strtod (const gchar *nptr,
696 g_return_val_if_fail (nptr != NULL, 0);
/* Variant 1: parse directly in the C locale. */
700 return strtod_l (nptr, endptr, get_C_locale ());
/* Variant 2 (fallback) starts here. */
707 struct lconv *locale_data;
709 const char *decimal_point;
710 int decimal_point_len;
711 const char *p, *decimal_point_pos;
712 const char *end = NULL; /* Silence gcc */
715 g_return_val_if_fail (nptr != NULL, 0);
/* Ask the current locale which string it uses as decimal point. */
720 locale_data = localeconv ();
721 decimal_point = locale_data->decimal_point;
722 decimal_point_len = strlen (decimal_point);
725 decimal_point_len = 1;
728 g_assert (decimal_point_len != 0);
/* Stays NULL unless a '.' is found inside the number while scanning. */
730 decimal_point_pos = NULL;
/* The scan below is only needed when the locale's decimal point is
 * something other than exactly ".". */
733 if (decimal_point[0] != '.' ||
734 decimal_point[1] != 0)
737 /* Skip leading space */
738 while (g_ascii_isspace (*p))
741 /* Skip leading optional sign */
742 if (*p == '+' || *p == '-')
746 (p[1] == 'x' || p[1] == 'X'))
749 /* HEX - find the (optional) decimal point */
751 while (g_ascii_isxdigit (*p))
755 decimal_point_pos = p++;
757 while (g_ascii_isxdigit (*p))
/* Hexadecimal floats use a 'p' exponent with an optional sign and
 * decimal digits. */
760 if (*p == 'p' || *p == 'P')
762 if (*p == '+' || *p == '-')
764 while (g_ascii_isdigit (*p))
/* Decimal number: digits, optional '.', digits, optional 'e' exponent. */
769 else if (g_ascii_isdigit (*p) || *p == '.')
771 while (g_ascii_isdigit (*p))
775 decimal_point_pos = p++;
777 while (g_ascii_isdigit (*p))
780 if (*p == 'e' || *p == 'E')
782 if (*p == '+' || *p == '-')
784 while (g_ascii_isdigit (*p))
789 /* For the other cases, we need not convert the decimal point */
792 if (decimal_point_pos)
796 /* We need to convert the '.' to the locale specific decimal point */
/* Room for the scanned text with the one-byte '.' swapped for
 * decimal_point_len bytes, plus the terminating nul. */
797 copy = g_malloc (end - nptr + 1 + decimal_point_len);
/* Assemble: text before the '.', the locale's decimal point, text after. */
800 memcpy (c, nptr, decimal_point_pos - nptr);
801 c += decimal_point_pos - nptr;
802 memcpy (c, decimal_point, decimal_point_len);
803 c += decimal_point_len;
804 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
805 c += end - (decimal_point_pos + 1);
/* errno is captured straight after strtod() so later calls cannot
 * overwrite it. */
809 val = strtod (copy, &fail_pos);
810 strtod_errno = errno;
/* Translate the stop position in the copy back into @nptr.  Beyond the
 * decimal point the copy is (decimal_point_len - 1) bytes longer than
 * the original. */
814 if (fail_pos - copy > decimal_point_pos - nptr)
815 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
817 fail_pos = (char *)nptr + (fail_pos - copy);
/* No '.' to replace: hand strtod() a nul-terminated copy of just the
 * scanned span [nptr, end), so it cannot read past it. */
827 copy = g_malloc (end - (char *)nptr + 1);
828 memcpy (copy, nptr, end - nptr);
829 *(copy + (end - (char *)nptr)) = 0;
832 val = strtod (copy, &fail_pos);
833 strtod_errno = errno;
/* Same offset in @nptr as in the copy: nothing was inserted. */
837 fail_pos = (char *)nptr + (fail_pos - copy);
/* Remaining case: parse @nptr in place. */
845 val = strtod (nptr, &fail_pos);
846 strtod_errno = errno;
/* Re-establish the errno value recorded right after strtod(). */
852 errno = strtod_errno;
861 * @buffer: A buffer to place the resulting string in
862 * @buf_len: The length of the buffer.
863 * @d: The #gdouble to convert
865 * Converts a #gdouble to a string, using the '.' as
868 * This function generates enough precision that converting
869 * the string back using g_ascii_strtod() gives the same machine-number
870 * (on machines with IEEE compatible 64bit doubles). It is
871 * guaranteed that the size of the resulting string will never
872 * be larger than %G_ASCII_DTOSTR_BUF_SIZE bytes.
874 * Return value: The pointer to the buffer with the converted string.
/* Convenience wrapper around g_ascii_formatd() with a fixed format. */
877 g_ascii_dtostr (gchar *buffer,
/* "%.17g" requests 17 significant digits. */
881 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
884 #pragma GCC diagnostic push
885 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
889 * @buffer: A buffer to place the resulting string in
890 * @buf_len: The length of the buffer.
891 * @format: The printf()-style format string to use for
892 * converting the value.
893 * @d: The #gdouble to convert
895 * Converts a #gdouble to a string, using the '.' as
896 * decimal point. To format the number you pass in
897 * a printf()-style format string. Allowed conversion
898 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
900 * If you just want to serialize the value into a
901 * string, use g_ascii_dtostr().
903 * Return value: The pointer to the buffer with the converted string.
/* Formats @d into @buffer with '.' as the decimal point regardless of the
 * current locale.  Two bodies appear below: one that formats while the C
 * locale is installed through uselocale(), and a fallback that formats in
 * the current locale and then patches the decimal point.
 * NOTE(review): the preprocessor lines selecting between the two bodies
 * are not visible in this listing. */
906 g_ascii_formatd (gchar *buffer,
/* Variant 1: install the C locale, format, then put the previous locale
 * back. */
914 old_locale = uselocale (get_C_locale ());
915 _g_snprintf (buffer, buf_len, format, d);
916 uselocale (old_locale);
/* Variant 2 (fallback) starts here. */
921 struct lconv *locale_data;
923 const char *decimal_point;
924 int decimal_point_len;
/* Accept only a single conversion: @format must start with '%' and must
 * not contain a quote flag, an 'l' modifier or a further '%'. */
929 g_return_val_if_fail (buffer != NULL, NULL);
930 g_return_val_if_fail (format[0] == '%', NULL);
931 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
/* The conversion specifier is the last character of @format. */
933 format_char = format[strlen (format) - 1];
935 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
936 format_char == 'f' || format_char == 'F' ||
937 format_char == 'g' || format_char == 'G',
/* The same three conditions again as ordinary if statements.
 * NOTE(review): presumably so the function still bails out when the
 * g_return_val_if_fail() checks are compiled out -- confirm. */
940 if (format[0] != '%')
943 if (strpbrk (format + 1, "'l%"))
946 if (!(format_char == 'e' || format_char == 'E' ||
947 format_char == 'f' || format_char == 'F' ||
948 format_char == 'g' || format_char == 'G'))
/* Format with the current locale; the decimal point is fixed up below. */
951 _g_snprintf (buffer, buf_len, format, d);
954 locale_data = localeconv ();
955 decimal_point = locale_data->decimal_point;
956 decimal_point_len = strlen (decimal_point);
959 decimal_point_len = 1;
962 g_assert (decimal_point_len != 0);
/* Nothing needs patching when the locale's decimal point is exactly ".". */
964 if (decimal_point[0] != '.' ||
965 decimal_point[1] != 0)
/* Walk over leading whitespace, an optional sign and the integer digits
 * to reach the place where the decimal point would be. */
969 while (g_ascii_isspace (*p))
972 if (*p == '+' || *p == '-')
/* Cast to guchar so isdigit() never receives a negative char value. */
975 while (isdigit ((guchar)*p))
978 if (strncmp (p, decimal_point, decimal_point_len) == 0)
/* A multi-byte decimal point leaves (decimal_point_len - 1) surplus
 * bytes; the rest of the string is shifted left over them. */
982 if (decimal_point_len > 1)
984 rest_len = strlen (p + (decimal_point_len-1));
985 memmove (p, p + (decimal_point_len-1), rest_len);
994 #pragma GCC diagnostic pop
/*
 * Locale-independent ASCII character helpers used by the integer parser
 * in this file.  They work purely on character-code comparisons and never
 * consult <ctype.h>.
 *
 * Each macro may evaluate its argument more than once, so the argument
 * must be free of side effects.
 */
#define ISSPACE(c)              ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                                 (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISUPPER(c)              ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)              ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)              (ISLOWER (c) || ISUPPER (c))
#define TOUPPER(c)              (ISLOWER (c) ? (c) + ('A' - 'a') : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) + ('a' - 'A') : (c))
/* Digit-parsing core behind g_ascii_strtoull() and g_ascii_strtoll().
 * Both callers pass @nptr, @endptr, @base and the address of a `negative`
 * flag, and apply the sign to the returned value themselves. */
1007 g_parse_long_long (const gchar *nptr,
1008 const gchar **endptr,
1012 /* this code is based on the strtol(3) code from GNU libc released under
1013 * the GNU Lesser General Public License.
1015 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1016 * Free Software Foundation, Inc.
1022 const gchar *s, *save;
1025 g_return_val_if_fail (nptr != NULL, 0);
1028 if (base == 1 || base > 36)
1038 /* Skip white space. */
1039 while (ISSPACE (*s))
/* Nothing but white space in the input. */
1042 if (G_UNLIKELY (!*s))
1045 /* Check for a sign. */
1054 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
/* An 'x' or 'X' in second position marks a hexadecimal prefix. */
1057 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1068 /* Save the pointer so we can check later if anything happened. */
/* cutoff is the largest accumulated value that can still be multiplied by
 * base without exceeding G_MAXUINT64; cutlim is the largest digit that
 * may follow when the accumulator equals cutoff. */
1070 cutoff = G_MAXUINT64 / base;
1071 cutlim = G_MAXUINT64 % base;
1078 if (c >= '0' && c <= '9')
/* Letters stand for the digit values 10 and up, case-insensitively. */
1080 else if (ISALPHA (c))
1081 c = TOUPPER (c) - 'A' + 10;
1086 /* Check for overflow. */
1087 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1096 /* Check if anything actually happened. */
1100 /* Store in ENDPTR the address of one character
1101 past the last character we converted. */
1105 if (G_UNLIKELY (overflow))
1114 /* We must handle a special case here: the base is 0 or 16 and the
1115 first two characters are '0' and 'x', but the rest are not
1116 hexadecimal digits. This is not an error case. We return 0 and
1117 ENDPTR points to the `x`. */
1120 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1122 *endptr = &save[-1];
1124 /* There was no number to convert. */
1129 #endif /* !USE_XLOCALE */
1133 * @nptr: the string to convert to a numeric value.
1134 * @endptr: if non-%NULL, it returns the character after
1135 * the last character used in the conversion.
1136 * @base: to be used for the conversion, 2..36 or 0
1138 * Converts a string to a #guint64 value.
1139 * This function behaves like the standard strtoull() function
1140 * does in the C locale. It does this without actually
1141 * changing the current locale, since that would not be
1144 * This function is typically used when reading configuration
1145 * files or other non-user input that should be locale independent.
1146 * To handle input from the user you should normally use the
1147 * locale-sensitive system strtoull() function.
1149 * If the correct value would cause overflow, %G_MAXUINT64
1150 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1151 * If the base is outside the valid range, zero is returned, and
1152 * <literal>EINVAL</literal> is stored in <literal>errno</literal>.
1153 * If the string conversion fails, zero is returned, and @endptr returns
1154 * @nptr (if @endptr is non-%NULL).
1156 * Return value: the #guint64 value or zero on error.
/* C-locale strtoull().  Two bodies appear below: one forwarding to
 * strtoull_l() with the cached C locale, one built on g_parse_long_long().
 * NOTE(review): the preprocessor lines selecting between them are not
 * visible in this listing. */
1161 g_ascii_strtoull (const gchar *nptr,
/* Variant 1: let the C library parse in the C locale. */
1166 return strtoull_l (nptr, endptr, base, get_C_locale ());
/* Variant 2: the helper reports the sign separately via `negative`. */
1171 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1173 /* Return the result of the appropriate sign. */
1174 return negative ? -result : result;
1180 * @nptr: the string to convert to a numeric value.
1181 * @endptr: if non-%NULL, it returns the character after
1182 * the last character used in the conversion.
1183 * @base: to be used for the conversion, 2..36 or 0
1185 * Converts a string to a #gint64 value.
1186 * This function behaves like the standard strtoll() function
1187 * does in the C locale. It does this without actually
1188 * changing the current locale, since that would not be
1191 * This function is typically used when reading configuration
1192 * files or other non-user input that should be locale independent.
1193 * To handle input from the user you should normally use the
1194 * locale-sensitive system strtoll() function.
1196 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1197 * is returned, and <literal>ERANGE</literal> is stored in <literal>errno</literal>.
1198 * If the base is outside the valid range, zero is returned, and
1199 * <literal>EINVAL</literal> is stored in <literal>errno</literal>. If the
1200 * string conversion fails, zero is returned, and @endptr returns @nptr
1201 * (if @endptr is non-%NULL).
1203 * Return value: the #gint64 value or zero on error.
/* C-locale strtoll().  Two bodies appear below: one forwarding to
 * strtoll_l() with the cached C locale, one built on g_parse_long_long().
 * NOTE(review): the preprocessor lines selecting between them are not
 * visible in this listing. */
1208 g_ascii_strtoll (const gchar *nptr,
/* Variant 1: let the C library parse in the C locale. */
1213 return strtoll_l (nptr, endptr, base, get_C_locale ());
/* Variant 2: the helper reports the sign separately via `negative`. */
1218 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
/* Range checks: a negative input may not exceed G_MININT64 converted to
 * guint64, a non-negative one may not exceed G_MAXINT64. */
1220 if (negative && result > (guint64) G_MININT64)
1225 else if (!negative && result > (guint64) G_MAXINT64)
/* In range: convert to gint64 and apply the sign. */
1231 return - (gint64) result;
1233 return (gint64) result;
1239 * @errnum: the system error number. See the standard C %errno
1242 * Returns a string corresponding to the given error code, e.g.
1243 * "no such process". You should use this function in preference to
1244 * strerror(), because it returns a string in UTF-8 encoding, and since
1245 * not all platforms support the strerror() function.
1247 * Returns: a UTF-8 string describing the error code. If the error code
1248 * is unknown, it returns "unknown error (<code>)".
/* Returns a description of @errnum obtained from strerror(); the value of
 * errno on entry is put back before the function finishes. */
1251 g_strerror (gint errnum)
1254 gchar *tofree = NULL;
/* Remember errno so the calls below cannot change what the caller sees. */
1256 gint saved_errno = errno;
1258 msg = strerror (errnum);
/* When g_get_charset() returns false the message is converted with
 * g_locale_to_utf8(); `tofree` keeps the converted copy.
 * NOTE(review): on the lines visible here the conversion result is not
 * checked before it is passed on -- confirm that a failed conversion is
 * handled. */
1259 if (!g_get_charset (NULL))
1260 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* The value handed back comes from g_intern_string(), not from `msg`
 * itself. */
1262 ret = g_intern_string (msg);
1264 errno = saved_errno;
1270 * @signum: the signal number. See the <literal>signal</literal>
1273 * Returns a string describing the given signal, e.g. "Segmentation fault".
1274 * You should use this function in preference to strsignal(), because it
1275 * returns a string in UTF-8 encoding, and since not all platforms support
1276 * the strsignal() function.
1278 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1279 * it returns "unknown signal (<signum>)".
/* Returns a description of signal @signum, taken from strsignal() where
 * that function is available. */
1282 g_strsignal (gint signum)
1288 msg = tofree = NULL;
/* With strsignal(): fetch the message and convert it with
 * g_locale_to_utf8() when g_get_charset() returns false. */
1290 #ifdef HAVE_STRSIGNAL
1291 msg = strsignal (signum);
1292 if (!g_get_charset (NULL))
1293 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
/* Fallback text carrying the numeric signal value.
 * NOTE(review): the condition guarding this line is not visible here. */
1297 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
/* The value handed back comes from g_intern_string(), not from `msg`
 * itself. */
1298 ret = g_intern_string (msg);
1304 /* Functions g_strlcpy and g_strlcat were originally developed by
1305 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1306 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1307 * for more information.
1311 /* Use the native ones, if available; they might be implemented in assembly */
/* Thin wrapper over the platform's strlcpy(): validates both pointers and
 * forwards the call, returning its result unchanged. */
1313 g_strlcpy (gchar *dest,
1317 g_return_val_if_fail (dest != NULL, 0);
1318 g_return_val_if_fail (src != NULL, 0);
1320 return strlcpy (dest, src, dest_size);
/* Thin wrapper over the platform's strlcat(): validates both pointers and
 * forwards the call, returning its result unchanged. */
1324 g_strlcat (gchar *dest,
1328 g_return_val_if_fail (dest != NULL, 0);
1329 g_return_val_if_fail (src != NULL, 0);
1331 return strlcat (dest, src, dest_size);
1334 #else /* ! HAVE_STRLCPY */
1337 * @dest: destination buffer
1338 * @src: source buffer
1339 * @dest_size: length of @dest in bytes
1341 * Portability wrapper that calls strlcpy() on systems which have it,
1342 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1343 * guaranteed to be nul-terminated; @src must be nul-terminated;
1344 * @dest_size is the buffer size, not the number of chars to copy.
1346 * At most dest_size - 1 characters will be copied. Always nul-terminates
1347 * (unless dest_size == 0). This function does <emphasis>not</emphasis>
1348 * allocate memory. Unlike strncpy(), this function doesn't pad dest (so
1349 * it's often faster). It returns the size of the attempted result,
1350 * strlen (src), so if @retval >= @dest_size, truncation occurred.
1352 * <note><para>Caveat: strlcpy() is supposedly more secure than
1353 * strcpy() or strncpy(), but if you really want to avoid screwups,
1354 * g_strdup() is an even better idea.</para></note>
1356 * Returns: length of @src
/* Emulation of strlcpy() for platforms without a native one: copies @src
 * into @dest, bounded by @dest_size, and returns the length of @src. */
1359 g_strlcpy (gchar *dest,
/* Working cursors and the remaining-space counter. */
1363 register gchar *d = dest;
1364 register const gchar *s = src;
1365 register gsize n = dest_size;
1367 g_return_val_if_fail (dest != NULL, 0);
1368 g_return_val_if_fail (src != NULL, 0);
1370 /* Copy as many bytes as will fit */
/* The pre-decrement sets one byte aside for the terminating NUL; the
 * copy is skipped entirely when dest_size is 0 or 1. */
1371 if (n != 0 && --n != 0)
1374 register gchar c = *s++;
1382 /* If not enough room in dest, add NUL and traverse rest of src */
/* s has been advanced one past the NUL of src, hence the - 1. */
1391 return s - src - 1; /* count does not include NUL */
1396 * @dest: destination buffer, already containing one nul-terminated string
1397 * @src: source buffer
1398 * @dest_size: length of @dest buffer in bytes (not length of existing string
1401 * Portability wrapper that calls strlcat() on systems which have it,
1402 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1403 * guaranteeing nul-termination for @dest. The total size of @dest won't
1404 * exceed @dest_size.
1406 * At most dest_size - 1 characters will be copied.
1407 * Unlike strncat, dest_size is the full size of dest, not the space left over.
1408 * This function does NOT allocate memory.
1409 * This always NUL terminates (unless dest_size == 0 or there were no NUL characters
1410 * in the dest_size characters of dest to start with).
1412 * <note><para>Caveat: this is supposedly a more secure alternative to
1413 * strcat() or strncat(), but for real security g_strconcat() is harder
1414 * to mess up.</para></note>
1416 * Returns: size of attempted result, which is MIN (dest_size, strlen
1417 * (original dest)) + strlen (src), so if retval >= dest_size,
1418 * truncation occurred.
/* Fallback variant, on the "! HAVE_STRLCPY" side of the conditional.
 * d scans/writes dest, s reads src, bytes_left tracks remaining capacity. */
1421 g_strlcat (gchar *dest,
1425 register gchar *d = dest;
1426 register const gchar *s = src;
1427 register gsize bytes_left = dest_size;
1428 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1430 g_return_val_if_fail (dest != NULL, 0);
1431 g_return_val_if_fail (src != NULL, 0);
1433 /* Find the end of dst and adjust bytes left but don't go past end */
/* NOTE(review): `*d` is evaluated before `bytes_left` is checked. If dest
 * contains no NUL within its first dest_size bytes, the last evaluation of
 * this condition appears to read dest[dest_size], one byte past the
 * buffer -- confirm whether the two operands should be swapped. */
1434 while (*d != 0 && bytes_left-- != 0)
/* Recompute the free space from dlength rather than trusting the counter
 * that the scan above decremented. */
1437 bytes_left = dest_size - dlength;
/* No capacity left at all: only report the length that would be needed. */
1439 if (bytes_left == 0)
1440 return dlength + strlen (s);
/* Presumably a single remaining byte is reserved for the terminating NUL,
 * so bytes are appended only when more than one is free -- TODO confirm. */
1444 if (bytes_left != 1)
1453 return dlength + (s - src); /* count does not include NUL */
1455 #endif /* ! HAVE_STRLCPY */
1460 * @len: length of @str in bytes, or -1 if @str is nul-terminated.
1462 * Converts all upper case ASCII letters to lower case ASCII letters.
1464 * Return value: a newly-allocated string, with all the upper case
1465 * characters in @str converted to lower case, with
1466 * semantics that exactly match g_ascii_tolower(). (Note
1467 * that this is unlike the old g_strdown(), which modified
1468 * the string in place.)
/* Builds the result in two steps: duplicate the input with g_strndup(),
 * then lower-case the duplicate in place. The caller's str is not written. */
1471 g_ascii_strdown (const gchar *str,
1476 g_return_val_if_fail (str != NULL, NULL);
1481 result = g_strndup (str, len);
/* Walk the copy until its first NUL byte, converting one byte at a time. */
1482 for (s = result; *s; s++)
1483 *s = g_ascii_tolower (*s);
1491 * @len: length of @str in bytes, or -1 if @str is nul-terminated.
1493 * Converts all lower case ASCII letters to upper case ASCII letters.
1495 * Return value: a newly allocated string, with all the lower case
1496 * characters in @str converted to upper case, with
1497 * semantics that exactly match g_ascii_toupper(). (Note
1498 * that this is unlike the old g_strup(), which modified
1499 * the string in place.)
/* Builds the result in two steps: duplicate the input with g_strndup(),
 * then upper-case the duplicate in place. The caller's str is not written. */
1502 g_ascii_strup (const gchar *str,
1507 g_return_val_if_fail (str != NULL, NULL);
1512 result = g_strndup (str, len);
/* Walk the copy until its first NUL byte, converting one byte at a time. */
1513 for (s = result; *s; s++)
1514 *s = g_ascii_toupper (*s);
1521 * @string: a string.
1523 * Determines if a string is pure ASCII. A string is pure ASCII if it
1524 * contains no bytes with the high bit set.
1526 * Returns: %TRUE if @string is ascii
/* Scans string byte by byte up to its NUL terminator.
 * NOTE(review): string is indexed without a visible NULL guard -- confirm
 * that callers never pass NULL. */
1531 g_str_is_ascii (const gchar *string)
1535 for (i = 0; string[i]; i++)
/* Bit 0x80 is the high bit; a byte with it set is outside 7-bit ASCII. */
1536 if (string[i] & 0x80)
1544 * @string: the string to convert.
1546 * Converts a string to lower case.
1548 * Return value: the string
1550 * Deprecated:2.2: This function is totally broken for the reasons discussed
1551 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
/* Deprecated in-place lower-casing; hands back the pointer it was given. */
1555 g_strdown (gchar *string)
1559 g_return_val_if_fail (string != NULL, NULL);
/* The cursor views the string as unsigned bytes (guchar). */
1561 s = (guchar *) string;
1570 return (gchar *) string;
1575 * @string: the string to convert.
1577 * Converts a string to upper case.
1579 * Return value: the string
1581 * Deprecated:2.2: This function is totally broken for the reasons discussed
1582 * in the g_strncasecmp() docs - use g_ascii_strup() or g_utf8_strup() instead.
/* Deprecated in-place upper-casing; hands back the pointer it was given. */
1585 g_strup (gchar *string)
1589 g_return_val_if_fail (string != NULL, NULL);
/* The cursor views the string as unsigned bytes (guchar). */
1591 s = (guchar *) string;
1600 return (gchar *) string;
1605 * @string: the string to reverse
1607 * Reverses all of the bytes in a string. For example,
1608 * <literal>g_strreverse ("abcdef")</literal> will result
1611 * Note that g_strreverse() doesn't work on UTF-8 strings
1612 * containing multibyte characters. For that purpose, use
1613 * g_utf8_strreverse().
1615 * Returns: the same pointer passed in as @string
/* Reverses the bytes of string in place using two cursors, h and t. */
1618 g_strreverse (gchar *string)
1620 g_return_val_if_fail (string != NULL, NULL);
1624 register gchar *h, *t;
/* t starts on the last byte before the NUL terminator. For an empty
 * string this expression would point before the buffer, so presumably an
 * emptiness check guards it -- TODO confirm. */
1627 t = string + strlen (string) - 1;
1646 * @c: any character.
1648 * Convert a character to ASCII lower case.
1650 * Unlike the standard C library tolower() function, this only
1651 * recognizes standard ASCII letters and ignores the locale, returning
1652 * all non-ASCII characters unchanged, even if they are lower case
1653 * letters in a particular character set. Also unlike the standard
1654 * library function, this takes and returns a char, not an int, so
1655 * don't call it on <literal>EOF</literal> but no need to worry about casting to #guchar
1656 * before passing a possibly non-ASCII character in.
1658 * Return value: the result of converting @c to lower case.
1659 * If @c is not an ASCII upper case letter,
1660 * @c is returned unchanged.
/* Locale-independent lower-casing of a single char. */
1663 g_ascii_tolower (gchar c)
/* Only characters accepted by g_ascii_isupper() are shifted by the
 * 'A'-to-'a' offset; every other value is returned unchanged. */
1665 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1670 * @c: any character.
1672 * Convert a character to ASCII upper case.
1674 * Unlike the standard C library toupper() function, this only
1675 * recognizes standard ASCII letters and ignores the locale, returning
1676 * all non-ASCII characters unchanged, even if they are upper case
1677 * letters in a particular character set. Also unlike the standard
1678 * library function, this takes and returns a char, not an int, so
1679 * don't call it on <literal>EOF</literal> but no need to worry about casting to #guchar
1680 * before passing a possibly non-ASCII character in.
1682 * Return value: the result of converting @c to upper case.
1683 * If @c is not an ASCII lower case letter,
1684 * @c is returned unchanged.
/* Locale-independent upper-casing of a single char. */
1687 g_ascii_toupper (gchar c)
/* Only characters accepted by g_ascii_islower() are shifted by the
 * 'a'-to-'A' offset; every other value is returned unchanged. */
1689 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1693 * g_ascii_digit_value:
1694 * @c: an ASCII character.
1696 * Determines the numeric value of a character as a decimal
1697 * digit. Differs from g_unichar_digit_value() because it takes
1698 * a char, so there's no worry about sign extension if characters
1701 * Return value: If @c is a decimal digit (according to
1702 * g_ascii_isdigit()), its numeric value. Otherwise, -1.
/* Maps a decimal digit character to its numeric value. */
1705 g_ascii_digit_value (gchar c)
/* Classification is delegated to g_ascii_isdigit(). */
1707 if (g_ascii_isdigit (c))
1713 * g_ascii_xdigit_value:
1714 * @c: an ASCII character.
1716 * Determines the numeric value of a character as a hexadecimal
1717 * digit. Differs from g_unichar_xdigit_value() because it takes
1718 * a char, so there's no worry about sign extension if characters
1721 * Return value: If @c is a hex digit (according to
1722 * g_ascii_isxdigit()), its numeric value. Otherwise, -1.
/* Maps a hexadecimal digit character to its numeric value. */
1725 g_ascii_xdigit_value (gchar c)
/* Upper-case letters 'A'..'F' yield 10..15. */
1727 if (c >= 'A' && c <= 'F')
1728 return c - 'A' + 10;
/* Lower-case letters 'a'..'f' yield 10..15 as well. */
1729 if (c >= 'a' && c <= 'f')
1730 return c - 'a' + 10;
/* Everything else is decided by the decimal-digit helper. */
1731 return g_ascii_digit_value (c);
1735 * g_ascii_strcasecmp:
1736 * @s1: string to compare with @s2.
1737 * @s2: string to compare with @s1.
1739 * Compare two strings, ignoring the case of ASCII characters.
1741 * Unlike the BSD strcasecmp() function, this only recognizes standard
1742 * ASCII letters and ignores the locale, treating all non-ASCII
1743 * bytes as if they are not letters.
1745 * This function should be used only on strings that are known to be
1746 * in encodings where the bytes corresponding to ASCII letters always
1747 * represent themselves. This includes UTF-8 and the ISO-8859-*
1748 * charsets, but not for instance double-byte encodings like the
1749 * Windows Codepage 932, where the trailing bytes of double-byte
1750 * characters include all ASCII letters. If you compare two CP932
1751 * strings using this function, you will get false matches.
1753 * Both @s1 and @s2 must be non-%NULL.
1755 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1756 * or a positive value if @s1 > @s2.
/* Byte-wise comparison after folding each byte through TOLOWER.
 * NULL arguments are rejected by the guards, which return 0. */
1759 g_ascii_strcasecmp (const gchar *s1,
1764 g_return_val_if_fail (s1 != NULL, 0);
1765 g_return_val_if_fail (s2 != NULL, 0);
/* The guchar cast makes bytes >= 0x80 widen to positive gint values. */
1769 c1 = (gint)(guchar) TOLOWER (*s1);
1770 c2 = (gint)(guchar) TOLOWER (*s2);
/* Final result: difference of the current bytes taken as unsigned values,
 * without case folding. */
1776 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1780 * g_ascii_strncasecmp:
1781 * @s1: string to compare with @s2.
1782 * @s2: string to compare with @s1.
1783 * @n: number of characters to compare.
1785 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1786 * characters after the first @n in each string.
1788 * Unlike the BSD strcasecmp() function, this only recognizes standard
1789 * ASCII letters and ignores the locale, treating all non-ASCII
1790 * characters as if they are not letters.
1792 * The same warning as in g_ascii_strcasecmp() applies: Use this
1793 * function only on strings known to be in encodings where bytes
1794 * corresponding to ASCII letters always represent themselves.
1796 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1797 * or a positive value if @s1 > @s2.
/* Length-limited byte-wise comparison after folding through TOLOWER.
 * NULL arguments are rejected by the guards, which return 0. */
1800 g_ascii_strncasecmp (const gchar *s1,
1806 g_return_val_if_fail (s1 != NULL, 0);
1807 g_return_val_if_fail (s2 != NULL, 0);
/* Continue only while the budget n is non-zero and neither string has
 * reached its NUL terminator. */
1809 while (n && *s1 && *s2)
/* The guchar cast makes bytes >= 0x80 widen to positive gint values. */
1812 c1 = (gint)(guchar) TOLOWER (*s1);
1813 c2 = (gint)(guchar) TOLOWER (*s2);
/* Difference of the current bytes taken as unsigned values. */
1820 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1828 * @s2: a string to compare with @s1.
1830 * A case-insensitive string comparison, corresponding to the standard
1831 * strcasecmp() function on platforms which support it.
1833 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1834 * or a positive value if @s1 > @s2.
1836 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this function
1837 * is deprecated and how to replace it.
/* Deprecated locale-dependent comparison. With HAVE_STRCASECMP defined the
 * work is delegated to the C library's strcasecmp(); the other branch folds
 * each byte by hand with isupper()/tolower(). Both branches reject NULL
 * arguments first (the guards return 0). */
1840 g_strcasecmp (const gchar *s1,
1843 #ifdef HAVE_STRCASECMP
1844 g_return_val_if_fail (s1 != NULL, 0);
1845 g_return_val_if_fail (s2 != NULL, 0);
1847 return strcasecmp (s1, s2);
1851 g_return_val_if_fail (s1 != NULL, 0);
1852 g_return_val_if_fail (s2 != NULL, 0);
1856 /* According to A. Cox, some platforms have islower's that
1857 * don't work right on non-uppercase
1859 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1860 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1866 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1873 * @s2: a string to compare with @s1.
1874 * @n: the maximum number of characters to compare.
1876 * A case-insensitive string comparison, corresponding to the standard
1877 * strncasecmp() function on platforms which support it.
1878 * It is similar to g_strcasecmp() except it only compares the first @n
1879 * characters of the strings.
1881 * Return value: 0 if the strings match, a negative value if @s1 < @s2,
1882 * or a positive value if @s1 > @s2.
1884 * Deprecated:2.2: The problem with g_strncasecmp() is that it does the
1885 * comparison by calling toupper()/tolower(). These functions are
1886 * locale-specific and operate on single bytes. However, it is impossible
1887 * to handle things correctly from an I18N standpoint by operating on
1888 * bytes, since characters may be multibyte. Thus g_strncasecmp() is
1889 * broken if your string is guaranteed to be ASCII, since it's
1890 * locale-sensitive, and it's broken if your string is localized, since
1891 * it doesn't work on many encodings at all, including UTF-8, EUC-JP,
1894 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1895 * which only works on ASCII and is not locale-sensitive, and
1896 * g_utf8_casefold() followed by strcmp() on the resulting strings, which is
1897 * good for case-insensitive sorting of UTF-8.
/* Deprecated locale-dependent, length-limited comparison. With
 * HAVE_STRNCASECMP defined the work is delegated to the C library's
 * strncasecmp(); the other branch folds bytes with isupper()/tolower(). */
1900 g_strncasecmp (const gchar *s1,
1904 #ifdef HAVE_STRNCASECMP
/* NOTE(review): no NULL guards are visible on this path, whereas the
 * hand-written branch below and both branches of g_strcasecmp() have
 * them -- confirm whether the omission is intentional. */
1905 return strncasecmp (s1, s2, n);
1909 g_return_val_if_fail (s1 != NULL, 0);
1910 g_return_val_if_fail (s2 != NULL, 0);
1912 while (n && *s1 && *s2)
1915 /* According to A. Cox, some platforms have islower's that
1916 * don't work right on non-uppercase
1918 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1919 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1926 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1934 * @string: the string to convert
1935 * @delimiters: (allow-none): a string containing the current delimiters, or %NULL
1936 * to use the standard delimiters defined in #G_STR_DELIMITERS
1937 * @new_delimiter: the new delimiter character
1939 * Converts any delimiter characters in @string to @new_delimiter.
1940 * Any characters in @string which are found in @delimiters are
1941 * changed to the @new_delimiter character. Modifies @string in place,
1942 * and returns @string itself, not a copy. The return value is to
1943 * allow nesting such as
1945 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
/* In-place replacement of delimiter bytes in string. */
1951 g_strdelimit (gchar *string,
1952 const gchar *delimiters,
1957 g_return_val_if_fail (string != NULL, NULL);
/* Default delimiter set; presumably applied only when the caller passed
 * NULL for delimiters -- the guarding condition is not shown here. */
1960 delimiters = G_STR_DELIMITERS;
1962 for (c = string; *c; c++)
/* strchr() finds the current byte in the delimiter set, if present. */
1964 if (strchr (delimiters, *c))
1973 * @string: a nul-terminated array of bytes
1974 * @valid_chars: bytes permitted in @string
1975 * @substitutor: replacement character for disallowed bytes
1977 * For each character in @string, if the character is not in
1978 * @valid_chars, replaces the character with @substitutor.
1979 * Modifies @string in place, and returns @string itself, not
1980 * a copy. The return value is to allow nesting such as
1982 * g_ascii_strup (g_strcanon (str, "abc", '?'))
/* In-place replacement of every byte of string that is not whitelisted.
 * Unlike g_strdelimit(), the character set argument may not be NULL. */
1988 g_strcanon (gchar *string,
1989 const gchar *valid_chars,
1994 g_return_val_if_fail (string != NULL, NULL);
1995 g_return_val_if_fail (valid_chars != NULL, NULL);
1997 for (c = string; *c; c++)
/* A byte absent from valid_chars is the one that gets acted on. */
1999 if (!strchr (valid_chars, *c))
2008 * @source: a string to compress
2010 * Replaces all escaped characters with their one byte equivalent.
2012 * This function does the reverse conversion of g_strescape().
2014 * Returns: a newly-allocated copy of @source with all escaped
2015 * characters compressed
/* Produces an unescaped copy of source. p is the read cursor; octal marks
 * where a run of octal digits began. */
2018 g_strcompress (const gchar *source)
2020 const gchar *p = source, *octal;
2024 g_return_val_if_fail (source != NULL, NULL);
/* The output buffer is sized to the input length plus the NUL; presumably
 * sufficient because unescaping never lengthens the text -- TODO confirm. */
2026 dest = g_malloc (strlen (source) + 1);
2037 g_warning ("g_strcompress: trailing \\");
2039 case '0': case '1': case '2': case '3': case '4':
2040 case '5': case '6': case '7':
/* Consume at most three octal digits counted from `octal`, stopping early
 * at the first byte outside '0'..'7'. */
2043 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
/* Accumulate the value in the output byte, base 8. */
2045 *q = (*q * 8) + (*p - '0');
2069 default: /* Also handles \" and \\ */
2086 * @source: a string to escape
2087 * @exceptions: a string of characters not to escape in @source
2089 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2090 * and '"' in the string @source by inserting a '\' before
2091 * them. Additionally all characters in the range 0x01-0x1F (everything
2092 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2093 * replaced with a '\' followed by their octal representation.
2094 * Characters supplied in @exceptions are not escaped.
2096 * g_strcompress() does the reverse conversion.
2098 * Returns: a newly-allocated copy of @source with certain
2099 * characters escaped. See above.
/* Produces an escaped copy of source. p reads source as unsigned bytes,
 * q writes into the freshly allocated dest. */
2102 g_strescape (const gchar *source,
2103 const gchar *exceptions)
2110 g_return_val_if_fail (source != NULL, NULL);
2112 p = (guchar *) source;
2113 /* Each source byte needs maximally four destination chars (\777) */
/* NOTE(review): strlen (source) * 4 + 1 is computed without an overflow
 * check -- confirm this is acceptable for very large inputs. */
2114 q = dest = g_malloc (strlen (source) * 4 + 1);
/* Start with all 256 entries of excmap cleared. */
2116 memset (excmap, 0, 256);
/* exceptions is likewise traversed as unsigned bytes. */
2119 guchar *e = (guchar *) exceptions;
/* Bytes below space, or at/above 0177 (DEL), take the octal route. */
2169 if ((*p < ' ') || (*p >= 0177))
/* Three octal digits, most significant first: bits 7-6, 5-3, 2-0. */
2172 *q++ = '0' + (((*p) >> 6) & 07);
2173 *q++ = '0' + (((*p) >> 3) & 07);
2174 *q++ = '0' + ((*p) & 07);
2189 * @string: a string to remove the leading whitespace from
2191 * Removes leading whitespace from a string, by moving the rest
2192 * of the characters forward.
2194 * This function doesn't allocate or reallocate any memory;
2195 * it modifies @string in place. Therefore, it cannot be used on
2196 * statically allocated strings.
2198 * The pointer to @string is returned to allow the nesting of functions.
2200 * Also see g_strchomp() and g_strstrip().
/* Strips leading ASCII whitespace by shifting the remainder to the front. */
2205 g_strchug (gchar *string)
2209 g_return_val_if_fail (string != NULL, NULL);
/* Advance start past every leading byte accepted by g_ascii_isspace(). */
2211 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
/* memmove() is used because source and destination overlap; the "+ 1"
 * carries the NUL terminator along. */
2214 memmove (string, start, strlen ((gchar *) start) + 1);
2221 * @string: a string to remove the trailing whitespace from
2223 * Removes trailing whitespace from a string.
2225 * This function doesn't allocate or reallocate any memory;
2226 * it modifies @string in place. Therefore, it cannot be used
2227 * on statically allocated strings.
2229 * The pointer to @string is returned to allow the nesting of functions.
2231 * Also see g_strchug() and g_strstrip().
/* Strips trailing ASCII whitespace in place. */
2236 g_strchomp (gchar *string)
2240 g_return_val_if_fail (string != NULL, NULL);
2242 len = strlen (string);
/* The byte at index len is tested as an unsigned value; presumably len is
 * stepped backward from the end before each test -- TODO confirm. */
2245 if (g_ascii_isspace ((guchar) string[len]))
2256 * @string: a string to split
2257 * @delimiter: a string which specifies the places at which to split
2258 * the string. The delimiter is not included in any of the resulting
2259 * strings, unless @max_tokens is reached.
2260 * @max_tokens: the maximum number of pieces to split @string into.
2261 * If this is less than 1, the string is split completely.
2263 * Splits a string into a maximum of @max_tokens pieces, using the given
2264 * @delimiter. If @max_tokens is reached, the remainder of @string is
2265 * appended to the last token.
2267 * As a special case, the result of splitting the empty string "" is an empty
2268 * vector, not a vector containing a single string. The reason for this
2269 * special case is that being able to represent an empty vector is typically
2270 * more useful than consistent handling of empty elements. If you do need
2271 * to represent empty elements, you'll need to check for the empty string
2272 * before calling g_strsplit().
2274 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2275 * g_strfreev() to free it.
/* Splits string on every occurrence of the delimiter substring. Tokens are
 * collected in a temporary GSList and then moved into a NULL-terminated
 * array. An empty delimiter is rejected by the third guard. */
2278 g_strsplit (const gchar *string,
2279 const gchar *delimiter,
2282 GSList *string_list = NULL, *slist;
2283 gchar **str_array, *s;
2285 const gchar *remainder;
2287 g_return_val_if_fail (string != NULL, NULL);
2288 g_return_val_if_fail (delimiter != NULL, NULL);
2289 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2292 max_tokens = G_MAXINT;
2295 s = strstr (remainder, delimiter);
2298 gsize delimiter_len = strlen (delimiter);
/* The pre-decrement stops splitting one token early, leaving the final
 * slot for the unsplit remainder. */
2300 while (--max_tokens && s)
2304 len = s - remainder;
2305 string_list = g_slist_prepend (string_list,
2306 g_strndup (remainder, len));
/* Skip over the delimiter and look for its next occurrence. */
2308 remainder = s + delimiter_len;
2309 s = strstr (remainder, delimiter);
/* Whatever text is left becomes a token of its own. */
2315 string_list = g_slist_prepend (string_list, g_strdup (remainder));
2318 str_array = g_new (gchar*, n + 1);
/* Tokens were prepended, so the list is in reverse order: fill the array
 * from the last slot toward the first. */
2320 str_array[n--] = NULL;
2321 for (slist = string_list; slist; slist = slist->next)
2322 str_array[n--] = slist->data;
/* The token strings are now referenced from str_array. */
2324 g_slist_free (string_list);
2331 * @string: The string to be tokenized
2332 * @delimiters: A nul-terminated string containing bytes that are used
2333 * to split the string.
2334 * @max_tokens: The maximum number of tokens to split @string into.
2335 * If this is less than 1, the string is split completely
2337 * Splits @string into a number of tokens not containing any of the characters
2338 * in @delimiters. A token is the (possibly empty) longest string that does not
2339 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2340 * remainder is appended to the last token.
2342 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2343 * %NULL-terminated vector containing the three strings "abc", "def",
2346 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2347 * vector containing the four strings "", "def", "ghi", and "".
2349 * As a special case, the result of splitting the empty string "" is an empty
2350 * vector, not a vector containing a single string. The reason for this
2351 * special case is that being able to represent an empty vector is typically
2352 * more useful than consistent handling of empty elements. If you do need
2353 * to represent empty elements, you'll need to check for the empty string
2354 * before calling g_strsplit_set().
2356 * Note that this function works on bytes not characters, so it can't be used
2357 * to delimit UTF-8 strings for anything but ASCII characters.
2359 * Return value: a newly-allocated %NULL-terminated array of strings. Use
2360 * g_strfreev() to free it.
/* Splits string at any byte contained in delimiters, using a 256-entry
 * lookup table indexed by byte value. */
2365 g_strsplit_set (const gchar *string,
2366 const gchar *delimiters,
2369 gboolean delim_table[256];
2370 GSList *tokens, *list;
2373 const gchar *current;
2377 g_return_val_if_fail (string != NULL, NULL);
2378 g_return_val_if_fail (delimiters != NULL, NULL);
2381 max_tokens = G_MAXINT;
/* Empty input is special-cased with a one-slot result array. */
2383 if (*string == '\0')
2385 result = g_new (char *, 1);
/* Clear the table, then flag every byte that occurs in delimiters. */
2390 memset (delim_table, FALSE, sizeof (delim_table));
2391 for (s = delimiters; *s != '\0'; ++s)
2392 delim_table[*(guchar *)s] = TRUE;
/* current marks the start of the token being scanned; s is the cursor. */
2397 s = current = string;
/* Split at a delimiter byte only while one more token is still allowed. */
2400 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2402 token = g_strndup (current, s - current);
2403 tokens = g_slist_prepend (tokens, token);
/* Text between current and s forms one more token. */
2412 token = g_strndup (current, s - current);
2413 tokens = g_slist_prepend (tokens, token);
2416 result = g_new (gchar *, n_tokens + 1);
/* Tokens were prepended, so the array is filled back to front. */
2418 result[n_tokens] = NULL;
2419 for (list = tokens; list != NULL; list = list->next)
2420 result[--n_tokens] = list->data;
/* The token strings are now referenced from result. */
2422 g_slist_free (tokens);
2429 * @str_array: a %NULL-terminated array of strings to free
2431 * Frees a %NULL-terminated array of strings, and the array itself.
2432 * If called on a %NULL value, g_strfreev() simply returns.
/* Releases every string of a NULL-terminated array. */
2435 g_strfreev (gchar **str_array)
/* Iterate up to, but not including, the terminating NULL entry. */
2441 for (i = 0; str_array[i] != NULL; i++)
2442 g_free (str_array[i]);
2450 * @str_array: a %NULL-terminated array of strings
2452 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2453 * the new array should be freed by first freeing each string, then
2454 * the array itself. g_strfreev() does this for you. If called
2455 * on a %NULL value, g_strdupv() simply returns %NULL.
2457 * Return value: a new %NULL-terminated array of strings.
/* Deep-copies a NULL-terminated string array: one pass over the entries to
 * size the new array, a second pass to duplicate each string. */
2460 g_strdupv (gchar **str_array)
2468 while (str_array[i])
/* One extra slot is allocated beyond the i counted entries. */
2471 retval = g_new (gchar*, i + 1);
2474 while (str_array[i])
2476 retval[i] = g_strdup (str_array[i]);
2489 * @separator: (allow-none): a string to insert between each of the strings, or %NULL
2490 * @str_array: a %NULL-terminated array of strings to join
2492 * Joins a number of strings together to form one long string, with the
2493 * optional @separator inserted between each of them. The returned string
2494 * should be freed with g_free().
2496 * Returns: a newly-allocated string containing all of the strings joined
2497 * together, with @separator between them
/* Joins the strings of str_array in two passes: first measure the total
 * length, then copy everything into a single allocation. */
2500 g_strjoinv (const gchar *separator,
2506 g_return_val_if_fail (str_array != NULL, NULL);
2508 if (separator == NULL)
2515 gsize separator_len;
2517 separator_len = strlen (separator);
2518 /* First part, getting length */
/* The leading 1 accounts for the NUL terminator. */
2519 len = 1 + strlen (str_array[0]);
2520 for (i = 1; str_array[i] != NULL; i++)
2521 len += strlen (str_array[i]);
/* After the loop i equals the element count, so i - 1 separators fit
 * between the elements. */
2522 len += separator_len * (i - 1);
2524 /* Second part, building string */
2525 string = g_new (gchar, len);
/* g_stpcpy() returns the new end of the output, so each copy continues
 * where the previous one stopped. */
2526 ptr = g_stpcpy (string, *str_array);
2527 for (i = 1; str_array[i] != NULL; i++)
2529 ptr = g_stpcpy (ptr, separator);
2530 ptr = g_stpcpy (ptr, str_array[i]);
/* Presumably the branch taken for an array with no elements -- TODO
 * confirm; it yields a newly duplicated empty string. */
2534 string = g_strdup ("");
2541 * @separator: (allow-none): a string to insert between each of the strings, or %NULL
2542 * @...: a %NULL-terminated list of strings to join
2544 * Joins a number of strings together to form one long string, with the
2545 * optional @separator inserted between each of them. The returned string
2546 * should be freed with g_free().
2548 * Returns: a newly-allocated string containing all of the strings joined
2549 * together, with @separator between them
/* Variadic counterpart of g_strjoinv(): the argument list is walked twice,
 * once to measure and once to copy, with va_start() issued for each pass. */
2552 g_strjoin (const gchar *separator,
2558 gsize separator_len;
2561 if (separator == NULL)
2564 separator_len = strlen (separator);
2566 va_start (args, separator);
2568 s = va_arg (args, gchar*);
2572 /* First part, getting length */
/* The leading 1 accounts for the NUL terminator. */
2573 len = 1 + strlen (s);
2575 s = va_arg (args, gchar*);
/* Every string after the first costs one separator plus its own length. */
2578 len += separator_len + strlen (s);
2579 s = va_arg (args, gchar*);
2583 /* Second part, building string */
2584 string = g_new (gchar, len);
/* Restart the argument traversal for the copy pass. */
2586 va_start (args, separator);
2588 s = va_arg (args, gchar*);
2589 ptr = g_stpcpy (string, s);
2591 s = va_arg (args, gchar*);
2594 ptr = g_stpcpy (ptr, separator);
2595 ptr = g_stpcpy (ptr, s);
2596 s = va_arg (args, gchar*);
/* Presumably the branch taken when no strings were supplied -- TODO
 * confirm; it yields a newly duplicated empty string. */
2600 string = g_strdup ("");
2610 * @haystack: a string
2611 * @haystack_len: the maximum length of @haystack. Note that -1 is
2612 * a valid length, if @haystack is nul-terminated, meaning it will
2613 * search through the whole string.
2614 * @needle: the string to search for
2616 * Searches the string @haystack for the first occurrence
2617 * of the string @needle, limiting the length of the search
2620 * Return value: a pointer to the found occurrence, or
2621 * %NULL if not found.
/* Forward substring search limited to haystack_len bytes. */
2624 g_strstr_len (const gchar *haystack,
2625 gssize haystack_len,
2626 const gchar *needle)
2628 g_return_val_if_fail (haystack != NULL, NULL);
2629 g_return_val_if_fail (needle != NULL, NULL);
/* A negative length means "no limit": defer to the C library's strstr(). */
2631 if (haystack_len < 0)
2632 return strstr (haystack, needle);
2635 const gchar *p = haystack;
2636 gsize needle_len = strlen (needle);
/* An empty needle matches at the very start of haystack. */
2640 if (needle_len == 0)
2641 return (gchar *)haystack;
/* A needle longer than the searchable region is tested for here. */
2643 if (haystack_len < needle_len)
/* end is the last start position at which a whole needle still fits. */
2646 end = haystack + haystack_len - needle_len;
/* The scan also stops at a NUL byte found before the length limit. */
2648 while (p <= end && *p)
/* Compare the needle against the bytes at p, one byte at a time. */
2650 for (i = 0; i < needle_len; i++)
2651 if (p[i] != needle[i])
2666 * @haystack: a nul-terminated string
2667 * @needle: the nul-terminated string to search for
2669 * Searches the string @haystack for the last occurrence
2670 * of the string @needle.
2672 * Return value: a pointer to the found occurrence, or
2673 * %NULL if not found.
/* Backward substring search over a NUL-terminated haystack. */
2676 g_strrstr (const gchar *haystack,
2677 const gchar *needle)
2684 g_return_val_if_fail (haystack != NULL, NULL);
2685 g_return_val_if_fail (needle != NULL, NULL);
2687 needle_len = strlen (needle);
2688 haystack_len = strlen (haystack);
/* An empty needle matches at the very start of haystack. */
2690 if (needle_len == 0)
2691 return (gchar *)haystack;
/* A needle longer than the haystack is tested for here. */
2693 if (haystack_len < needle_len)
/* Start at the last position where a whole needle fits and move toward
 * the front of haystack. */
2696 p = haystack + haystack_len - needle_len;
2698 while (p >= haystack)
/* Compare the needle against the bytes at p, one byte at a time. */
2700 for (i = 0; i < needle_len; i++)
2701 if (p[i] != needle[i])
2715 * @haystack: a nul-terminated string
2716 * @haystack_len: the maximum length of @haystack
2717 * @needle: the nul-terminated string to search for
2719 * Searches the string @haystack for the last occurrence
2720 * of the string @needle, limiting the length of the search
2723 * Return value: a pointer to the found occurrence, or
2724 * %NULL if not found.
/* Backward substring search limited to haystack_len bytes. */
2727 g_strrstr_len (const gchar *haystack,
2728 gssize haystack_len,
2729 const gchar *needle)
2731 g_return_val_if_fail (haystack != NULL, NULL);
2732 g_return_val_if_fail (needle != NULL, NULL);
/* A negative length means "no limit": defer to g_strrstr(). */
2734 if (haystack_len < 0)
2735 return g_strrstr (haystack, needle);
2738 gsize needle_len = strlen (needle);
2739 const gchar *haystack_max = haystack + haystack_len;
2740 const gchar *p = haystack;
/* Bounded by haystack_len bytes or the first NUL byte, whichever comes
 * first. */
2743 while (p < haystack_max && *p)
/* True when fewer than needle_len bytes lie between haystack and p. */
2746 if (p < haystack + needle_len)
2751 while (p >= haystack)
/* Compare the needle against the bytes at p, one byte at a time. */
2753 for (i = 0; i < needle_len; i++)
2754 if (p[i] != needle[i])
2770 * @str: a nul-terminated string
2771 * @suffix: the nul-terminated suffix to look for
2773 * Checks whether the string @str ends with @suffix.
2775 * Return value: %TRUE if @str ends with @suffix, %FALSE otherwise.
/* Tests whether the tail of str equals suffix. */
2780 g_str_has_suffix (const gchar *str,
2781 const gchar *suffix)
2786 g_return_val_if_fail (str != NULL, FALSE);
2787 g_return_val_if_fail (suffix != NULL, FALSE);
2789 str_len = strlen (str);
2790 suffix_len = strlen (suffix);
/* This check also keeps the pointer arithmetic below from stepping before
 * the start of str. */
2792 if (str_len < suffix_len)
/* Compare the final suffix_len bytes of str against the whole of suffix. */
2795 return strcmp (str + str_len - suffix_len, suffix) == 0;
2800 * @str: a nul-terminated string
2801 * @prefix: the nul-terminated prefix to look for
2803 * Looks whether the string @str begins with @prefix.
2805 * Return value: %TRUE if @str begins with @prefix, %FALSE otherwise.
/* Tests whether the head of str equals prefix. */
2810 g_str_has_prefix (const gchar *str,
2811 const gchar *prefix)
2816 g_return_val_if_fail (str != NULL, FALSE);
2817 g_return_val_if_fail (prefix != NULL, FALSE);
2819 str_len = strlen (str);
2820 prefix_len = strlen (prefix);
/* A prefix longer than str is tested for here. */
2822 if (str_len < prefix_len)
/* Compare exactly the first prefix_len bytes of both strings. */
2825 return strncmp (str, prefix, prefix_len) == 0;
2830 * @str_array: a %NULL-terminated array of strings
2832 * Returns the length of the given %NULL-terminated
2833 * string array @str_array.
2835 * Return value: length of @str_array.
/* Counts the entries of a NULL-terminated string array. Unlike
 * g_strfreev() and g_strdupv(), a NULL array is rejected by the guard,
 * which returns 0. */
2840 g_strv_length (gchar **str_array)
2844 g_return_val_if_fail (str_array != NULL, 0);
/* Loop until the terminating NULL entry is reached. */
2846 while (str_array[i])
/* Normalises the byte range [start, end), rewrites dotless "ı" and dotted
 * "İ" to a plain ASCII 'i', case-folds the result and appends it to array. */
2853 index_add_folded (GPtrArray *array,
/* The length passed to the normaliser is the pointer difference end - start. */
2859 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2861 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
/* The rewrite is entered only when one of the two letters is present. */
2862 if (strstr (normal, "ı") || strstr (normal, "İ"))
2867 tmp = g_string_new (NULL);
/* Locate the next occurrence of each of the two letters. */
2873 i = strstr (s, "ı");
2874 I = strstr (s, "İ");
/* Copy the text before the match at e, emit a plain 'i' in its place, and
 * resume right after the matched character. */
2887 g_string_append_len (tmp, s, e - s);
2888 g_string_append_c (tmp, 'i');
2889 s = g_utf8_next_char (e);
/* Append whatever follows the last replacement. */
2892 g_string_append (tmp, s);
/* Passing FALSE keeps the character data, which becomes the new value of
 * normal. */
2894 normal = g_string_free (tmp, FALSE);
/* Only the case-folded form of the string is stored in array. */
2897 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
/* Breaks value into words and returns them, folded via index_add_folded(),
 * as a NULL-terminated array. start marks the beginning of the word being
 * scanned and is NULL initially. */
2902 split_words (const gchar *value)
2904 const gchar *start = NULL;
2908 result = g_ptr_array_new ();
/* Iterate one UTF-8 character at a time. */
2910 for (s = value; *s; s = g_utf8_next_char (s))
2912 gunichar c = g_utf8_get_char (s);
/* Alphanumerics and marks count as word characters. */
2916 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
/* Any other character is tested for here; the range [start, s) is then
 * handed to index_add_folded(). */
2921 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2923 index_add_folded (result, start, s);
/* Presumably flushes a word that runs to the end of value -- TODO confirm. */
2930 index_add_folded (result, start, s);
/* Terminate the pointer array with NULL before exposing it as gchar **. */
2932 g_ptr_array_add (result, NULL);
/* Passing FALSE keeps the element storage, which is what gets returned. */
2934 return (gchar **) g_ptr_array_free (result, FALSE);
2938 * g_str_tokenize_and_fold:
2940 * @translit_locale: (allow-none): the language code (like 'de' or
2941 * 'en_GB') from which @string originates
2942 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2943 * return location for ASCII alternates
2945 * Tokenises @string and performs folding on each token.
2947 * A token is a non-empty sequence of alphanumeric characters in the
2948 * source string, separated by non-alphanumeric characters. An
2949 * "alphanumeric" character for this purpose is one that matches
2950 * g_unichar_isalnum() or g_unichar_ismark().
2952 * Each token is then (Unicode) normalised and case-folded. If
2953 * @ascii_alternates is non-%NULL and some of the returned tokens
2954 * contain non-ASCII characters, ASCII alternatives will be generated.
2956 * The number of ASCII alternatives that are generated and the method
2957 * for doing so is unspecified, but @translit_locale (if specified) may
2958 * improve the transliteration if the language of the source string is
2961 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
/* Tokenises string through split_words() and, when requested, derives
 * ASCII alternates for tokens that contain non-ASCII bytes.
 * NOTE(review): string is dereferenced by g_str_is_ascii() and
 * split_words() without a visible NULL guard -- confirm that callers never
 * pass NULL. */
2966 g_str_tokenize_and_fold (const gchar *string,
2967 const gchar *translit_locale,
2968 gchar ***ascii_alternates)
/* Pure-ASCII input: hand back a zero-filled one-slot array (i.e. just the
 * NULL terminator) and clear the local pointer so that the alternates
 * pass further down is skipped. */
2972 if (ascii_alternates && g_str_is_ascii (string))
2974 *ascii_alternates = g_new0 (gchar *, 0 + 1);
2975 ascii_alternates = NULL;
2978 result = split_words (string);
2980 /* TODO: proper iconv transliteration (locale-dependent) */
2981 if (ascii_alternates)
/* Sized for the worst case of one alternate per token, plus the NULL. */
2985 n = g_strv_length (result);
2986 *ascii_alternates = g_new (gchar *, n + 1);
2989 for (i = 0; i < n; i++)
/* Only tokens containing non-ASCII bytes receive an alternate. */
2991 if (!g_str_is_ascii (result[i]))
2998 decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL);
/* The filtered copy cannot be longer than the decomposed string. */
2999 ascii = g_malloc (strlen (decomposed) + 1);
/* Keep only the bytes whose high bit (0x80) is clear. */
3001 for (k = 0; decomposed[k]; k++)
3002 if (~decomposed[k] & 0x80)
3003 ascii[l++] = decomposed[k];
3006 (*ascii_alternates)[j++] = ascii;
3007 g_free (decomposed);
/* NULL-terminate after the j alternates actually produced. */
3011 (*ascii_alternates)[j] = NULL;
3018 * g_str_match_string:
3019 * @search_term: the search term from the user
3020 * @potential_hit: the text that may be a hit
3021 * @accept_alternates: %TRUE to accept ASCII alternates
3023 * Checks if a search conducted for @search_term should match @potential_hit.
3026 * This function calls g_str_tokenize_and_fold() on both
3027 * @search_term and @potential_hit. ASCII alternates are never taken
3028 * for @search_term but will be taken for @potential_hit according to
3029 * the value of @accept_alternates.
3031 * A hit occurs when each folded token in @search_term is a prefix of a
3032 * folded token from @potential_hit.
3034 * Depending on how you're performing the search, it will typically be
3035 * faster to call g_str_tokenize_and_fold() on each string in
3036 * your corpus and build an index on the returned folded tokens, then
3037 * call g_str_tokenize_and_fold() on the search term and
3038 * perform lookups into that index.
3040 * As some examples, searching for "fred" would match the potential hit
3041 * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match
3042 * "Frédéric" but not "Frederic" (due to the one-directional nature of
3043 * accent matching). Searching "fo" would match "Foo" and "Bar Foo
3044 * Baz", but not "SFO" (because no word has "fo" as a prefix).
3046 * Returns: %TRUE if @potential_hit is a hit
3051 g_str_match_string (const gchar *search_term,
3052 const gchar *potential_hit,
3053 gboolean accept_alternates)
3055 gchar **alternates = NULL;
3056 gchar **term_tokens;
3061 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3062 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3066 for (i = 0; term_tokens[i]; i++)
3068 for (j = 0; hit_tokens[j]; j++)
3069 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3072 if (accept_alternates)
3073 for (j = 0; alternates[j]; j++)
3074 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3084 g_strfreev (term_tokens);
3085 g_strfreev (hit_tokens);
3086 g_strfreev (alternates);