X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgstrfuncs.c;h=f33f84b6886d997e25bac615d7ed860f907ace65;hb=ecf1359191b2f796a7d63288652dd1a93525417d;hp=8495697f86609667645e84799095ba13a17167cb;hpb=f218353e04ca61dbf5e4d7a3a024fe414dbfa46a;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c index 8495697..f33f84b 100644 --- a/glib/gstrfuncs.c +++ b/glib/gstrfuncs.c @@ -12,9 +12,7 @@ * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ /* @@ -67,27 +65,25 @@ * * Note that the functions g_printf(), g_fprintf(), g_sprintf(), * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf() - * are declared in the header gprintf.h which is - * not included in glib.h - * (otherwise using glib.h would drag in - * stdio.h), so you'll have to explicitly include - * <glib/gprintf.h> in order to use the GLib + * are declared in the header `gprintf.h` which is not included in `glib.h` + * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to + * explicitly include `<glib/gprintf.h>` in order to use the GLib * printf() functions. * - * While you may use the printf() functions - * to format UTF-8 strings, notice that the precision of a - * %Ns parameter is interpreted as the - * number of bytes, not characters - * to print. On top of that, the GNU libc implementation of the printf() - * functions has the "feature" that it checks that the string given for - * the %Ns parameter consists of a whole number - * of characters in the current encoding. So, unless you are sure you are - * always going to be in an UTF-8 locale or your know your text is restricted - * to ASCII, avoid using %Ns. 
If your intention is - * to format strings for a certain number of columns, then - * %Ns is not a correct solution anyway, since it - * fails to take wide characters (see g_unichar_iswide()) into account. - * + * ## String precision pitfalls # {#string-precision} + * + * While you may use the printf() functions to format UTF-8 strings, + * notice that the precision of a \%Ns parameter is interpreted + * as the number of bytes, not characters to print. On top of that, + * the GNU libc implementation of the printf() functions has the + * "feature" that it checks that the string given for the \%Ns + * parameter consists of a whole number of characters in the current + * encoding. So, unless you are sure you are always going to be in an + * UTF-8 locale or your know your text is restricted to ASCII, avoid + * using \%Ns. If your intention is to format strings for a + * certain number of columns, then \%Ns is not a correct solution + * anyway, since it fails to take wide characters (see g_unichar_iswide()) + * into account. */ /** @@ -99,9 +95,9 @@ * Unlike the standard C library isalnum() function, this only * recognizes standard ASCII letters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * cast to #guchar before passing a possibly non-ASCII character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII alphanumeric character */ @@ -115,9 +111,9 @@ * Unlike the standard C library isalpha() function, this only * recognizes standard ASCII letters and ignores the locale, * returning %FALSE for all non-ASCII characters. 
Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * cast to #guchar before passing a possibly non-ASCII character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII alphabetic character */ @@ -131,9 +127,9 @@ * Unlike the standard C library iscntrl() function, this only * recognizes standard ASCII control characters and ignores the * locale, returning %FALSE for all non-ASCII characters. Also, - * unlike the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * cast to #guchar before passing a possibly non-ASCII character in. + * unlike the standard library function, this takes a char, not + * an int, so don't call it on %EOF, but no need to cast to #guchar + * before passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII control character. */ @@ -145,9 +141,8 @@ * Determines whether a character is digit (0-9). * * Unlike the standard C library isdigit() function, this takes - * a char, not an int, so don't call it - * on %EOF, but no need to cast to #guchar before passing a possibly - * non-ASCII character in. + * a char, not an int, so don't call it on %EOF, but no need to + * cast to #guchar before passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII digit. */ @@ -161,9 +156,9 @@ * Unlike the standard C library isgraph() function, this only * recognizes standard ASCII characters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need - * to cast to #guchar before passing a possibly non-ASCII character in. 
+ * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII printing character other than space. */ @@ -177,10 +172,9 @@ * Unlike the standard C library islower() function, this only * recognizes standard ASCII letters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need - * to worry about casting to #guchar before passing a possibly - * non-ASCII character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to worry about casting + * to #guchar before passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII lower case letter */ @@ -194,9 +188,9 @@ * Unlike the standard C library isprint() function, this only * recognizes standard ASCII characters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need - * to cast to #guchar before passing a possibly non-ASCII character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII printing character. */ @@ -210,9 +204,9 @@ * Unlike the standard C library ispunct() function, this only * recognizes standard ASCII letters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * cast to #guchar before passing a possibly non-ASCII character in. 
+ * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII punctuation character. */ @@ -226,9 +220,9 @@ * Unlike the standard C library isspace() function, this only * recognizes standard ASCII white-space and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * cast to #guchar before passing a possibly non-ASCII character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to cast to #guchar before + * passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII white-space character */ @@ -242,10 +236,9 @@ * Unlike the standard C library isupper() function, this only * recognizes standard ASCII letters and ignores the locale, * returning %FALSE for all non-ASCII characters. Also, unlike - * the standard library function, this takes a char, - * not an int, so don't call it on %EOF, but no need to - * worry about casting to #guchar before passing a possibly non-ASCII - * character in. + * the standard library function, this takes a char, not an int, + * so don't call it on %EOF, but no need to worry about casting + * to #guchar before passing a possibly non-ASCII character in. * * Returns: %TRUE if @c is an ASCII upper case letter */ @@ -257,9 +250,8 @@ * Determines whether a character is a hexadecimal-digit character. * * Unlike the standard C library isxdigit() function, this takes - * a char, not an int, so don't call it - * on %EOF, but no need to cast to #guchar before passing a - * possibly non-ASCII character in. + * a char, not an int, so don't call it on %EOF, but no need to + * cast to #guchar before passing a possibly non-ASCII character in. 
* * Returns: %TRUE if @c is an ASCII hexadecimal-digit character. */ @@ -272,10 +264,10 @@ * on systems with 64bit IEEE-compatible doubles. * * The typical usage would be something like: - * |[ + * |[<!-- language="C" --> * char buf[G_ASCII_DTOSTR_BUF_SIZE]; * - * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value)); + * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value)); * ]| */ @@ -317,7 +309,15 @@ static const guint16 ascii_table_data[256] = { const guint16 * const g_ascii_table = ascii_table_data; -#ifdef HAVE_NEWLOCALE +#if defined (HAVE_NEWLOCALE) && \ + defined (HAVE_USELOCALE) && \ + defined (HAVE_STRTOD_L) && \ + defined (HAVE_STRTOULL_L) && \ + defined (HAVE_STRTOLL_L) +#define USE_XLOCALE 1 +#endif + +#ifdef USE_XLOCALE static locale_t get_C_locale (void) { @@ -396,18 +396,16 @@ g_memdup (gconstpointer mem, * @n: the maximum number of bytes to copy from @str * * Duplicates the first @n bytes of a string, returning a newly-allocated - * buffer @n + 1 bytes long which will always be nul-terminated. - * If @str is less than @n bytes long the buffer is padded with nuls. - * If @str is %NULL it returns %NULL. - * The returned value should be freed when no longer needed. + * buffer @n + 1 bytes long which will always be nul-terminated. If @str + * is less than @n bytes long the buffer is padded with nuls. If @str is + * %NULL it returns %NULL. The returned value should be freed when no longer + * needed. * - * - * To copy a number of characters from a UTF-8 encoded string, use - * g_utf8_strncpy() instead. - * + * To copy a number of characters from a UTF-8 encoded string, + * use g_utf8_strncpy() instead. * * Returns: a newly-allocated buffer containing the first @n bytes - * of @str, nul-terminated + * of @str, nul-terminated */ gchar* g_strndup (const gchar *str, @@ -460,7 +458,7 @@ g_strnfill (gsize length, * This is useful for concatenating multiple strings together * without having to repeatedly scan for the end. 
* - * Return value: a pointer to trailing nul byte. + * Returns: a pointer to trailing nul byte. **/ gchar * g_stpcpy (gchar *dest, @@ -471,8 +469,8 @@ g_stpcpy (gchar *dest, g_return_val_if_fail (src != NULL, NULL); return stpcpy (dest, src); #else - register gchar *d = dest; - register const gchar *s = src; + gchar *d = dest; + const gchar *s = src; g_return_val_if_fail (dest != NULL, NULL); g_return_val_if_fail (src != NULL, NULL); @@ -487,7 +485,7 @@ g_stpcpy (gchar *dest, /** * g_strdup_vprintf: * @format: a standard printf() format string, but notice - * string precision pitfalls + * [string precision pitfalls][string-precision] * @args: the list of parameters to insert into the format string * * Similar to the standard C vsprintf() function but safer, since it @@ -514,7 +512,7 @@ g_strdup_vprintf (const gchar *format, /** * g_strdup_printf: * @format: a standard printf() format string, but notice - * string precision pitfalls + * [string precision pitfalls][string-precision] * @...: the parameters to insert into the format string * * Similar to the standard C sprintf() function but safer, since it @@ -543,17 +541,16 @@ g_strdup_printf (const gchar *format, * @string1: the first string to add, which must not be %NULL * @...: a %NULL-terminated list of strings to append to the string * - * Concatenates all of the given strings into one long string. - * The returned string should be freed with g_free() when no longer needed. + * Concatenates all of the given strings into one long string. The + * returned string should be freed with g_free() when no longer needed. + * + * The variable argument list must end with %NULL. If you forget the %NULL, + * g_strconcat() will start appending random memory junk to your string. * * Note that this function is usually not the right function to use to * assemble a translated message from pieces, since proper translation * often requires the pieces to be reordered. * - * The variable argument list must end - * with %NULL. 
If you forget the %NULL, g_strconcat() will start appending - * random memory junk to your string. - * * Returns: a newly-allocated string containing all the string arguments */ gchar* @@ -612,7 +609,7 @@ g_strconcat (const gchar *string1, ...) * separated lists of values, since the commas may be interpreted as a decimal * point in some locales, causing unexpected results. * - * Return value: the #gdouble value. + * Returns: the #gdouble value. **/ gdouble g_strtod (const gchar *nptr, @@ -677,33 +674,45 @@ g_strtod (const gchar *nptr, * This function resets %errno before calling strtod() so that * you can reliably detect overflow and underflow. * - * Return value: the #gdouble value. + * Returns: the #gdouble value. */ gdouble g_ascii_strtod (const gchar *nptr, gchar **endptr) { +#ifdef USE_XLOCALE + g_return_val_if_fail (nptr != NULL, 0); -#ifdef HAVE_STRTOD_L errno = 0; return strtod_l (nptr, endptr, get_C_locale ()); + #else + gchar *fail_pos; gdouble val; +#ifndef __BIONIC__ struct lconv *locale_data; +#endif const char *decimal_point; int decimal_point_len; const char *p, *decimal_point_pos; const char *end = NULL; /* Silence gcc */ int strtod_errno; + g_return_val_if_fail (nptr != NULL, 0); + fail_pos = NULL; +#ifndef __BIONIC__ locale_data = localeconv (); decimal_point = locale_data->decimal_point; decimal_point_len = strlen (decimal_point); +#else + decimal_point = "."; + decimal_point_len = 1; +#endif g_assert (decimal_point_len != 0); @@ -845,13 +854,13 @@ g_ascii_strtod (const gchar *nptr, * Converts a #gdouble to a string, using the '.' as * decimal point. * - * This functions generates enough precision that converting + * This function generates enough precision that converting * the string back using g_ascii_strtod() gives the same machine-number * (on machines with IEEE compatible 64bit doubles). It is * guaranteed that the size of the resulting string will never * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes. 
* - * Return value: The pointer to the buffer with the converted string. + * Returns: The pointer to the buffer with the converted string. **/ gchar * g_ascii_dtostr (gchar *buffer, @@ -861,6 +870,9 @@ g_ascii_dtostr (gchar *buffer, return g_ascii_formatd (buffer, buf_len, "%.17g", d); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + /** * g_ascii_formatd: * @buffer: A buffer to place the resulting string in @@ -877,7 +889,7 @@ g_ascii_dtostr (gchar *buffer, * If you just want to want to serialize the value into a * string, use g_ascii_dtostr(). * - * Return value: The pointer to the buffer with the converted string. + * Returns: The pointer to the buffer with the converted string. */ gchar * g_ascii_formatd (gchar *buffer, @@ -885,16 +897,18 @@ g_ascii_formatd (gchar *buffer, const gchar *format, gdouble d) { -#ifdef HAVE_USELOCALE +#ifdef USE_XLOCALE locale_t old_locale; old_locale = uselocale (get_C_locale ()); - _g_snprintf (buffer, buf_len, format, d); + _g_snprintf (buffer, buf_len, format, d); uselocale (old_locale); return buffer; #else +#ifndef __BIONIC__ struct lconv *locale_data; +#endif const char *decimal_point; int decimal_point_len; gchar *p; @@ -925,9 +939,14 @@ g_ascii_formatd (gchar *buffer, _g_snprintf (buffer, buf_len, format, d); +#ifndef __BIONIC__ locale_data = localeconv (); decimal_point = locale_data->decimal_point; decimal_point_len = strlen (decimal_point); +#else + decimal_point = "."; + decimal_point_len = 1; +#endif g_assert (decimal_point_len != 0); @@ -961,6 +980,7 @@ g_ascii_formatd (gchar *buffer, return buffer; #endif } +#pragma GCC diagnostic pop #define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \ (c) == '\r' || (c) == '\t' || (c) == '\v') @@ -970,7 +990,7 @@ g_ascii_formatd (gchar *buffer, #define TOUPPER(c) (ISLOWER (c) ? (c) - 'a' + 'A' : (c)) #define TOLOWER(c) (ISUPPER (c) ? 
(c) - 'A' + 'a' : (c)) -#if !defined(HAVE_STRTOLL_L) || !defined(HAVE_STRTOULL_L) +#ifndef USE_XLOCALE static guint64 g_parse_long_long (const gchar *nptr, @@ -1095,7 +1115,7 @@ g_parse_long_long (const gchar *nptr, } return 0; } -#endif +#endif /* !USE_XLOCALE */ /** * g_ascii_strtoull: @@ -1116,12 +1136,13 @@ g_parse_long_long (const gchar *nptr, * locale-sensitive system strtoull() function. * * If the correct value would cause overflow, %G_MAXUINT64 - * is returned, and %ERANGE is stored in %errno. If the base is - * outside the valid range, zero is returned, and %EINVAL is stored - * in %errno. If the string conversion fails, zero is returned, and - * @endptr returns @nptr (if @endptr is non-%NULL). + * is returned, and `ERANGE` is stored in `errno`. + * If the base is outside the valid range, zero is returned, and + * `EINVAL` is stored in `errno`. + * If the string conversion fails, zero is returned, and @endptr returns + * @nptr (if @endptr is non-%NULL). * - * Return value: the #guint64 value or zero on error. + * Returns: the #guint64 value or zero on error. * * Since: 2.2 */ @@ -1130,7 +1151,7 @@ g_ascii_strtoull (const gchar *nptr, gchar **endptr, guint base) { -#ifdef HAVE_STRTOULL_L +#ifdef USE_XLOCALE return strtoull_l (nptr, endptr, base, get_C_locale ()); #else gboolean negative; @@ -1162,12 +1183,13 @@ g_ascii_strtoull (const gchar *nptr, * locale-sensitive system strtoll() function. * * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64 - * is returned, and %ERANGE is stored in %errno. If the base is - * outside the valid range, zero is returned, and %EINVAL is stored - * in %errno. If the string conversion fails, zero is returned, and - * @endptr returns @nptr (if @endptr is non-%NULL). + * is returned, and `ERANGE` is stored in `errno`. + * If the base is outside the valid range, zero is returned, and + * `EINVAL` is stored in `errno`. 
If the + * string conversion fails, zero is returned, and @endptr returns @nptr + * (if @endptr is non-%NULL). * - * Return value: the #gint64 value or zero on error. + * Returns: the #gint64 value or zero on error. * * Since: 2.12 */ @@ -1176,7 +1198,7 @@ g_ascii_strtoll (const gchar *nptr, gchar **endptr, guint base) { -#ifdef HAVE_STRTOLL_L +#ifdef USE_XLOCALE return strtoll_l (nptr, endptr, base, get_C_locale ()); #else gboolean negative; @@ -1212,30 +1234,19 @@ g_ascii_strtoll (const gchar *nptr, * not all platforms support the strerror() function. * * Returns: a UTF-8 string describing the error code. If the error code - * is unknown, it returns "unknown error (<code>)". + * is unknown, it returns "unknown error (<code>)". */ const gchar * g_strerror (gint errnum) { - gchar buf[64]; gchar *msg; - gchar *tofree; + gchar *tofree = NULL; const gchar *ret; gint saved_errno = errno; - msg = tofree = NULL; - -#ifdef HAVE_STRERROR msg = strerror (errnum); if (!g_get_charset (NULL)) msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL); -#endif - - if (!msg) - { - msg = buf; - _g_sprintf (msg, "unknown error (%d)", errnum); - } ret = g_intern_string (msg); g_free (tofree); @@ -1245,8 +1256,7 @@ g_strerror (gint errnum) /** * g_strsignal: - * @signum: the signal number. See the signal - * documentation + * @signum: the signal number. See the `signal` documentation * * Returns a string describing the given signal, e.g. "Segmentation fault". * You should use this function in preference to strsignal(), because it @@ -1254,7 +1264,7 @@ g_strerror (gint errnum) * the strsignal() function. * * Returns: a UTF-8 string describing the signal. If the signal is unknown, - * it returns "unknown signal (<signum>)". + * it returns "unknown signal (<signum>)". */ const gchar * g_strsignal (gint signum) @@ -1319,17 +1329,17 @@ g_strlcat (gchar *dest, * Portability wrapper that calls strlcpy() on systems which have it, * and emulates strlcpy() otherwise. 
Copies @src to @dest; @dest is * guaranteed to be nul-terminated; @src must be nul-terminated; - * @dest_size is the buffer size, not the number of chars to copy. + * @dest_size is the buffer size, not the number of bytes to copy. * - * At most dest_size - 1 characters will be copied. Always nul-terminates - * (unless dest_size == 0). This function does not - * allocate memory. Unlike strncpy(), this function doesn't pad dest (so - * it's often faster). It returns the size of the attempted result, - * strlen (src), so if @retval >= @dest_size, truncation occurred. + * At most @dest_size - 1 characters will be copied. Always nul-terminates + * (unless @dest_size is 0). This function does not allocate memory. Unlike + * strncpy(), this function doesn't pad @dest (so it's often faster). It + * returns the size of the attempted result, strlen (src), so if + * @retval >= @dest_size, truncation occurred. * - * Caveat: strlcpy() is supposedly more secure than - * strcpy() or strncpy(), but if you really want to avoid screwups, - * g_strdup() is an even better idea. + * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(), + * but if you really want to avoid screwups, g_strdup() is an even better + * idea. * * Returns: length of @src */ @@ -1338,9 +1348,9 @@ g_strlcpy (gchar *dest, const gchar *src, gsize dest_size) { - register gchar *d = dest; - register const gchar *s = src; - register gsize n = dest_size; + gchar *d = dest; + const gchar *s = src; + gsize n = dest_size; g_return_val_if_fail (dest != NULL, 0); g_return_val_if_fail (src != NULL, 0); @@ -1349,7 +1359,7 @@ g_strlcpy (gchar *dest, if (n != 0 && --n != 0) do { - register gchar c = *s++; + gchar c = *s++; *d++ = c; if (c == 0) @@ -1381,28 +1391,27 @@ g_strlcpy (gchar *dest, * guaranteeing nul-termination for @dest. The total size of @dest won't * exceed @dest_size. * - * At most dest_size - 1 characters will be copied. 
- * Unlike strncat, dest_size is the full size of dest, not the space left over. - * This function does NOT allocate memory. - * This always NUL terminates (unless siz == 0 or there were no NUL characters - * in the dest_size characters of dest to start with). + * At most @dest_size - 1 characters will be copied. Unlike strncat(), + * @dest_size is the full size of dest, not the space left over. This + * function does not allocate memory. It always nul-terminates (unless + * @dest_size == 0 or there were no nul characters in the @dest_size + * characters of dest to start with). * - * Caveat: this is supposedly a more secure alternative to - * strcat() or strncat(), but for real security g_strconcat() is harder - * to mess up. + * Caveat: this is supposedly a more secure alternative to strcat() or + * strncat(), but for real security g_strconcat() is harder to mess up. * * Returns: size of attempted result, which is MIN (dest_size, strlen - * (original dest)) + strlen (src), so if retval >= dest_size, - * truncation occurred. - **/ + * (original dest)) + strlen (src), so if retval >= dest_size, + * truncation occurred. + */ gsize g_strlcat (gchar *dest, const gchar *src, gsize dest_size) { - register gchar *d = dest; - register const gchar *s = src; - register gsize bytes_left = dest_size; + gchar *d = dest; + const gchar *s = src; + gsize bytes_left = dest_size; gsize dlength; /* Logically, MIN (strlen (d), dest_size) */ g_return_val_if_fail (dest != NULL, 0); @@ -1434,17 +1443,16 @@ g_strlcat (gchar *dest, /** * g_ascii_strdown: - * @str: a string. - * @len: length of @str in bytes, or -1 if @str is nul-terminated. + * @str: a string + * @len: length of @str in bytes, or -1 if @str is nul-terminated * * Converts all upper case ASCII letters to lower case ASCII letters. * - * Return value: a newly-allocated string, with all the upper case - * characters in @str converted to lower case, with - * semantics that exactly match g_ascii_tolower(). 
(Note - * that this is unlike the old g_strdown(), which modified - * the string in place.) - **/ + * Returns: a newly-allocated string, with all the upper case + * characters in @str converted to lower case, with semantics that + * exactly match g_ascii_tolower(). (Note that this is unlike the + * old g_strdown(), which modified the string in place.) + */ gchar* g_ascii_strdown (const gchar *str, gssize len) @@ -1465,17 +1473,16 @@ g_ascii_strdown (const gchar *str, /** * g_ascii_strup: - * @str: a string. - * @len: length of @str in bytes, or -1 if @str is nul-terminated. + * @str: a string + * @len: length of @str in bytes, or -1 if @str is nul-terminated * * Converts all lower case ASCII letters to upper case ASCII letters. * - * Return value: a newly allocated string, with all the lower case - * characters in @str converted to upper case, with - * semantics that exactly match g_ascii_toupper(). (Note - * that this is unlike the old g_strup(), which modified - * the string in place.) - **/ + * Returns: a newly allocated string, with all the lower case + * characters in @str converted to upper case, with semantics that + * exactly match g_ascii_toupper(). (Note that this is unlike the + * old g_strup(), which modified the string in place.) + */ gchar* g_ascii_strup (const gchar *str, gssize len) @@ -1495,12 +1502,35 @@ g_ascii_strup (const gchar *str, } /** + * g_str_is_ascii: + * @str: a string + * + * Determines if a string is pure ASCII. A string is pure ASCII if it + * contains no bytes with the high bit set. + * + * Returns: %TRUE if @str is ASCII + * + * Since: 2.40 + */ +gboolean +g_str_is_ascii (const gchar *str) +{ + gint i; + + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return FALSE; + + return TRUE; +} + +/** * g_strdown: * @string: the string to convert. * * Converts a string to lower case. 
* - * Return value: the string + * Returns: the string * * Deprecated:2.2: This function is totally broken for the reasons discussed * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown() @@ -1509,7 +1539,7 @@ g_ascii_strup (const gchar *str, gchar* g_strdown (gchar *string) { - register guchar *s; + guchar *s; g_return_val_if_fail (string != NULL, NULL); @@ -1527,19 +1557,20 @@ g_strdown (gchar *string) /** * g_strup: - * @string: the string to convert. + * @string: the string to convert * * Converts a string to upper case. * - * Return value: the string + * Returns: the string * - * Deprecated:2.2: This function is totally broken for the reasons discussed - * in the g_strncasecmp() docs - use g_ascii_strup() or g_utf8_strup() instead. - **/ + * Deprecated:2.2: This function is totally broken for the reasons + * discussed in the g_strncasecmp() docs - use g_ascii_strup() + * or g_utf8_strup() instead. + */ gchar* g_strup (gchar *string) { - register guchar *s; + guchar *s; g_return_val_if_fail (string != NULL, NULL); @@ -1560,8 +1591,7 @@ g_strup (gchar *string) * @string: the string to reverse * * Reverses all of the bytes in a string. For example, - * g_strreverse ("abcdef") will result - * in "fedcba". + * `g_strreverse ("abcdef")` will result in "fedcba". * * Note that g_strreverse() doesn't work on UTF-8 strings * containing multibyte characters. For that purpose, use @@ -1576,14 +1606,14 @@ g_strreverse (gchar *string) if (*string) { - register gchar *h, *t; + gchar *h, *t; h = string; t = string + strlen (string) - 1; while (h < t) { - register gchar c; + gchar c; c = *h; *h = *t; @@ -1598,7 +1628,7 @@ g_strreverse (gchar *string) /** * g_ascii_tolower: - * @c: any character. + * @c: any character * * Convert a character to ASCII lower case. * @@ -1610,10 +1640,9 @@ g_strreverse (gchar *string) * don't call it on %EOF but no need to worry about casting to #guchar * before passing a possibly non-ASCII character in. 
* - * Return value: the result of converting @c to lower case. - * If @c is not an ASCII upper case letter, - * @c is returned unchanged. - **/ + * Returns: the result of converting @c to lower case. If @c is + * not an ASCII upper case letter, @c is returned unchanged. + */ gchar g_ascii_tolower (gchar c) { @@ -1622,7 +1651,7 @@ g_ascii_tolower (gchar c) /** * g_ascii_toupper: - * @c: any character. + * @c: any character * * Convert a character to ASCII upper case. * @@ -1634,10 +1663,9 @@ g_ascii_tolower (gchar c) * don't call it on %EOF but no need to worry about casting to #guchar * before passing a possibly non-ASCII character in. * - * Return value: the result of converting @c to upper case. - * If @c is not an ASCII lower case letter, - * @c is returned unchanged. - **/ + * Returns: the result of converting @c to upper case. If @c is not + * an ASCII lower case letter, @c is returned unchanged. + */ gchar g_ascii_toupper (gchar c) { @@ -1646,16 +1674,15 @@ g_ascii_toupper (gchar c) /** * g_ascii_digit_value: - * @c: an ASCII character. + * @c: an ASCII character * - * Determines the numeric value of a character as a decimal - * digit. Differs from g_unichar_digit_value() because it takes - * a char, so there's no worry about sign extension if characters - * are signed. + * Determines the numeric value of a character as a decimal digit. + * Differs from g_unichar_digit_value() because it takes a char, so + * there's no worry about sign extension if characters are signed. * - * Return value: If @c is a decimal digit (according to - * g_ascii_isdigit()), its numeric value. Otherwise, -1. - **/ + * Returns: If @c is a decimal digit (according to g_ascii_isdigit()), + * its numeric value. Otherwise, -1. + */ int g_ascii_digit_value (gchar c) { @@ -1673,9 +1700,9 @@ g_ascii_digit_value (gchar c) * a char, so there's no worry about sign extension if characters * are signed. 
* - * Return value: If @c is a hex digit (according to - * g_ascii_isxdigit()), its numeric value. Otherwise, -1. - **/ + * Returns: If @c is a hex digit (according to g_ascii_isxdigit()), + * its numeric value. Otherwise, -1. + */ int g_ascii_xdigit_value (gchar c) { @@ -1688,8 +1715,8 @@ g_ascii_xdigit_value (gchar c) /** * g_ascii_strcasecmp: - * @s1: string to compare with @s2. - * @s2: string to compare with @s1. + * @s1: string to compare with @s2 + * @s2: string to compare with @s1 * * Compare two strings, ignoring the case of ASCII characters. * @@ -1705,9 +1732,11 @@ g_ascii_xdigit_value (gchar c) * characters include all ASCII letters. If you compare two CP932 * strings using this function, you will get false matches. * - * Return value: 0 if the strings match, a negative value if @s1 < @s2, - * or a positive value if @s1 > @s2. - **/ + * Both @s1 and @s2 must be non-%NULL. + * + * Returns: 0 if the strings match, a negative value if @s1 < @s2, + * or a positive value if @s1 > @s2. + */ gint g_ascii_strcasecmp (const gchar *s1, const gchar *s2) @@ -1731,9 +1760,9 @@ g_ascii_strcasecmp (const gchar *s1, /** * g_ascii_strncasecmp: - * @s1: string to compare with @s2. - * @s2: string to compare with @s1. - * @n: number of characters to compare. + * @s1: string to compare with @s2 + * @s2: string to compare with @s1 + * @n: number of characters to compare * * Compare @s1 and @s2, ignoring the case of ASCII characters and any * characters after the first @n in each string. @@ -1746,13 +1775,13 @@ g_ascii_strcasecmp (const gchar *s1, * function only on strings known to be in encodings where bytes * corresponding to ASCII letters always represent themselves. * - * Return value: 0 if the strings match, a negative value if @s1 < @s2, - * or a positive value if @s1 > @s2. - **/ + * Returns: 0 if the strings match, a negative value if @s1 < @s2, + * or a positive value if @s1 > @s2. 
+ */ gint g_ascii_strncasecmp (const gchar *s1, const gchar *s2, - gsize n) + gsize n) { gint c1, c2; @@ -1777,18 +1806,18 @@ g_ascii_strncasecmp (const gchar *s1, /** * g_strcasecmp: - * @s1: a string. - * @s2: a string to compare with @s1. + * @s1: a string + * @s2: a string to compare with @s1 * * A case-insensitive string comparison, corresponding to the standard * strcasecmp() function on platforms which support it. * - * Return value: 0 if the strings match, a negative value if @s1 < @s2, - * or a positive value if @s1 > @s2. + * Returns: 0 if the strings match, a negative value if @s1 < @s2, + * or a positive value if @s1 > @s2. * - * Deprecated:2.2: See g_strncasecmp() for a discussion of why this function - * is deprecated and how to replace it. - **/ + * Deprecated:2.2: See g_strncasecmp() for a discussion of why this + * function is deprecated and how to replace it. + */ gint g_strcasecmp (const gchar *s1, const gchar *s2) @@ -1822,32 +1851,33 @@ g_strcasecmp (const gchar *s1, /** * g_strncasecmp: - * @s1: a string. - * @s2: a string to compare with @s1. - * @n: the maximum number of characters to compare. + * @s1: a string + * @s2: a string to compare with @s1 + * @n: the maximum number of characters to compare * * A case-insensitive string comparison, corresponding to the standard - * strncasecmp() function on platforms which support it. - * It is similar to g_strcasecmp() except it only compares the first @n - * characters of the strings. - * - * Return value: 0 if the strings match, a negative value if @s1 < @s2, - * or a positive value if @s1 > @s2. - * - * Deprecated:2.2: The problem with g_strncasecmp() is that it does the - * comparison by calling toupper()/tolower(). These functions are - * locale-specific and operate on single bytes. However, it is impossible - * to handle things correctly from an I18N standpoint by operating on - * bytes, since characters may be multibyte. 
Thus g_strncasecmp() is - * broken if your string is guaranteed to be ASCII, since it's - * locale-sensitive, and it's broken if your string is localized, since - * it doesn't work on many encodings at all, including UTF-8, EUC-JP, - * etc. - * - * There are therefore two replacement functions: g_ascii_strncasecmp(), - * which only works on ASCII and is not locale-sensitive, and - * g_utf8_casefold(), which is good for case-insensitive sorting of UTF-8. - **/ + * strncasecmp() function on platforms which support it. It is similar + * to g_strcasecmp() except it only compares the first @n characters of + * the strings. + * + * Returns: 0 if the strings match, a negative value if @s1 < @s2, + * or a positive value if @s1 > @s2. + * + * Deprecated:2.2: The problem with g_strncasecmp() is that it does + * the comparison by calling toupper()/tolower(). These functions + * are locale-specific and operate on single bytes. However, it is + * impossible to handle things correctly from an internationalization + * standpoint by operating on bytes, since characters may be multibyte. + * Thus g_strncasecmp() is broken if your string is guaranteed to be + * ASCII, since it is locale-sensitive, and it's broken if your string + * is localized, since it doesn't work on many encodings at all, + * including UTF-8, EUC-JP, etc. + * + * There are therefore two replacement techniques: g_ascii_strncasecmp(), + * which only works on ASCII and is not locale-sensitive, and + * g_utf8_casefold() followed by strcmp() on the resulting strings, + * which is good for case-insensitive sorting of UTF-8. 
+ */ gint g_strncasecmp (const gchar *s1, const gchar *s2, @@ -1884,8 +1914,8 @@ g_strncasecmp (const gchar *s1, /** * g_strdelimit: * @string: the string to convert - * @delimiters: a string containing the current delimiters, or %NULL - * to use the standard delimiters defined in #G_STR_DELIMITERS + * @delimiters: (allow-none): a string containing the current delimiters, + * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS * @new_delimiter: the new delimiter character * * Converts any delimiter characters in @string to @new_delimiter. @@ -1893,7 +1923,7 @@ g_strncasecmp (const gchar *s1, * changed to the @new_delimiter character. Modifies @string in place, * and returns @string itself, not a copy. The return value is to * allow nesting such as - * |[ + * |[ * g_ascii_strup (g_strdelimit (str, "abc", '?')) * ]| * @@ -1904,7 +1934,7 @@ g_strdelimit (gchar *string, const gchar *delimiters, gchar new_delim) { - register gchar *c; + gchar *c; g_return_val_if_fail (string != NULL, NULL); @@ -1926,11 +1956,11 @@ g_strdelimit (gchar *string, * @valid_chars: bytes permitted in @string * @substitutor: replacement character for disallowed bytes * - * For each character in @string, if the character is not in - * @valid_chars, replaces the character with @substitutor. - * Modifies @string in place, and return @string itself, not - * a copy. The return value is to allow nesting such as - * |[ + * For each character in @string, if the character is not in @valid_chars, + * replaces the character with @substitutor. Modifies @string in place, + * and returns @string itself, not a copy. 
The return value is to allow + * nesting such as + * |[ * g_ascii_strup (g_strcanon (str, "abc", '?')) * ]| * @@ -1941,7 +1971,7 @@ g_strcanon (gchar *string, const gchar *valid_chars, gchar substitutor) { - register gchar *c; + gchar *c; g_return_val_if_fail (string != NULL, NULL); g_return_val_if_fail (valid_chars != NULL, NULL); @@ -1970,8 +2000,13 @@ gchar * g_strcompress (const gchar *source) { const gchar *p = source, *octal; - gchar *dest = g_malloc (strlen (source) + 1); - gchar *q = dest; + gchar *dest; + gchar *q; + + g_return_val_if_fail (source != NULL, NULL); + + dest = g_malloc (strlen (source) + 1); + q = dest; while (*p) { @@ -2010,6 +2045,9 @@ g_strcompress (const gchar *source) case 't': *q++ = '\t'; break; + case 'v': + *q++ = '\v'; + break; default: /* Also handles \" and \\ */ *q++ = *p; break; @@ -2030,7 +2068,7 @@ out: * @source: a string to escape * @exceptions: a string of characters not to escape in @source * - * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\' + * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\' * and '"' in the string @source by inserting a '\' before * them. Additionally all characters in the range 0x01-0x1F (everything * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are @@ -2097,6 +2135,10 @@ g_strescape (const gchar *source, *q++ = '\\'; *q++ = 't'; break; + case '\v': + *q++ = '\\'; + *q++ = 'v'; + break; case '\\': *q++ = '\\'; *q++ = '\\'; @@ -2132,8 +2174,10 @@ g_strescape (const gchar *source, * of the characters forward. * * This function doesn't allocate or reallocate any memory; - * it modifies @string in place. The pointer to @string is - * returned to allow the nesting of functions. + * it modifies @string in place. Therefore, it cannot be used on + * statically allocated strings. + * + * The pointer to @string is returned to allow the nesting of functions. * * Also see g_strchomp() and g_strstrip(). 
* @@ -2149,7 +2193,7 @@ g_strchug (gchar *string) for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++) ; - g_memmove (string, start, strlen ((gchar *) start) + 1); + memmove (string, start, strlen ((gchar *) start) + 1); return string; } @@ -2161,12 +2205,14 @@ g_strchug (gchar *string) * Removes trailing whitespace from a string. * * This function doesn't allocate or reallocate any memory; - * it modifies @string in place. The pointer to @string is - * returned to allow the nesting of functions. + * it modifies @string in place. Therefore, it cannot be used + * on statically allocated strings. + * + * The pointer to @string is returned to allow the nesting of functions. * * Also see g_strchug() and g_strstrip(). * - * Returns: @string. + * Returns: @string */ gchar * g_strchomp (gchar *string) @@ -2200,6 +2246,10 @@ g_strchomp (gchar *string) * @delimiter. If @max_tokens is reached, the remainder of @string is * appended to the last token. * + * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a + * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d" + * and "". + * * As a special case, the result of splitting the empty string "" is an empty * vector, not a vector containing a single string. The reason for this * special case is that being able to represent a empty vector is typically @@ -2207,7 +2257,7 @@ g_strchomp (gchar *string) * to represent empty elements, you'll need to check for the empty string * before calling g_strsplit(). * - * Return value: a newly-allocated %NULL-terminated array of strings. Use + * Returns: a newly-allocated %NULL-terminated array of strings. Use * g_strfreev() to free it. */ gchar** @@ -2279,7 +2329,7 @@ g_strsplit (const gchar *string, * %NULL-terminated vector containing the three strings "abc", "def", * and "ghi". 
* - * The result if g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated + * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated * vector containing the four strings "", "def", "ghi", and "". * * As a special case, the result of splitting the empty string "" is an empty @@ -2292,7 +2342,7 @@ g_strsplit (const gchar *string, * Note that this function works on bytes not characters, so it can't be used * to delimit UTF-8 strings for anything but ASCII characters. * - * Return value: a newly-allocated %NULL-terminated array of strings. Use + * Returns: a newly-allocated %NULL-terminated array of strings. Use * g_strfreev() to free it. * * Since: 2.4 @@ -2366,7 +2416,7 @@ g_strsplit_set (const gchar *string, * Frees a %NULL-terminated array of strings, and the array itself. * If called on a %NULL value, g_strfreev() simply returns. - **/ + */ void g_strfreev (gchar **str_array) { @@ -2390,7 +2440,7 @@ g_strfreev (gchar **str_array) * the array itself. g_strfreev() does this for you. If called * on a %NULL value, g_strdupv() simply returns %NULL. * - * Return value: a new %NULL-terminated array of strings. + * Returns: a new %NULL-terminated array of strings. 
 */ gchar** g_strdupv (gchar **str_array) @@ -2422,7 +2472,8 @@ g_strdupv (gchar **str_array) /** * g_strjoinv: - * @separator: a string to insert between each of the strings, or %NULL + * @separator: (allow-none): a string to insert between each of the + * strings, or %NULL * @str_array: a %NULL-terminated array of strings to join * * Joins a number of strings together to form one long string, with the @@ -2474,7 +2525,8 @@ g_strjoinv (const gchar *separator, /** * g_strjoin: - * @separator: a string to insert between each of the strings, or %NULL + * @separator: (allow-none): a string to insert between each of the + * strings, or %NULL * @...: a %NULL-terminated list of strings to join * * Joins a number of strings together to form one long string, with the @@ -2553,7 +2605,7 @@ g_strjoin (const gchar *separator, * of the string @needle, limiting the length of the search * to @haystack_len. * - * Return value: a pointer to the found occurrence, or + * Returns: a pointer to the found occurrence, or * %NULL if not found. */ gchar * @@ -2605,7 +2657,7 @@ g_strstr_len (const gchar *haystack, * Searches the string @haystack for the last occurrence * of the string @needle. * - * Return value: a pointer to the found occurrence, or + * Returns: a pointer to the found occurrence, or * %NULL if not found. */ gchar * @@ -2656,7 +2708,7 @@ g_strrstr (const gchar *haystack, * of the string @needle, limiting the length of the search * to @haystack_len. * - * Return value: a pointer to the found occurrence, or + * Returns: a pointer to the found occurrence, or * %NULL if not found. */ gchar * @@ -2708,7 +2760,7 @@ g_strrstr_len (const gchar *haystack, * * Looks whether the string @str ends with @suffix. * - * Return value: %TRUE if @str end with @suffix, %FALSE otherwise. + * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise. * * Since: 2.2 */ @@ -2738,7 +2790,7 @@ g_str_has_suffix (const gchar *str, * * Looks whether the string @str begins with @prefix. 
* - * Return value: %TRUE if @str begins with @prefix, %FALSE otherwise. + * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise. * * Since: 2.2 */ @@ -2746,19 +2798,10 @@ gboolean g_str_has_prefix (const gchar *str, const gchar *prefix) { - int str_len; - int prefix_len; - g_return_val_if_fail (str != NULL, FALSE); g_return_val_if_fail (prefix != NULL, FALSE); - str_len = strlen (str); - prefix_len = strlen (prefix); - - if (str_len < prefix_len) - return FALSE; - - return strncmp (str, prefix, prefix_len) == 0; + return strncmp (str, prefix, strlen (prefix)) == 0; } /** @@ -2768,7 +2811,7 @@ g_str_has_prefix (const gchar *str, * Returns the length of the given %NULL-terminated * string array @str_array. * - * Return value: length of @str_array. + * Returns: length of @str_array. * * Since: 2.6 */ @@ -2784,3 +2827,251 @@ g_strv_length (gchar **str_array) return i; } + +static void +index_add_folded (GPtrArray *array, + const gchar *start, + const gchar *end) +{ + gchar *normal; + + normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE); + + /* TODO: Invent time machine. Converse with Mustafa Ataturk... 
*/ + if (strstr (normal, "ı") || strstr (normal, "İ")) + { + gchar *s = normal; + GString *tmp; + + tmp = g_string_new (NULL); + + while (*s) + { + gchar *i, *I, *e; + + i = strstr (s, "ı"); + I = strstr (s, "İ"); + + if (!i && !I) + break; + else if (i && !I) + e = i; + else if (I && !i) + e = I; + else if (i < I) + e = i; + else + e = I; + + g_string_append_len (tmp, s, e - s); + g_string_append_c (tmp, 'i'); + s = g_utf8_next_char (e); + } + + g_string_append (tmp, s); + g_free (normal); + normal = g_string_free (tmp, FALSE); + } + + g_ptr_array_add (array, g_utf8_casefold (normal, -1)); + g_free (normal); +} + +static gchar ** +split_words (const gchar *value) +{ + const gchar *start = NULL; + GPtrArray *result; + const gchar *s; + + result = g_ptr_array_new (); + + for (s = value; *s; s = g_utf8_next_char (s)) + { + gunichar c = g_utf8_get_char (s); + + if (start == NULL) + { + if (g_unichar_isalnum (c) || g_unichar_ismark (c)) + start = s; + } + else + { + if (!g_unichar_isalnum (c) && !g_unichar_ismark (c)) + { + index_add_folded (result, start, s); + start = NULL; + } + } + } + + if (start) + index_add_folded (result, start, s); + + g_ptr_array_add (result, NULL); + + return (gchar **) g_ptr_array_free (result, FALSE); +} + +/** + * g_str_tokenize_and_fold: + * @string: a string + * @translit_locale: (allow-none): the language code (like 'de' or + * 'en_GB') from which @string originates + * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a + * return location for ASCII alternates + * + * Tokenises @string and performs folding on each token. + * + * A token is a non-empty sequence of alphanumeric characters in the + * source string, separated by non-alphanumeric characters. An + * "alphanumeric" character for this purpose is one that matches + * g_unichar_isalnum() or g_unichar_ismark(). + * + * Each token is then (Unicode) normalised and case-folded. 
If + * @ascii_alternates is non-%NULL and some of the returned tokens + * contain non-ASCII characters, ASCII alternatives will be generated. + * + * The number of ASCII alternatives that are generated and the method + * for doing so is unspecified, but @translit_locale (if specified) may + * improve the transliteration if the language of the source string is + * known. + * + * Returns: (transfer full) (array zero-terminated=1): the folded tokens + * + * Since: 2.40 + **/ +gchar ** +g_str_tokenize_and_fold (const gchar *string, + const gchar *translit_locale, + gchar ***ascii_alternates) +{ + gchar **result; + + g_return_val_if_fail (string != NULL, NULL); + + if (ascii_alternates && g_str_is_ascii (string)) + { + *ascii_alternates = g_new0 (gchar *, 0 + 1); + ascii_alternates = NULL; + } + + result = split_words (string); + + if (ascii_alternates) + { + gint i, j, n; + + n = g_strv_length (result); + *ascii_alternates = g_new (gchar *, n + 1); + j = 0; + + for (i = 0; i < n; i++) + { + if (!g_str_is_ascii (result[i])) + { + gchar *composed; + gchar *ascii; + gint k; + + composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE); + + ascii = g_str_to_ascii (composed, translit_locale); + + /* Only accept strings that are now entirely alnums */ + for (k = 0; ascii[k]; k++) + if (!g_ascii_isalnum (ascii[k])) + break; + + if (ascii[k] == '\0') + /* Made it to the end... */ + (*ascii_alternates)[j++] = ascii; + else + g_free (ascii); + + g_free (composed); + } + } + + (*ascii_alternates)[j] = NULL; + } + + return result; +} + +/** + * g_str_match_string: + * @search_term: the search term from the user + * @potential_hit: the text that may be a hit + * @accept_alternates: %TRUE to accept ASCII alternates + * + * Checks if a search conducted for @search_term should match + * @potential_hit. + * + * This function calls g_str_tokenize_and_fold() on both + * @search_term and @potential_hit. 
ASCII alternates are never taken + * for @search_term but will be taken for @potential_hit according to + * the value of @accept_alternates. + * + * A hit occurs when each folded token in @search_term is a prefix of a + * folded token from @potential_hit. + * + * Depending on how you're performing the search, it will typically be + * faster to call g_str_tokenize_and_fold() on each string in + * your corpus and build an index on the returned folded tokens, then + * call g_str_tokenize_and_fold() on the search term and + * perform lookups into that index. + * + * As some examples, searching for "fred" would match the potential hit + * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match + * "Frédéric" but not "Frederic" (due to the one-directional nature of + * accent matching). Searching "fo" would match "Foo" and "Bar Foo + * Baz", but not "SFO" (because no word has "fo" as a prefix). + * + * Returns: %TRUE if @potential_hit is a hit + * + * Since: 2.40 + **/ +gboolean +g_str_match_string (const gchar *search_term, + const gchar *potential_hit, + gboolean accept_alternates) +{ + gchar **alternates = NULL; + gchar **term_tokens; + gchar **hit_tokens; + gboolean matched; + gint i, j; + + g_return_val_if_fail (search_term != NULL, FALSE); + g_return_val_if_fail (potential_hit != NULL, FALSE); + + term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL); + hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL); + + matched = TRUE; + + for (i = 0; term_tokens[i]; i++) + { + for (j = 0; hit_tokens[j]; j++) + if (g_str_has_prefix (hit_tokens[j], term_tokens[i])) + goto one_matched; + + if (accept_alternates) + for (j = 0; alternates[j]; j++) + if (g_str_has_prefix (alternates[j], term_tokens[i])) + goto one_matched; + + matched = FALSE; + break; + +one_matched: + continue; + } + + g_strfreev (term_tokens); + g_strfreev (hit_tokens); + g_strfreev (alternates); + + return matched; +}