* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* is made to see if the character found is actually valid other than
* it starts with an appropriate byte.
*
- * Returns: a pointer to the found character or %NULL.
+ * Returns: (transfer none) (nullable): a pointer to the found character or %NULL.
*/
gchar *
g_utf8_find_prev_char (const char *str,
* is made to see if the character found is actually valid other than
* it starts with an appropriate byte.
*
- * Returns: a pointer to the found character or %NULL
+ * If @end is %NULL, the return value will never be %NULL: if the end of the
+ * string is reached, a pointer to the terminating nul byte is returned. If
+ * @end is non-%NULL, the return value will be %NULL if the end of the string
+ * is reached.
+ *
+ * Returns: (transfer none) (nullable): a pointer to the found character or %NULL if @end is
+ * set and is reached
*/
gchar *
g_utf8_find_next_char (const gchar *p,
* it starts with an appropriate byte. If @p might be the first
* character of the string, you must use g_utf8_find_prev_char() instead.
*
- * Returns: a pointer to the found character
+ * Returns: (transfer none) (not nullable): a pointer to the found character
*/
gchar *
g_utf8_prev_char (const gchar *p)
* Copies a substring out of a UTF-8 encoded string.
* The substring will contain @end_pos - @start_pos characters.
*
- * Returns: a newly allocated copy of the requested
+ * Returns: (transfer full): a newly allocated copy of the requested
* substring. Free with g_free() when no longer needed.
*
* Since: 2.30
* This limitation exists as this function is called frequently during
* text rendering and therefore has to be as fast as possible.
*
- * Returns: the resulting pointer
+ * Returns: (transfer none): the resulting pointer
*/
gchar *
g_utf8_offset_to_pointer (const gchar *str,
/**
* g_utf8_strncpy:
- * @dest: buffer to fill with characters from @src
+ * @dest: (transfer none): buffer to fill with characters from @src
* @src: UTF-8 encoded string
* @n: character count
*
* must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
* text before trying to use UTF-8 utility functions with it.)
*
- * Returns: @dest
+ * Note you must ensure @dest is at least 4 * @n + 1 bytes to fit the
+ * largest possible UTF-8 characters and the terminating nul byte
+ *
+ * Returns: (transfer none): @dest
*/
gchar *
g_utf8_strncpy (gchar *dest,
* in a UTF-8 encoded string, while limiting the search to @len bytes.
* If @len is -1, allow unbounded search.
*
- * Returns: %NULL if the string does not contain the character,
+ * Returns: (transfer none) (nullable): %NULL if the string does not contain the character,
* otherwise, a pointer to the start of the leftmost occurrence
* of the character in the string.
*/
* in a UTF-8 encoded string, while limiting the search to @len bytes.
* If @len is -1, allow unbounded search.
*
- * Returns: %NULL if the string does not contain the character,
+ * Returns: (transfer none) (nullable): %NULL if the string does not contain the character,
* otherwise, a pointer to the start of the rightmost occurrence
* of the character in the string.
*/
guint i, len;
gunichar min_code;
gunichar wc = (guchar) *p;
+ const gunichar partial_sequence = (gunichar) -2;
+ const gunichar malformed_sequence = (gunichar) -1;
if (wc < 0x80)
{
}
else if (G_UNLIKELY (wc < 0xc0))
{
- return (gunichar)-1;
+ return malformed_sequence;
}
else if (wc < 0xe0)
{
}
else
{
- return (gunichar)-1;
+ return malformed_sequence;
}
if (G_UNLIKELY (max_len >= 0 && len > max_len))
for (i = 1; i < max_len; i++)
{
if ((((guchar *)p)[i] & 0xc0) != 0x80)
- return (gunichar)-1;
+ return malformed_sequence;
}
- return (gunichar)-2;
+ return partial_sequence;
}
for (i = 1; i < len; ++i)
if (G_UNLIKELY ((ch & 0xc0) != 0x80))
{
if (ch)
- return (gunichar)-1;
+ return malformed_sequence;
else
- return (gunichar)-2;
+ return partial_sequence;
}
wc <<= 6;
}
if (G_UNLIKELY (wc < min_code))
- return (gunichar)-1;
+ return malformed_sequence;
return wc;
}
/**
* g_utf8_get_char_validated:
* @p: a pointer to Unicode character encoded as UTF-8
- * @max_len: the maximum number of bytes to read, or -1, for no maximum or
- * if @p is nul-terminated
- *
+ * @max_len: the maximum number of bytes to read, or -1 if @p is nul-terminated
+ *
* Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
* This function checks for incomplete characters, for invalid characters
* such as characters that are out of the range of Unicode, and for
* overlong encodings of valid characters.
+ *
+ * Note that g_utf8_get_char_validated() returns (gunichar)-2 if
+ * @max_len is positive and any of the bytes in the first UTF-8 character
+ * sequence are nul.
*
* Returns: the resulting character. If @p points to a partial
* sequence at the end of a string that could begin a valid
* but does no error checking on the input. A trailing 0 character
* will be added to the string after the converted text.
*
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
* This value must be freed with g_free().
*/
gunichar *
* representation as UCS-4. A trailing 0 character will be added to the
* string after the converted text.
*
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set.
*/
* Convert a string from a 32-bit fixed width representation as UCS-4
* to UTF-8. The result will be terminated with a 0 byte.
*
- * Returns: a pointer to a newly allocated UTF-8 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-8 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set. In that case, @items_read
* will be set to the position of the first invalid input character.
* be correctly interpreted as UTF-16, i.e. it doesn't contain
* things like unpaired surrogates.
*
- * Returns: a pointer to a newly allocated UTF-8 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-8 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set.
**/
* Convert a string from UTF-16 to UCS-4. The result will be
* nul-terminated.
*
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set.
*/
* Convert a string from UTF-8 to UTF-16. A 0 character will be
* added to the result after the converted text.
*
- * Returns: a pointer to a newly allocated UTF-16 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-16 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set.
*/
* Convert a string from UCS-4 to UTF-16. A 0 character will be
* added to the result after the converted text.
*
- * Returns: a pointer to a newly allocated UTF-16 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-16 string.
* This value must be freed with g_free(). If an error occurs,
* %NULL will be returned and @error set.
*/
* g_utf8_validate:
* @str: (array length=max_len) (element-type guint8): a pointer to character data
* @max_len: max bytes to validate, or -1 to go until NUL
- * @end: (allow-none) (out) (transfer none): return location for end of valid data
+ * @end: (out) (optional) (transfer none): return location for end of valid data
*
* Validates UTF-8 encoded text. @str is the text to validate;
* if @str is nul-terminated, then @max_len can be -1, otherwise
{
const gchar *p;
- if (max_len < 0)
- p = fast_validate (str);
+ if (max_len >= 0)
+ return g_utf8_validate_len (str, max_len, end);
+
+ p = fast_validate (str);
+
+ if (end)
+ *end = p;
+
+ if (*p != '\0')
+ return FALSE;
else
- p = fast_validate_len (str, max_len);
+ return TRUE;
+}
+
+/**
+ * g_utf8_validate_len:
+ * @str: (array length=max_len) (element-type guint8): a pointer to character data
+ * @max_len: max bytes to validate
+ * @end: (out) (optional) (transfer none): return location for end of valid data
+ *
+ * Validates UTF-8 encoded text.
+ *
+ * As with g_utf8_validate(), but @max_len must be set, and hence this function
+ * will always return %FALSE if any of the bytes of @str are nul.
+ *
+ * Returns: %TRUE if the text was valid UTF-8
+ * Since: 2.60
+ */
+gboolean
+g_utf8_validate_len (const char *str,
+ gsize max_len,
+ const gchar **end)
+
+{
+ const gchar *p;
+
+ p = fast_validate_len (str, max_len);
if (end)
*end = p;
- if ((max_len >= 0 && p != str + max_len) ||
- (max_len < 0 && *p != '\0'))
+ if (p != str + max_len)
return FALSE;
else
return TRUE;
* newly-allocated memory, which should be freed with g_free() when
* no longer needed.
*
- * Returns: a newly-allocated string which is the reverse of @str
+ * Returns: (transfer full): a newly-allocated string which is the reverse of @str
*
* Since: 2.2
*/
/**
* g_utf8_make_valid:
* @str: string to coerce into UTF-8
+ * @len: the maximum length of @str to use, in bytes. If @len < 0,
+ * then the string is nul-terminated.
*
* If the provided string is valid UTF-8, return a copy of it. If not,
* return a copy in which bytes that could not be interpreted as valid Unicode
* Since: 2.52
*/
gchar *
-g_utf8_make_valid (const gchar *str)
+g_utf8_make_valid (const gchar *str,
+ gssize len)
{
GString *string;
const gchar *remainder, *invalid;
- gint remaining_bytes, valid_bytes;
+ gsize remaining_bytes, valid_bytes;
g_return_val_if_fail (str != NULL, NULL);
+ if (len < 0)
+ len = strlen (str);
+
string = NULL;
remainder = str;
- remaining_bytes = strlen (str);
+ remaining_bytes = len;
while (remaining_bytes != 0)
{
}
if (string == NULL)
- return g_strdup (str);
+ return g_strndup (str, len);
- g_string_append (string, remainder);
+ g_string_append_len (string, remainder, remaining_bytes);
+ g_string_append_c (string, '\0');
g_assert (g_utf8_validate (string->str, -1, NULL));
-
+
return g_string_free (string, FALSE);
}