Imported Upstream version 2.61.2

[platform/upstream/glib.git] / glib / gutf8.c
diff --git a/glib/gutf8.c b/glib/gutf8.c

index e66e061..a19f720 100644 (file)
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -136,7 +136,7 @@ const gchar * const g_utf8_skip = utf8_skip_data;
   * is made to see if the character found is actually valid other than
   * it starts with an appropriate byte.
   *
- * Returns: a pointer to the found character or %NULL.
+ * Returns: (transfer none) (nullable): a pointer to the found character or %NULL.
   */
  gchar *
  g_utf8_find_prev_char (const char *str,
@@ -162,7 +162,13 @@ g_utf8_find_prev_char (const char *str,
   * is made to see if the character found is actually valid other than
   * it starts with an appropriate byte.
   * 
- * Returns: a pointer to the found character or %NULL
+ * If @end is %NULL, the return value will never be %NULL: if the end of the
+ * string is reached, a pointer to the terminating nul byte is returned. If
+ * @end is non-%NULL, the return value will be %NULL if the end of the string
+ * is reached.
+ *
+ * Returns: (transfer none) (nullable): a pointer to the found character or %NULL if @end is
+ *    set and is reached
   */
  gchar *
  g_utf8_find_next_char (const gchar *p,
@@ -193,7 +199,7 @@ g_utf8_find_next_char (const gchar *p,
   * it starts with an appropriate byte. If @p might be the first
   * character of the string, you must use g_utf8_find_prev_char() instead.
   * 
- * Returns: a pointer to the found character
+ * Returns: (transfer none) (not nullable): a pointer to the found character
   */
  gchar *
  g_utf8_prev_char (const gchar *p)
@@ -269,7 +275,7 @@ g_utf8_strlen (const gchar *p,
   * Copies a substring out of a UTF-8 encoded string.
   * The substring will contain @end_pos - @start_pos characters.
   *
- * Returns: a newly allocated copy of the requested
+ * Returns: (transfer full): a newly allocated copy of the requested
   *     substring. Free with g_free() when no longer needed.
   *
   * Since: 2.30
@@ -338,7 +344,7 @@ g_utf8_get_char (const gchar *p)
   * This limitation exists as this function is called frequently during
   * text rendering and therefore has to be as fast as possible.
   *
- * Returns: the resulting pointer
+ * Returns: (transfer none): the resulting pointer
   */
  gchar *
  g_utf8_offset_to_pointer  (const gchar *str,
@@ -406,7 +412,7 @@ g_utf8_pointer_to_offset (const gchar *str,
  
  /**
   * g_utf8_strncpy:
- * @dest: buffer to fill with characters from @src
+ * @dest: (transfer none): buffer to fill with characters from @src
   * @src: UTF-8 encoded string
   * @n: character count
   * 
@@ -415,7 +421,10 @@ g_utf8_pointer_to_offset (const gchar *str,
   * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
   * text before trying to use UTF-8 utility functions with it.)
   * 
- * Returns: @dest
+ * Note you must ensure @dest is at least 4 * @n bytes long to fit the
+ * largest possible UTF-8 characters.
+ *
+ * Returns: (transfer none): @dest
   */
  gchar *
  g_utf8_strncpy (gchar       *dest,
@@ -509,7 +518,7 @@ g_unichar_to_utf8 (gunichar c,
   * in a UTF-8 encoded string, while limiting the search to @len bytes.
   * If @len is -1, allow unbounded search.
   * 
- * Returns: %NULL if the string does not contain the character, 
+ * Returns: (transfer none) (nullable): %NULL if the string does not contain the character,
   *     otherwise, a pointer to the start of the leftmost occurrence
   *     of the character in the string.
   */
@@ -537,7 +546,7 @@ g_utf8_strchr (const char *p,
   * in a UTF-8 encoded string, while limiting the search to @len bytes.
   * If @len is -1, allow unbounded search.
   * 
- * Returns: %NULL if the string does not contain the character, 
+ * Returns: (transfer none) (nullable): %NULL if the string does not contain the character,
   *     otherwise, a pointer to the start of the rightmost occurrence
   *     of the character in the string.
   */
@@ -654,6 +663,10 @@ g_utf8_get_char_extended (const  gchar *p,
   * This function checks for incomplete characters, for invalid characters
   * such as characters that are out of the range of Unicode, and for
   * overlong encodings of valid characters.
+ *
+ * Note that g_utf8_get_char_validated() returns (gunichar)-2 if
+ * @max_len is positive and any of the bytes in the first UTF-8 character
+ * sequence are nul.
   * 
   * Returns: the resulting character. If @p points to a partial
   *     sequence at the end of a string that could begin a valid 
@@ -696,7 +709,7 @@ g_utf8_get_char_validated (const gchar *p,
   * but does no error checking on the input. A trailing 0 character
   * will be added to the string after the converted text.
   * 
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
   *     This value must be freed with g_free().
   */
  gunichar *
@@ -821,7 +834,7 @@ try_malloc_n (gsize n_blocks, gsize n_block_bytes, GError **error)
   * representation as UCS-4. A trailing 0 character will be added to the
   * string after the converted text.
   * 
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set.
   */
@@ -902,7 +915,7 @@ g_utf8_to_ucs4 (const gchar *str,
   * Convert a string from a 32-bit fixed width representation as UCS-4.
   * to UTF-8. The result will be terminated with a 0 byte.
   * 
- * Returns: a pointer to a newly allocated UTF-8 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-8 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set. In that case, @items_read
   *     will be set to the position of the first invalid input character.
@@ -989,7 +1002,7 @@ g_ucs4_to_utf8 (const gunichar *str,
   * be correctly interpreted as UTF-16, i.e. it doesn't contain
   * things unpaired surrogates.
   *
- * Returns: a pointer to a newly allocated UTF-8 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-8 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set.
   **/
@@ -1133,7 +1146,7 @@ g_utf16_to_utf8 (const gunichar2  *str,
   * Convert a string from UTF-16 to UCS-4. The result will be
   * nul-terminated.
   * 
- * Returns: a pointer to a newly allocated UCS-4 string.
+ * Returns: (transfer full): a pointer to a newly allocated UCS-4 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set.
   */
@@ -1271,7 +1284,7 @@ g_utf16_to_ucs4 (const gunichar2  *str,
   * Convert a string from UTF-8 to UTF-16. A 0 character will be
   * added to the result after the converted text.
   *
- * Returns: a pointer to a newly allocated UTF-16 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-16 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set.
   */
@@ -1387,7 +1400,7 @@ g_utf8_to_utf16 (const gchar *str,
   * Convert a string from UCS-4 to UTF-16. A 0 character will be
   * added to the result after the converted text.
   * 
- * Returns: a pointer to a newly allocated UTF-16 string.
+ * Returns: (transfer full): a pointer to a newly allocated UTF-16 string.
   *     This value must be freed with g_free(). If an error occurs,
   *     %NULL will be returned and @error set.
   */
@@ -1656,16 +1669,48 @@ g_utf8_validate (const char   *str,
  {
    const gchar *p;
  
-  if (max_len < 0)
-    p = fast_validate (str);
+  if (max_len >= 0)
+    return g_utf8_validate_len (str, max_len, end);
+
+  p = fast_validate (str);
+
+  if (end)
+    *end = p;
+
+  if (*p != '\0')
+    return FALSE;
    else
-    p = fast_validate_len (str, max_len);
+    return TRUE;
+}
+
+/**
+ * g_utf8_validate_len:
+ * @str: (array length=max_len) (element-type guint8): a pointer to character data
+ * @max_len: max bytes to validate
+ * @end: (out) (optional) (transfer none): return location for end of valid data
+ *
+ * Validates UTF-8 encoded text.
+ *
+ * As with g_utf8_validate(), but @max_len must be set, and hence this function
+ * will always return %FALSE if any of the bytes of @str are nul.
+ *
+ * Returns: %TRUE if the text was valid UTF-8
+ * Since: 2.60
+ */
+gboolean
+g_utf8_validate_len (const char   *str,
+                     gsize         max_len,
+                     const gchar **end)
+
+{
+  const gchar *p;
+
+  p = fast_validate_len (str, max_len);
  
    if (end)
      *end = p;
  
-  if ((max_len >= 0 && p != str + max_len) ||
-      (max_len < 0 && *p != '\0'))
+  if (p != str + max_len)
      return FALSE;
    else
      return TRUE;
@@ -1707,7 +1752,7 @@ g_unichar_validate (gunichar ch)
   * newly-allocated memory, which should be freed with g_free() when
   * no longer needed. 
   *
- * Returns: a newly-allocated string which is the reverse of @str
+ * Returns: (transfer full): a newly-allocated string which is the reverse of @str
   *
   * Since: 2.2
   */
@@ -1793,7 +1838,7 @@ g_utf8_make_valid (const gchar *str,
    if (string == NULL)
      return g_strndup (str, len);
    
-  g_string_append (string, remainder);
+  g_string_append_len (string, remainder, remaining_bytes);
    g_string_append_c (string, '\0');
  
    g_assert (g_utf8_validate (string->str, -1, NULL));