* Converts from an integer character offset to a pointer to a position
* within the string.
*
+ * Since 2.10, this function allows passing a negative @offset to
+ * step backwards. It is usually worth stepping backwards from the end
+ * instead of forwards if @offset is in the last fourth of the string,
+ * since moving forward is about 3 times faster than moving backward.
+ *
* Return value: the resulting pointer
**/
gchar *
glong offset)
{
const gchar *s = str;
- while (offset--)
- s = g_utf8_next_char (s);
-
+
+ if (offset > 0)
+ while (offset--)
+ s = g_utf8_next_char (s);
+ else
+ {
+ const char *s1;
+
+ /* This nice technique for fast backwards stepping
+ * through a UTF-8 string was dubbed "stutter stepping"
+ * by its inventor, Larry Ewing.
+ */
+ while (offset)
+ {
+ s1 = s;
+ s += offset;
+ while ((*s & 0xc0) == 0x80)
+ s--;
+
+ offset += g_utf8_pointer_to_offset (s, s1);
+ }
+ }
+
return (gchar *)s;
}
*
* Converts from a pointer to a position within a string to an integer
* character offset.
+ *
+ * Since 2.10, this function allows @pos to be before @str, and returns
+ * a negative offset in this case.
*
* Return value: the resulting character offset
**/
{
const gchar *s = str;
glong offset = 0;
-
- while (s < pos)
- {
- s = g_utf8_next_char (s);
- offset++;
- }
+ if (pos < str)
+ offset = - g_utf8_pointer_to_offset (pos, str);
+ else
+ while (s < pos)
+ {
+ s = g_utf8_next_char (s);
+ offset++;
+ }
+
return offset;
}
/**
* g_ucs4_to_utf8:
* @str: a UCS-4 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- * the string is terminated with a 0 character.
+ * @len: the maximum length (number of characters) of @str to use.
+ * If @len < 0, then the string is terminated with a 0 character.
* @items_read: location to store number of characters read, or %NULL.
* @items_written: location to store number of bytes written or %NULL.
* The value stored here does not include the trailing 0
/**
* g_utf16_to_utf8:
* @str: a UTF-16 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- * the string is terminated with a 0 character.
+ * @len: the maximum length (number of <type>gunichar2</type>) of @str to use.
+ * If @len < 0, then the string is terminated with a 0 character.
* @items_read: location to store number of words read, or %NULL.
* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
* returned in case @str contains a trailing partial
/**
* g_utf16_to_ucs4:
* @str: a UTF-16 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- * the string is terminated with a 0 character.
+ * @len: the maximum length (number of <type>gunichar2</type>) of @str to use.
+ * If @len < 0, then the string is terminated with a 0 character.
* @items_read: location to store number of words read, or %NULL.
* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
* returned in case @str contains a trailing partial
/**
* g_utf8_to_utf16:
* @str: a UTF-8 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- * the string is nul-terminated.
+ * @len: the maximum length (number of bytes) of @str to use.
+ * If @len < 0, then the string is nul-terminated.
* @items_read: location to store number of bytes read, or %NULL.
* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
* returned in case @str contains a trailing partial
* character. If an error occurs then the index of the
* invalid input is stored here.
- * @items_written: location to store number of words written, or %NULL.
- * The value stored here does not include the trailing
- * 0 word.
+ * @items_written: location to store number of <type>gunichar2</type> written,
+ * or %NULL.
+ * The value stored here does not include the trailing 0.
* @error: location to store the error occurring, or %NULL to ignore
* errors. Any of the errors in #GConvertError other than
* %G_CONVERT_ERROR_NO_CONVERSION may occur.
*
- * Convert a string from UTF-8 to UTF-16. A 0 word will be
+ * Convert a string from UTF-8 to UTF-16. A 0 character will be
* added to the result after the converted text.
*
* Return value: a pointer to a newly allocated UTF-16 string.
/**
* g_ucs4_to_utf16:
* @str: a UCS-4 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- * the string is terminated with a 0 character.
+ * @len: the maximum length (number of characters) of @str to use.
+ * If @len < 0, then the string is terminated with a 0 character.
* @items_read: location to store number of bytes read, or %NULL.
* If an error occurs then the index of the invalid input
* is stored here.
- * @items_written: location to store number of words written, or %NULL.
- * The value stored here does not include the trailing
- * 0 word.
+ * @items_written: location to store number of <type>gunichar2</type>
+ * written, or %NULL. The value stored here does not
+ * include the trailing 0.
* @error: location to store the error occurring, or %NULL to ignore
* errors. Any of the errors in #GConvertError other than
* %G_CONVERT_ERROR_NO_CONVERSION may occur.
*
- * Convert a string from UCS-4 to UTF-16. A 0 word will be
+ * Convert a string from UCS-4 to UTF-16. A 0 character will be
* added to the result after the converted text.
*
* Return value: a pointer to a newly allocated UTF-16 string.