+ if (items_written)
+ *items_written = p - result;
+
+ err_out:
+ if (items_read)
+ *items_read = i;
+
+ return result;
+}
+
+#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
+
+/**
+ * g_utf16_to_utf8:
+ * @str: a UTF-16 encoded string
+ * @len: the maximum length of @str to use. If < 0, then
+ * the string is terminated with a 0 character.
+ * @items_read: location to store number of words read, or %NULL.
+ * If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
+ * returned in case @str contains a trailing partial
+ * character. If an error occurs then the index of the
+ * invalid input is stored here.
+ * @items_written: location to store number of bytes written, or %NULL.
+ * The value stored here does not include the trailing
+ * 0 byte.
+ * @error: location to store the error occuring, or %NULL to ignore
+ * errors. Any of the errors in #GConvertError other than
+ * %G_CONVERT_ERROR_NO_CONVERSION may occur.
+ *
+ * Convert a string from UTF-16 to UTF-8. The result will be
+ * terminated with a 0 byte.
+ *
+ * Return value: a pointer to a newly allocated UTF-8 string.
+ * This value must be freed with g_free(). If an
+ * error occurs, %NULL will be returned and
+ * @error set.
+ **/
+gchar *
+g_utf16_to_utf8 (const gunichar2 *str,
+ gint len,
+ gint *items_read,
+ gint *items_written,
+ GError **error)
+{
+ /* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ
+ * are marked.
+ */
+ const gunichar2 *in;
+ gchar *out;
+ gchar *result = NULL;
+ gint n_bytes;
+ gunichar high_surrogate;
+
+ g_return_val_if_fail (str != 0, NULL);
+
+ n_bytes = 0;
+ in = str;
+ high_surrogate = 0;
+ while ((len < 0 || in - str < len) && *in)
+ {
+ gunichar2 c = *in;
+ gunichar wc;
+
+ if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
+ {
+ if (high_surrogate)
+ {
+ wc = SURROGATE_VALUE (high_surrogate, c);
+ high_surrogate = 0;
+ }
+ else
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+ goto err_out;
+ }
+ }
+ else
+ {
+ if (high_surrogate)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+ goto err_out;
+ }
+
+ if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
+ {
+ high_surrogate = c;
+ goto next1;
+ }
+ else
+ wc = c;
+ }
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ n_bytes += UTF8_LENGTH (wc);
+
+ next1:
+ in++;
+ }
+
+ if (high_surrogate && !items_read)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
+ _("Partial character sequence at end of input"));
+ goto err_out;
+ }
+
+ /* At this point, everything is valid, and we just need to convert
+ */
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ result = g_malloc (n_bytes + 1);
+
+ high_surrogate = 0;
+ out = result;
+ in = str;
+ while (out < result + n_bytes)
+ {
+ gunichar2 c = *in;
+ gunichar wc;
+
+ if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
+ {
+ wc = SURROGATE_VALUE (high_surrogate, c);
+ high_surrogate = 0;
+ }
+ else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
+ {
+ high_surrogate = c;
+ goto next2;
+ }
+ else
+ wc = c;
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ out += g_unichar_to_utf8 (wc, out);
+
+ next2:
+ in++;
+ }
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ *out = '\0';
+
+ if (items_written)
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ *items_written = out - result;
+
+ err_out:
+ if (items_read)
+ *items_read = in - str;
+
+ return result;
+}
+
+/**
+ * g_utf16_to_ucs4:
+ * @str: a UTF-16 encoded string
+ * @len: the maximum length of @str to use. If < 0, then
+ * the string is terminated with a 0 character.
+ * @items_read: location to store number of words read, or %NULL.
+ * If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
+ * returned in case @str contains a trailing partial
+ * character. If an error occurs then the index of the
+ * invalid input is stored here.
+ * @items_written: location to store number of characters written, or %NULL.
+ * The value stored here does not include the trailing
+ * 0 character.
+ * @error: location to store the error occuring, or %NULL to ignore
+ * errors. Any of the errors in #GConvertError other than
+ * %G_CONVERT_ERROR_NO_CONVERSION may occur.
+ *
+ * Convert a string from UTF-16 to UCS-4. The result will be
+ * terminated with a 0 character.
+ *
+ * Return value: a pointer to a newly allocated UCS-4 string.
+ * This value must be freed with g_free(). If an
+ * error occurs, %NULL will be returned and
+ * @error set.
+ **/
+gunichar *
+g_utf16_to_ucs4 (const gunichar2 *str,
+ gint len,
+ gint *items_read,
+ gint *items_written,
+ GError **error)
+{
+ const gunichar2 *in;
+ gchar *out;
+ gchar *result = NULL;
+ gint n_bytes;
+ gunichar high_surrogate;
+
+ g_return_val_if_fail (str != 0, NULL);
+
+ n_bytes = 0;
+ in = str;
+ high_surrogate = 0;
+ while ((len < 0 || in - str < len) && *in)
+ {
+ gunichar2 c = *in;
+ gunichar wc;
+
+ if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
+ {
+ if (high_surrogate)
+ {
+ wc = SURROGATE_VALUE (high_surrogate, c);
+ high_surrogate = 0;
+ }
+ else
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+ goto err_out;
+ }
+ }
+ else
+ {
+ if (high_surrogate)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+ goto err_out;
+ }
+
+ if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
+ {
+ high_surrogate = c;
+ goto next1;
+ }
+ else
+ wc = c;
+ }
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ n_bytes += sizeof (gunichar);
+
+ next1:
+ in++;
+ }
+
+ if (high_surrogate && !items_read)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
+ _("Partial character sequence at end of input"));
+ goto err_out;
+ }
+
+ /* At this point, everything is valid, and we just need to convert
+ */
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ result = g_malloc (n_bytes + 4);
+
+ high_surrogate = 0;
+ out = result;
+ in = str;
+ while (out < result + n_bytes)
+ {
+ gunichar2 c = *in;
+ gunichar wc;
+
+ if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
+ {
+ wc = SURROGATE_VALUE (high_surrogate, c);
+ high_surrogate = 0;
+ }
+ else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
+ {
+ high_surrogate = c;
+ goto next2;
+ }
+ else
+ wc = c;
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ *(gunichar *)out = wc;
+ out += sizeof (gunichar);
+
+ next2:
+ in++;
+ }
+
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ *(gunichar *)out = 0;
+
+ if (items_written)
+ /********** DIFFERENT for UTF8/UCS4 **********/
+ *items_written = (out - result) / sizeof (gunichar);
+
+ err_out:
+ if (items_read)
+ *items_read = in - str;
+
+ return (gunichar *)result;
+}
+
+/**
+ * g_utf8_to_utf16:
+ * @str: a UTF-8 encoded string
+ * @len: the maximum length of @str to use. If < 0, then
+ * the string is %NULL terminated.
+
+ * @items_read: location to store number of bytes read, or %NULL.
+ * If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
+ * returned in case @str contains a trailing partial
+ * character. If an error occurs then the index of the
+ * invalid input is stored here.
+ * @items_written: location to store number of words written, or %NULL.
+ * The value stored here does not include the trailing
+ * 0 word.
+ * @error: location to store the error occuring, or %NULL to ignore
+ * errors. Any of the errors in #GConvertError other than
+ * %G_CONVERT_ERROR_NO_CONVERSION may occur.
+ *
+ * Convert a string from UTF-8 to UTF-16. A 0 word will be
+ * added to the result after the converted text.
+ *
+ * Return value: a pointer to a newly allocated UTF-16 string.
+ * This value must be freed with g_free(). If an
+ * error occurs, %NULL will be returned and
+ * @error set.
+ **/
+gunichar2 *
+g_utf8_to_utf16 (const gchar *str,
+ gint len,
+ gint *items_read,
+ gint *items_written,
+ GError **error)
+{
+ gunichar2 *result = NULL;
+ gint n16;
+ const gchar *in;
+ gint i;
+
+ g_return_val_if_fail (str != NULL, NULL);
+
+ in = str;
+ n16 = 0;
+ while ((len < 0 || str + len - in > 0) && *in)
+ {
+ gunichar wc = g_utf8_get_char_extended (in, str + len - in);
+ if (wc & 0x80000000)
+ {
+ if (wc == (gunichar)-2)
+ {
+ if (items_read)
+ break;
+ else
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
+ _("Partial character sequence at end of input"));
+ }
+ else
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid byte sequence in conversion input"));
+
+ goto err_out;
+ }
+
+ if (wc < 0xd800)
+ n16 += 1;
+ else if (wc < 0xe000)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+
+ goto err_out;
+ }
+ else if (wc < 0x10000)
+ n16 += 1;
+ else if (wc < 0x110000)
+ n16 += 2;
+ else
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Character out of range for UTF-16"));
+
+ goto err_out;
+ }
+
+ in = g_utf8_next_char (in);
+ }
+
+ result = g_new (gunichar2, n16 + 1);
+
+ in = str;
+ for (i = 0; i < n16;)
+ {
+ gunichar wc = g_utf8_get_char (in);
+
+ if (wc < 0x10000)
+ {
+ result[i++] = wc;
+ }
+ else
+ {
+ result[i++] = (wc - 0x10000) / 0x400 + 0xd800;
+ result[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
+ }
+
+ in = g_utf8_next_char (in);
+ }
+
+ result[i] = 0;
+
+ if (items_written)
+ *items_written = n16;
+
+ err_out:
+ if (items_read)
+ *items_read = in - str;
+
+ return result;
+}
+
+/**
+ * g_ucs4_to_utf16:
+ * @str: a UCS-4 encoded string
+ * @len: the maximum length of @str to use. If < 0, then
+ * the string is terminated with a zero character.
+ * @items_read: location to store number of bytes read, or %NULL.
+ * If an error occurs then the index of the invalid input
+ * is stored here.
+ * @items_written: location to store number of words written, or %NULL.
+ * The value stored here does not include the trailing
+ * 0 word.
+ * @error: location to store the error occuring, or %NULL to ignore
+ * errors. Any of the errors in #GConvertError other than
+ * %G_CONVERT_ERROR_NO_CONVERSION may occur.
+ *
+ * Convert a string from UCS-4 to UTF-16. A 0 word will be
+ * added to the result after the converted text.
+ *
+ * Return value: a pointer to a newly allocated UTF-16 string.
+ * This value must be freed with g_free(). If an
+ * error occurs, %NULL will be returned and
+ * @error set.
+ **/
+gunichar2 *
+g_ucs4_to_utf16 (const gunichar *str,
+ gint len,
+ gint *items_read,
+ gint *items_written,
+ GError **error)
+{
+ gunichar2 *result = NULL;
+ gint n16;
+ gint i, j;
+
+ n16 = 0;
+ i = 0;
+ while ((len < 0 || i < len) && str[i])
+ {
+ gunichar wc = str[i];
+
+ if (wc < 0xd800)
+ n16 += 1;
+ else if (wc < 0xe000)
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Invalid sequence in conversion input"));
+
+ goto err_out;
+ }
+ else if (wc < 0x10000)
+ n16 += 1;
+ else if (wc < 0x110000)
+ n16 += 2;
+ else
+ {
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Character out of range for UTF-16"));
+
+ goto err_out;
+ }
+
+ i++;
+ }
+
+ result = g_new (gunichar2, n16 + 1);
+
+ for (i = 0, j = 0; j < n16; i++)
+ {
+ gunichar wc = str[i];
+
+ if (wc < 0x10000)
+ {
+ result[j++] = wc;
+ }
+ else
+ {
+ result[j++] = (wc - 0x10000) / 0x400 + 0xd800;
+ result[j++] = (wc - 0x10000) % 0x400 + 0xdc00;
+ }
+ }
+ result[j] = 0;
+
+ if (items_written)
+ *items_written = n16;
+
+ err_out:
+ if (items_read)
+ *items_read = i;
+