1 #undef G_DISABLE_ASSERT
10 static gint exit_status = 0;
/* Report an error: takes a printf-style format plus arguments and writes the
 * formatted message to stderr with vfprintf().  main() calls this when the
 * test data cannot be used (unreadable utf8.txt, unknown status keyword, bad
 * UCS-4 value).
 * NOTE(review): presumably ends the program afterwards -- confirm. */
13 croak (char *format, ...)
17 va_start (va, format);
18 vfprintf (stderr, format, va);
/* Report a failed check: takes a printf-style format plus arguments and
 * writes the formatted message to stderr with vfprintf().  The per-test-case
 * checks report every mismatch through this function, passing the test-file
 * line number as the first argument.
 * NOTE(review): presumably also records the failure in exit_status --
 * confirm. */
25 fail (char *format, ...)
29 va_start (va, format);
30 vfprintf (stderr, format, va);
/* Compare two gunichar sequences element by element.  Callers use a non-zero
 * result to mean "the sequences match". */
46 ucs4_equal (gunichar *a, gunichar *b)
/* Keep going while neither sequence has reached a 0 element and the current
 * elements are equal. */
48 while (*a && *b && (*a == *b))
/* Compare two gunichar2 (UTF-16 code unit) sequences element by element.
 * Callers use a non-zero result to mean "the sequences match". */
58 utf16_equal (gunichar2 *a, gunichar2 *b)
/* Keep going while neither sequence has reached a 0 element and the current
 * elements are equal. */
60 while (*a && *b && (*a == *b))
/* Takes a gunichar2 sequence; callers compare the result with the
 * items_read / items_written values reported by the UTF-16 conversions.
 * NOTE(review): presumably counts the elements before the 0 terminator --
 * confirm. */
70 utf16_count (gunichar2 *a)
/* Checks for one test case: validate the UTF-8 string and, depending on the
 * expected status, run it through the UCS-4 and UTF-16 conversion functions.
 * Every mismatch is reported through fail() together with the test-file line
 * number. */
88 gboolean is_valid = g_utf8_validate (utf8, -1, &end);
90 glong items_read, items_written;
/* g_utf8_validate() has to agree with the expected status in both
 * directions. */
97 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
107 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
/* Truncated input. */
113 if (status == INCOMPLETE)
115 gunichar *ucs4_result;
/* Without an items_read pointer the conversion has to fail with
 * G_CONVERT_ERROR_PARTIAL_INPUT. */
117 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
119 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
121 fail ("line %d: incomplete input not properly detected\n", line);
124 g_clear_error (&error);
/* With an items_read pointer a result is expected, and items_read must stop
 * short of the full string length. */
126 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
128 if (!ucs4_result || items_read == strlen (utf8))
130 fail ("line %d: incomplete input not properly detected\n", line);
134 g_free (ucs4_result);
/* Statuses for which expected UCS-4 data exists: UTF-8 -> UCS-4 must
 * reproduce it, including both reported lengths. */
137 if (status == VALID || status == NOTUNICODE)
139 gunichar *ucs4_result;
142 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
145 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
149 if (!ucs4_equal (ucs4_result, ucs4) ||
150 items_read != strlen (utf8) ||
151 items_written != ucs4_len)
153 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
157 g_free (ucs4_result);
/* Same comparison for the _fast variant, which takes no GError and reports
 * only items_written. */
159 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
161 if (!ucs4_equal (ucs4_result, ucs4) ||
162 items_written != ucs4_len)
164 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
/* Round trip: UCS-4 -> UTF-8 must give back the original string. */
168 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
/* NOTE(review): unlike the other fail() messages this one has no trailing
 * "\n" -- confirm whether that is intended. */
171 fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
175 if (strcmp (utf8_result, utf8) != 0 ||
176 items_read != ucs4_len ||
177 items_written != strlen (utf8))
179 fail ("line %d: conversion back to utf8 did not match original\n", line);
183 g_free (utf8_result);
184 g_free (ucs4_result);
/* UTF-16 checks.  utf16_expected is the reference built with g_convert();
 * the two *_from_* buffers are the results being tested. */
189 gunichar2 *utf16_expected_tmp;
190 gunichar2 *utf16_expected;
191 gunichar2 *utf16_from_utf8;
192 gunichar2 *utf16_from_ucs4;
193 gunichar *ucs4_result;
/* TARGET names the g_convert() destination encoding; "UTF-16LE" is selected
 * when G_BYTE_ORDER == G_LITTLE_ENDIAN. */
198 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
199 #define TARGET "UTF-16LE"
201 #define TARGET "UTF-16"
/* Produce the reference UTF-16 via g_convert(); bytes_written is the size of
 * that output in bytes. */
204 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
205 NULL, &bytes_written, NULL)))
207 fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
211 /* zero-terminate and remove BOM
213 n_chars = bytes_written / 2;
214 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
/* Native-order BOM present: copy starting one element in, so the BOM is
 * left out of utf16_expected. */
217 utf16_expected = g_new (gunichar2, n_chars + 1);
218 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
/* A byte-swapped BOM means the converter produced the opposite byte order,
 * which is reported as a failure. */
220 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
222 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
/* No BOM: copy the converted data unchanged. */
227 utf16_expected = g_new (gunichar2, n_chars + 1);
228 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
/* The extra element allocated above holds the terminator. */
231 utf16_expected[n_chars] = '\0';
/* UTF-8 -> UTF-16: all of the input must be read and the reported output
 * length must match what utf16_count() finds. */
233 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
235 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
239 if (items_read != strlen (utf8) ||
240 utf16_count (utf16_from_utf8) != items_written)
242 fail ("line %d: length error in conversion to ucs16\n", line);
/* UCS-4 -> UTF-16: same length checks, with ucs4_len as the input length. */
246 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
248 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
252 if (items_read != ucs4_len ||
253 utf16_count (utf16_from_ucs4) != items_written)
255 fail ("line %d: length error in conversion to ucs16\n", line);
/* Both results have to equal the g_convert() reference. */
259 if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
260 !utf16_equal (utf16_from_ucs4, utf16_expected))
262 fail ("line %d: results of conversion to ucs16 do not match\n", line);
/* Round trip: UTF-16 -> UTF-8. */
266 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
268 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
272 if (items_read != utf16_count (utf16_from_utf8) ||
273 items_written != strlen (utf8))
275 fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
/* Round trip: UTF-16 -> UCS-4, this time starting from utf16_from_ucs4. */
279 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
281 fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
/* NOTE(review): items_read is compared with utf16_count (utf16_from_utf8)
 * although the buffer just converted is utf16_from_ucs4 -- confirm which one
 * is intended. */
285 if (items_read != utf16_count (utf16_from_utf8) ||
286 items_written != ucs4_len)
288 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
/* Both round-trip results must equal the original inputs. */
292 if (strcmp (utf8, utf8_result) != 0 ||
293 !ucs4_equal (ucs4, ucs4_result))
295 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
/* Release every buffer allocated for the UTF-16 checks. */
299 g_free (utf16_expected_tmp);
300 g_free (utf16_expected);
301 g_free (utf16_from_utf8);
302 g_free (utf16_from_ucs4);
303 g_free (utf8_result);
304 g_free (ucs4_result);
/* Test driver: builds the path of utf8.txt from the "srcdir" environment
 * variable, loads the file, parses it line by line and hands each completed
 * test case to process(). */
309 main (int argc, char **argv)
311 gchar *srcdir = getenv ("srcdir");
314 GError *error = NULL;
319 gint start_line = 0; /* Quiet GCC */
320 gchar *utf8 = NULL; /* Quiet GCC */
322 Status status = VALID; /* Quiet GCC */
327 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL);
/* Read the whole test file into memory; a load error is reported through
 * croak() with the GError message. */
329 g_file_get_contents (testfile, &contents, NULL, &error);
331 croak ("Cannot open utf8.txt: %s", error->message);
/* Holds the expected code points of the current test case.  The first
 * argument (TRUE) asks GArray to keep the data zero-terminated. */
333 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
337 /* Loop over lines */
/* Skip leading spaces and tabs, then locate the end of the current line. */
340 while (*p && (*p == ' ' || *p == '\t'))
344 while (*end && (*end != '\r' && *end != '\n'))
/* Detect lines with nothing to parse: empty, or starting with '#'. */
347 if (!*p || *p == '#' || *p == '\r' || *p == '\n')
/* Private copy of the line with surrounding whitespace stripped. */
350 tmp = g_strstrip (g_strndup (p, end - p));
/* Status field: must be one of the five keywords below; anything else is
 * reported through croak(). */
363 if (!strcmp (tmp, "VALID"))
365 else if (!strcmp (tmp, "INCOMPLETE"))
367 else if (!strcmp (tmp, "NOTUNICODE"))
369 else if (!strcmp (tmp, "OVERLONG"))
371 else if (!strcmp (tmp, "MALFORMED"))
374 croak ("Invalid status on line %d\n", line);
/* Only VALID and NOTUNICODE cases carry UCS-4 data, so the state counter is
 * advanced an extra step for every other status. */
376 if (status != VALID && status != NOTUNICODE)
377 state++; /* No UCS-4 data */
/* UCS-4 field: tokens separated by spaces or tabs, each parsed as a
 * hexadecimal number and appended to the ucs4 array. */
384 p = strtok (tmp, " \t");
389 gunichar ch = strtoul (p, &endptr, 16);
391 croak ("Invalid UCS-4 character on line %d\n", line);
393 g_array_append_val (ucs4, ch);
395 p = strtok (NULL, " \t");
/* The state counter cycles through three values.
 * NOTE(review): presumably one per field of a test case (UTF-8 string,
 * status, UCS-4 data) -- confirm. */
402 state = (state + 1) % 3;
/* Run the checks for the completed case, then empty the array so it can be
 * reused for the next one. */
406 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
407 g_array_set_size (ucs4, 0);
/* Line terminator handling: '\r' and '\n' are each tested separately. */
413 if (*p && *p == '\r')
415 if (*p && *p == '\n')