/* Undefine G_DISABLE_ASSERT for this file (presumably so that assertion
 * macros stay active in this test regardless of build flags -- TODO confirm). */
1 #undef G_DISABLE_ASSERT
/* File-scope status value, initially 0.
 * NOTE(review): the places that update and read it are not part of this
 * excerpt; confirm its use in the full file. */
10 static gint exit_status = 0;
/* croak: printf-style reporter.
 * Formats the variadic arguments according to `format` and writes the result
 * to stderr.
 * NOTE(review): only part of the body is visible in this excerpt; whatever
 * follows the vfprintf call (va_end, termination, ...) must be confirmed in
 * the full file. */
14 croak (char *format, ...)
18 va_start (va, format);
19 vfprintf (stderr, format, va);
/* fail: printf-style reporter used by the conversion checks.
 * Formats the variadic arguments according to `format` and writes the result
 * to stderr.
 * NOTE(review): only part of the body is visible in this excerpt; how the
 * failure is recorded after the message is printed must be confirmed in the
 * full file. */
27 fail (char *format, ...)
31 va_start (va, format);
32 vfprintf (stderr, format, va);
/* ucs4_equal: compares two gunichar arrays element by element.
 * The loop below keeps going while the current elements of both arrays are
 * non-zero and equal to each other, so a 0 element acts as the terminator.
 * NOTE(review): the loop body and the return expression are not visible in
 * this excerpt. */
48 ucs4_equal (gunichar *a, gunichar *b)
50 while (*a && *b && (*a == *b))
/* utf16_equal: compares two gunichar2 arrays element by element.
 * The loop below keeps going while the current elements of both arrays are
 * non-zero and equal to each other, so a 0 element acts as the terminator.
 * NOTE(review): the loop body and the return expression are not visible in
 * this excerpt. */
60 utf16_equal (gunichar2 *a, gunichar2 *b)
62 while (*a && *b && (*a == *b))
/* utf16_count: takes a pointer to a gunichar2 array.
 * NOTE(review): the body is not visible in this excerpt; callers compare the
 * result against item counts reported by the conversion functions, so it
 * presumably returns the number of code units before the 0 terminator --
 * TODO confirm. */
72 utf16_count (gunichar2 *a)
/* print_ucs4: diagnostic dump of a UCS-4 buffer through g_print.
 *   prefix   - label printed first, followed by a space
 *   ucs4     - buffer to dump
 *   ucs4_len - number of elements of `ucs4` to print
 * Each element is printed in hexadecimal followed by a space. */
83 print_ucs4 (const gchar *prefix, gunichar *ucs4, gint ucs4_len)
86 g_print ("%s ", prefix);
87 for (i = 0; i < ucs4_len; i++)
88 g_print ("%x ", ucs4[i]);
/* Body of the per-test-case checker.  Its signature lies outside this
 * excerpt; presumably this is the function main invokes as process() --
 * TODO confirm.  It uses `line`, `utf8`, `status`, `ucs4` and `ucs4_len`:
 * it validates `utf8` with g_utf8_validate and then exercises the
 * UTF-8 / UCS-4 / UTF-16 conversion functions, reporting every mismatch
 * through fail() with the test-data line number. */
100 gboolean is_valid = g_utf8_validate (utf8, -1, &end);
101 GError *error = NULL;
102 glong items_read, items_written;
/* Per the message texts, the next two reports cover a disagreement between
 * the expected status and the g_utf8_validate result (the guarding
 * conditions are not visible in this excerpt). */
109 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
119 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
/* INCOMPLETE input: the checked conversion to UCS-4 has to fail with
 * G_CONVERT_ERROR_PARTIAL_INPUT. */
125 if (status == INCOMPLETE)
127 gunichar *ucs4_result;
129 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
131 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
133 fail ("line %d: incomplete input not properly detected\n", line);
136 g_clear_error (&error);
/* Second attempt, this time requesting items_read: it is an error if no
 * result comes back or if items_read equals the full byte length of utf8. */
138 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
140 if (!ucs4_result || items_read == (glong) strlen (utf8))
142 fail ("line %d: incomplete input not properly detected\n", line);
146 g_free (ucs4_result);
/* VALID and NOTUNICODE input: the checked conversion must produce exactly the
 * expected UCS-4 data, read all bytes of utf8 and write ucs4_len items. */
149 if (status == VALID || status == NOTUNICODE)
151 gunichar *ucs4_result;
153 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
156 fail ("line %d: conversion with status %d to ucs4 failed: %s\n", line, status, error->message);
160 if (!ucs4_equal (ucs4_result, ucs4) ||
161 items_read != (glong) strlen (utf8) ||
162 items_written != ucs4_len)
164 fail ("line %d: results of conversion with status %d to ucs4 do not match expected.\n", line, status);
165 print_ucs4 ("expected: ", ucs4, ucs4_len);
166 print_ucs4 ("received: ", ucs4_result, items_written);
170 g_free (ucs4_result);
/* Round trip: unchecked conversion via g_utf8_to_ucs4_fast, then back to
 * UTF-8 via g_ucs4_to_utf8; both directions are compared against the
 * expected data and lengths.  (The condition guarding this section is not
 * visible in this excerpt.) */
175 gunichar *ucs4_result;
178 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
180 if (!ucs4_equal (ucs4_result, ucs4) ||
181 items_written != ucs4_len)
183 fail ("line %d: results of fast conversion with status %d to ucs4 do not match expected.\n", line, status);
184 print_ucs4 ("expected: ", ucs4, ucs4_len);
185 print_ucs4 ("received: ", ucs4_result, items_written);
189 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
192 fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
196 if (strcmp (utf8_result, utf8) != 0 ||
197 items_read != ucs4_len ||
198 items_written != (glong) strlen (utf8))
200 fail ("line %d: conversion back to utf8 did not match original\n", line);
204 g_free (utf8_result);
205 g_free (ucs4_result);
/* UTF-16 section: a reference UTF-16 string is produced with g_convert and
 * compared with the output of g_utf8_to_utf16 and g_ucs4_to_utf16; the
 * results are then converted back to UTF-8 and UCS-4. */
210 gunichar2 *utf16_expected_tmp;
211 gunichar2 *utf16_expected;
212 gunichar2 *utf16_from_utf8;
213 gunichar2 *utf16_from_ucs4;
214 gunichar *ucs4_result;
/* Target charset name handed to g_convert: "UTF-16LE" on little-endian
 * hosts; the second definition ("UTF-16") belongs to the other preprocessor
 * branch, whose directive is not visible in this excerpt. */
219 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
220 #define TARGET "UTF-16LE"
222 #define TARGET "UTF-16"
225 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
226 NULL, &bytes_written, NULL)))
228 fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
232 /* zero-terminate and remove BOM
234 n_chars = bytes_written / 2;
235 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
/* BOM present: the copy starts at the code unit after the first one. */
238 utf16_expected = g_new (gunichar2, n_chars + 1);
239 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
241 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
243 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
/* Remaining case: the converted data is copied from its first code unit. */
248 utf16_expected = g_new (gunichar2, n_chars + 1);
249 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
/* Terminate the reference string with a 0 code unit. */
252 utf16_expected[n_chars] = '\0';
/* UTF-8 -> UTF-16: must succeed, read every byte of utf8, and report as many
 * items written as utf16_count finds in the result. */
254 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
256 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
260 if (items_read != (glong) strlen (utf8) ||
261 utf16_count (utf16_from_utf8) != items_written)
263 fail ("line %d: length error in conversion to ucs16\n", line);
/* UCS-4 -> UTF-16: same checks, with ucs4_len as the expected read count. */
267 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
269 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
273 if (items_read != ucs4_len ||
274 utf16_count (utf16_from_ucs4) != items_written)
276 fail ("line %d: length error in conversion to ucs16\n", line);
/* Both UTF-16 results have to equal the g_convert reference. */
280 if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
281 !utf16_equal (utf16_from_ucs4, utf16_expected))
283 fail ("line %d: results of conversion to ucs16 do not match\n", line);
/* UTF-16 -> UTF-8: must succeed and report consistent lengths. */
287 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
289 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
293 if (items_read != utf16_count (utf16_from_utf8) ||
294 items_written != (glong) strlen (utf8))
296 fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
/* UTF-16 -> UCS-4: must succeed and report consistent lengths.
 * NOTE(review): the input converted here is utf16_from_ucs4, yet the
 * items_read check below counts utf16_from_utf8 -- possibly a copy/paste
 * leftover; the two strings are expected to be equal, so confirm whether
 * this is intentional. */
300 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
302 fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
306 if (items_read != utf16_count (utf16_from_utf8) ||
307 items_written != ucs4_len)
309 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
/* The round-tripped UTF-8 and UCS-4 data must equal the original inputs. */
313 if (strcmp (utf8, utf8_result) != 0 ||
314 !ucs4_equal (ucs4, ucs4_result))
316 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
/* Release every buffer allocated in the UTF-16 section. */
320 g_free (utf16_expected_tmp);
321 g_free (utf16_expected);
322 g_free (utf16_from_utf8);
323 g_free (utf16_from_ucs4);
324 g_free (utf8_result);
325 g_free (ucs4_result);
/* main: test driver.
 * Loads the data file "utf8.txt" (located through g_test_build_filename with
 * G_TEST_DIST), walks through it line by line, and calls process() with the
 * starting line number, the UTF-8 string, the expected status and the
 * collected UCS-4 values of each record.
 * NOTE(review): several declarations, conditions and the return statement
 * are not visible in this excerpt. */
330 main (int argc, char **argv)
334 GError *error = NULL;
339 gint start_line = 0; /* Quiet GCC */
340 gchar *utf8 = NULL; /* Quiet GCC */
342 Status status = VALID; /* Quiet GCC */
344 g_test_init (&argc, &argv, NULL);
346 testfile = g_test_build_filename (G_TEST_DIST, "utf8.txt", NULL);
/* Read the whole data file into `contents`; per the message text, the croak
 * below reports a load failure (its guarding condition is not visible). */
348 g_file_get_contents (testfile, &contents, NULL, &error);
350 croak ("Cannot open utf8.txt: %s", error->message);
/* Array of gunichar collecting the expected UCS-4 values of one record. */
352 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
356 /* Loop over lines */
/* Skip leading spaces and tabs, then scan to the end of the line. */
359 while (*p && (*p == ' ' || *p == '\t'))
363 while (*end && (*end != '\r' && *end != '\n'))
/* Test for empty lines and lines starting with '#'. */
366 if (!*p || *p == '#' || *p == '\r' || *p == '\n')
/* Work on a stripped, heap-allocated copy of the line.
 * NOTE(review): where `tmp` is released is not visible in this excerpt. */
369 tmp = g_strstrip (g_strndup (p, end - p));
/* Map the status keyword to a Status value; anything else is reported
 * through croak. */
382 if (!strcmp (tmp, "VALID"))
384 else if (!strcmp (tmp, "INCOMPLETE"))
386 else if (!strcmp (tmp, "NOTUNICODE"))
388 else if (!strcmp (tmp, "OVERLONG"))
390 else if (!strcmp (tmp, "MALFORMED"))
393 croak ("Invalid status on line %d\n", line);
395 if (status != VALID && status != NOTUNICODE)
396 state++; /* No UCS-4 data */
/* UCS-4 line: split on spaces/tabs, parse each token as a hexadecimal
 * number and append it to the array. */
403 p = strtok (tmp, " \t");
408 gunichar ch = strtoul (p, &endptr, 16);
410 croak ("Invalid UCS-4 character on line %d\n", line);
412 g_array_append_val (ucs4, ch);
414 p = strtok (NULL, " \t");
/* Advance the parser state; it cycles through three values. */
421 state = (state + 1) % 3;
/* Run the checks for the collected record, then empty the array so it can be
 * reused. */
425 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
426 g_array_set_size (ucs4, 0);
/* Tests for a '\r' and then a '\n' at the current position (line ending);
 * the statements they guard are not visible in this excerpt. */
432 if (*p && *p == '\r')
434 if (*p && *p == '\n')
/* Free the array together with its element data. */
441 g_array_free (ucs4, TRUE);