/* File-scope exit status, initialised to 0.
 * NOTE(review): the code that updates or returns it is not visible in this
 * excerpt -- presumably fail() sets it and main() returns it; confirm. */
7 static gint exit_status = 0;
/* croak: printf-style reporter; formats its variadic arguments according to
 * format and writes them to stderr with vfprintf.
 * NOTE(review): the remainder of the body is not visible here.  Callers use
 * it for unrecoverable problems (unreadable test file, malformed test data),
 * so it presumably terminates the program -- confirm. */
10 croak (char *format, ...)
14 va_start (va, format);
15 vfprintf (stderr, format, va);
/* fail: printf-style reporter; formats its variadic arguments according to
 * format and writes them to stderr with vfprintf.
 * NOTE(review): the remainder of the body is not visible here.  It is called
 * for individual check failures, so it presumably records a failing exit
 * status rather than aborting -- confirm. */
22 fail (char *format, ...)
26 va_start (va, format);
27 vfprintf (stderr, format, va);
/* ucs4_equal: compares two gunichar buffers element by element.  The loop
 * continues while both current elements are non-zero and equal.
 * NOTE(review): the loop body and the return expression are not visible in
 * this excerpt; presumably both pointers advance and the result is true when
 * both reach a zero terminator together -- confirm. */
43 ucs4_equal (gunichar *a, gunichar *b)
45 while (*a && *b && (*a == *b))
/* utf16_equal: gunichar2 counterpart of the UCS-4 comparison.  The loop
 * continues while both current code units are non-zero and equal.
 * NOTE(review): the loop body and the return expression are not visible in
 * this excerpt; presumably the result is true when both buffers end at the
 * same position -- confirm. */
55 utf16_equal (gunichar2 *a, gunichar2 *b)
57 while (*a && *b && (*a == *b))
/* utf16_count: takes a gunichar2 buffer; callers compare its result with the
 * item counts reported by the UTF-16 conversion functions.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * counts code units up to a zero terminator -- confirm. */
67 utf16_count (gunichar2 *a)
/* Interior of the per-test-case checker.  Its header lies outside this
 * excerpt; presumably it is the process() that main() calls with the line
 * number, the UTF-8 string, the expected status and the expected UCS-4
 * data plus its length -- confirm.
 *
 * First step: validate the UTF-8 input and compare the outcome with the
 * expected status. */
85 gboolean is_valid = g_utf8_validate (utf8, -1, &end);
87 glong items_read, items_written;
94 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
104 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
/* INCOMPLETE input.  Without an items_read pointer the conversion must
 * report G_CONVERT_ERROR_PARTIAL_INPUT; with one it must return a result
 * and stop before the end of the string. */
110 if (status == INCOMPLETE)
112 gunichar *ucs4_result;
114 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
116 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
118 fail ("line %d: incomplete input not properly detected\n", line);
121 g_clear_error (&error);
123 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
125 if (!ucs4_result || items_read == strlen (utf8))
127 fail ("line %d: incomplete input not properly detected\n", line);
131 g_free (ucs4_result);
/* VALID and NOTUNICODE input: round trip UTF-8 -> UCS-4 -> UTF-8, checking
 * both the converted data and the reported item counts. */
134 if (status == VALID || status == NOTUNICODE)
136 gunichar *ucs4_result;
139 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
142 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
146 if (!ucs4_equal (ucs4_result, ucs4) ||
147 items_read != strlen (utf8) ||
148 items_written != ucs4_len)
150 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
154 g_free (ucs4_result);
/* Same comparison for the non-validating fast variant. */
156 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
158 if (!ucs4_equal (ucs4_result, ucs4) ||
159 items_written != ucs4_len)
161 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
165 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
/* NOTE(review): unlike the sibling messages, the format string below has no
 * trailing newline. */
168 fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
172 if (strcmp (utf8_result, utf8) != 0 ||
173 items_read != ucs4_len ||
174 items_written != strlen (utf8))
176 fail ("line %d: conversion back to utf8 did not match original\n", line);
180 g_free (utf8_result);
181 g_free (ucs4_result);
/* UTF-16 checks.  A reference UTF-16 string is produced with g_convert and
 * compared with the output of g_utf8_to_utf16 and g_ucs4_to_utf16; both are
 * then converted back and compared with the original input.
 * NOTE(review): the condition guarding this section is not visible in this
 * excerpt. */
186 gunichar2 *utf16_expected_tmp;
187 gunichar2 *utf16_expected;
188 gunichar2 *utf16_from_utf8;
189 gunichar2 *utf16_from_ucs4;
190 gunichar *ucs4_result;
/* TARGET is the destination charset handed to g_convert.  Two alternative
 * definitions are visible; the preprocessor conditional that selects
 * between them is not. */
196 #define TARGET "UTF-16LE"
198 #define TARGET "UTF-16"
201 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
202 NULL, &bytes_written, NULL)))
204 fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
208 /* zero-terminate and remove BOM
210 n_chars = bytes_written / 2;
211 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
/* NOTE(review): the copy below starts one unit past the BOM and copies
 * n_chars units; that stays in bounds only if n_chars was reduced by one on
 * a line not visible here -- confirm. */
214 utf16_expected = g_new (gunichar2, n_chars + 1);
215 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
217 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
219 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
/* No BOM present: copy the converted data unchanged. */
224 utf16_expected = g_new (gunichar2, n_chars + 1);
225 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
228 utf16_expected[n_chars] = '\0';
/* UTF-8 -> UTF-16: all input bytes must be consumed and the reported number
 * of units must match the length of the result. */
230 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
232 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
236 if (items_read != strlen (utf8) ||
237 utf16_count (utf16_from_utf8) != items_written)
239 fail ("line %d: length error in conversion to ucs16\n", line);
/* UCS-4 -> UTF-16: same length checks against ucs4_len. */
243 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
245 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
249 if (items_read != ucs4_len ||
250 utf16_count (utf16_from_ucs4) != items_written)
252 fail ("line %d: length error in conversion to ucs16\n", line);
/* Both conversions must agree with the g_convert reference. */
256 if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
257 !utf16_equal (utf16_from_ucs4, utf16_expected))
259 fail ("line %d: results of conversion to ucs16 do not match\n", line);
/* UTF-16 -> UTF-8 round trip. */
263 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
265 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
269 if (items_read != utf16_count (utf16_from_utf8) ||
270 items_written != strlen (utf8))
272 fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
/* UTF-16 -> UCS-4 round trip.
 * NOTE(review): the failure message below mentions utf8/ucs4 and does not
 * print error->message, unlike the neighbouring messages. */
276 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
278 fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
/* NOTE(review): items_read here comes from converting utf16_from_ucs4, yet
 * it is compared with the count of utf16_from_utf8 -- likely meant
 * utf16_from_ucs4; confirm. */
282 if (items_read != utf16_count (utf16_from_utf8) ||
283 items_written != ucs4_len)
285 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
289 if (strcmp (utf8, utf8_result) != 0 ||
290 !ucs4_equal (ucs4, ucs4_result))
292 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
/* Release every buffer allocated in the UTF-16 section. */
296 g_free (utf16_expected_tmp);
297 g_free (utf16_expected);
298 g_free (utf16_from_utf8);
299 g_free (utf16_from_ucs4);
300 g_free (utf8_result);
301 g_free (ucs4_result);
/* main: test driver.  Loads utf8.txt from the directory named by the srcdir
 * environment variable, parses it line by line and hands each completed
 * test case to process().
 * NOTE(review): many lines of the body, including the return statement, are
 * not visible in this excerpt. */
306 main (int argc, char **argv)
/* NOTE(review): getenv may return NULL; any fallback for an unset srcdir is
 * not visible here -- confirm. */
308 gchar *srcdir = getenv ("srcdir");
311 GError *error = NULL;
316 gint start_line = 0; /* Quiet GCC */
317 gchar *utf8 = NULL; /* Quiet GCC */
319 Status status = VALID; /* Quiet GCC */
324 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL);
/* Read the whole test file into contents.
 * NOTE(review): the check that guards the croak below is not visible here. */
326 g_file_get_contents (testfile, &contents, NULL, &error);
328 croak ("Cannot open utf8.txt: %s", error->message);
/* Accumulator for the expected UCS-4 code points of the current test case. */
330 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
334 /* Loop over lines */
/* Skip leading blanks, then locate the end of the current line. */
337 while (*p && (*p == ' ' || *p == '\t'))
341 while (*end && (*end != '\r' && *end != '\n'))
/* Empty lines and lines starting with # carry no test data. */
344 if (!*p || *p == '#' || *p == '\r' || *p == '\n')
/* Work on a stripped private copy of the line. */
347 tmp = g_strstrip (g_strndup (p, end - p));
/* Status line: map the keyword to a Status value; anything else is fatal. */
360 if (!strcmp (tmp, "VALID"))
362 else if (!strcmp (tmp, "INCOMPLETE"))
364 else if (!strcmp (tmp, "NOTUNICODE"))
366 else if (!strcmp (tmp, "OVERLONG"))
368 else if (!strcmp (tmp, "MALFORMED"))
371 croak ("Invalid status on line %d\n", line);
373 if (status != VALID && status != NOTUNICODE)
374 state++; /* No UCS-4 data */
/* UCS-4 line: blank-separated hexadecimal code points, appended to ucs4.
 * NOTE(review): strtok modifies tmp and keeps static state between calls. */
381 p = strtok (tmp, " \t");
386 gunichar ch = strtoul (p, &endptr, 16);
388 croak ("Invalid UCS-4 character on line %d\n", line);
390 g_array_append_val (ucs4, ch);
392 p = strtok (NULL, " \t");
/* The parser state cycles through three values. */
399 state = (state + 1) % 3;
/* Run the checks for the collected test case, then empty the accumulator. */
403 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
404 g_array_set_size (ucs4, 0);
/* Line terminator handling: a carriage return and a line feed are each
 * tested for separately. */
410 if (*p && *p == '\r')
412 if (*p && *p == '\n')