/* Remove any definition of G_DISABLE_ASSERT.  Presumably this keeps the
 * GLib assertion macros active in this test program -- confirm against
 * the headers included on the lines not shown here. */
1 #undef G_DISABLE_ASSERT
/* File-scope status value, initialised to 0.  Its readers and writers are
 * outside this excerpt; presumably fail() updates it and main() returns
 * it -- TODO confirm. */
10 static gint exit_status = 0;
/* croak: printf-style reporting helper.
 * @format: printf-style format string; its arguments follow.
 *
 * Formats the message with vfprintf() and writes it to stderr.  The lines
 * shown append nothing to the message, so callers supply their own
 * trailing newline.
 * NOTE(review): the return type, the va_list declaration and everything
 * after the vfprintf() call are outside this excerpt; judging by the name
 * this presumably aborts the program afterwards -- confirm. */
13 croak (char *format, ...)
17 va_start (va, format);
18 vfprintf (stderr, format, va);
/* fail: printf-style reporting helper for a failed check.
 * @format: printf-style format string; its arguments follow.
 *
 * Formats the message with vfprintf() and writes it to stderr.  The lines
 * shown append nothing to the message, so callers supply their own
 * trailing newline.
 * NOTE(review): the return type, the va_list declaration and everything
 * after the vfprintf() call are outside this excerpt; presumably a
 * failure flag is recorded and execution continues -- confirm. */
25 fail (char *format, ...)
29 va_start (va, format);
30 vfprintf (stderr, format, va);
/* ucs4_equal: compare two gunichar arrays element by element.
 * @a: first array.
 * @b: second array.
 *
 * The loop runs while the current elements of both arrays are non-zero
 * and equal to each other, i.e. it stops at the first zero element or the
 * first mismatch.
 * NOTE(review): the loop body and the return statement are outside this
 * excerpt; presumably both pointers are advanced and the elements at the
 * stopping position decide the result -- confirm. */
46 ucs4_equal (gunichar *a, gunichar *b)
48 while (*a && *b && (*a == *b))
/* utf16_equal: compare two gunichar2 arrays element by element.
 * @a: first array.
 * @b: second array.
 *
 * The loop runs while the current elements of both arrays are non-zero
 * and equal to each other, i.e. it stops at the first zero element or the
 * first mismatch.
 * NOTE(review): the loop body and the return statement are outside this
 * excerpt; presumably both pointers are advanced and the elements at the
 * stopping position decide the result -- confirm. */
58 utf16_equal (gunichar2 *a, gunichar2 *b)
60 while (*a && *b && (*a == *b))
/* utf16_count: takes a pointer to gunichar2 data.
 * @a: array to examine.
 *
 * NOTE(review): only the signature line is visible in this excerpt.
 * Callers compare the result with item counts reported by the GLib
 * conversion functions, so this presumably returns the number of
 * gunichar2 units before the terminating zero -- confirm. */
70 utf16_count (gunichar2 *a)
88 gboolean is_valid = g_utf8_validate (utf8, -1, &end);
90 glong items_read, items_written;
97 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
107 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
113 if (status == INCOMPLETE)
115 gunichar *ucs4_result;
117 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
119 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
121 fail ("line %d: incomplete input not properly detected\n", line);
124 g_clear_error (&error);
126 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
128 if (!ucs4_result || items_read == strlen (utf8))
130 fail ("line %d: incomplete input not properly detected\n", line);
134 g_free (ucs4_result);
137 if (status == VALID || status == NOTUNICODE)
139 gunichar *ucs4_result;
142 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
145 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
149 if (!ucs4_equal (ucs4_result, ucs4) ||
150 items_read != strlen (utf8) ||
151 items_written != ucs4_len)
153 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
157 g_free (ucs4_result);
159 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
161 if (!ucs4_equal (ucs4_result, ucs4) ||
162 items_written != ucs4_len)
164 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
168 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
171 fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
175 if (strcmp (utf8_result, utf8) != 0 ||
176 items_read != ucs4_len ||
177 items_written != strlen (utf8))
179 fail ("line %d: conversion back to utf8 did not match original\n", line);
183 g_free (utf8_result);
184 g_free (ucs4_result);
189 gunichar2 *utf16_expected_tmp;
190 gunichar2 *utf16_expected;
191 gunichar2 *utf16_from_utf8;
192 gunichar2 *utf16_from_ucs4;
193 gunichar *ucs4_result;
198 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
199 #define TARGET "UTF-16LE"
201 #define TARGET "UTF-16"
204 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
205 NULL, &bytes_written, NULL)))
207 fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
211 /* zero-terminate and remove BOM
213 n_chars = bytes_written / 2;
214 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
217 utf16_expected = g_new (gunichar2, n_chars + 1);
218 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
220 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
222 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
227 utf16_expected = g_new (gunichar2, n_chars + 1);
228 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
231 utf16_expected[n_chars] = '\0';
233 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
235 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
239 if (items_read != strlen (utf8) ||
240 utf16_count (utf16_from_utf8) != items_written)
242 fail ("line %d: length error in conversion to ucs16\n", line);
246 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
248 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
252 if (items_read != ucs4_len ||
253 utf16_count (utf16_from_ucs4) != items_written)
255 fail ("line %d: length error in conversion to ucs16\n", line);
259 if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
260 !utf16_equal (utf16_from_ucs4, utf16_expected))
262 fail ("line %d: results of conversion to ucs16 do not match\n", line);
266 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
268 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
272 if (items_read != utf16_count (utf16_from_utf8) ||
273 items_written != strlen (utf8))
275 fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
279 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
281 fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
285 if (items_read != utf16_count (utf16_from_utf8) ||
286 items_written != ucs4_len)
288 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
292 if (strcmp (utf8, utf8_result) != 0 ||
293 !ucs4_equal (ucs4, ucs4_result))
295 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
299 g_free (utf16_expected_tmp);
300 g_free (utf16_expected);
301 g_free (utf16_from_utf8);
302 g_free (utf16_from_ucs4);
303 g_free (utf8_result);
304 g_free (ucs4_result);
309 main (int argc, char **argv)
313 GError *error = NULL;
318 gint start_line = 0; /* Quiet GCC */
319 gchar *utf8 = NULL; /* Quiet GCC */
321 Status status = VALID; /* Quiet GCC */
323 g_test_init (&argc, &argv, NULL);
325 testfile = g_test_build_filename (G_TEST_DIST, "utf8.txt", NULL);
327 g_file_get_contents (testfile, &contents, NULL, &error);
329 croak ("Cannot open utf8.txt: %s", error->message);
331 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
335 /* Loop over lines */
338 while (*p && (*p == ' ' || *p == '\t'))
342 while (*end && (*end != '\r' && *end != '\n'))
345 if (!*p || *p == '#' || *p == '\r' || *p == '\n')
348 tmp = g_strstrip (g_strndup (p, end - p));
361 if (!strcmp (tmp, "VALID"))
363 else if (!strcmp (tmp, "INCOMPLETE"))
365 else if (!strcmp (tmp, "NOTUNICODE"))
367 else if (!strcmp (tmp, "OVERLONG"))
369 else if (!strcmp (tmp, "MALFORMED"))
372 croak ("Invalid status on line %d\n", line);
374 if (status != VALID && status != NOTUNICODE)
375 state++; /* No UCS-4 data */
382 p = strtok (tmp, " \t");
387 gunichar ch = strtoul (p, &endptr, 16);
389 croak ("Invalid UCS-4 character on line %d\n", line);
391 g_array_append_val (ucs4, ch);
393 p = strtok (NULL, " \t");
400 state = (state + 1) % 3;
404 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
405 g_array_set_size (ucs4, 0);
411 if (*p && *p == '\r')
413 if (*p && *p == '\n')
420 g_array_free (ucs4, TRUE);