1 #undef G_DISABLE_ASSERT
10 static gint exit_status = 0;
13 croak (char *format, ...)
17 va_start (va, format);
18 vfprintf (stderr, format, va);
25 fail (char *format, ...)
29 va_start (va, format);
30 vfprintf (stderr, format, va);
46 ucs4_equal (gunichar *a, gunichar *b)
48 while (*a && *b && (*a == *b))
58 utf16_equal (gunichar2 *a, gunichar2 *b)
60 while (*a && *b && (*a == *b))
70 utf16_count (gunichar2 *a)
88 gboolean is_valid = g_utf8_validate (utf8, -1, &end);
90 glong items_read, items_written;
97 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
107 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
113 if (status == INCOMPLETE)
115 gunichar *ucs4_result;
117 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
119 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
121 fail ("line %d: incomplete input not properly detected\n", line);
124 g_clear_error (&error);
126 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
128 if (!ucs4_result || items_read == strlen (utf8))
130 fail ("line %d: incomplete input not properly detected\n", line);
134 g_free (ucs4_result);
137 if (status == VALID || status == NOTUNICODE)
139 gunichar *ucs4_result;
142 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
145 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
149 if (!ucs4_equal (ucs4_result, ucs4) ||
150 items_read != strlen (utf8) ||
151 items_written != ucs4_len)
153 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
157 g_free (ucs4_result);
159 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
161 if (!ucs4_equal (ucs4_result, ucs4) ||
162 items_written != ucs4_len)
164 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
168 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
171 fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
175 if (strcmp (utf8_result, utf8) != 0 ||
176 items_read != ucs4_len ||
177 items_written != strlen (utf8))
179 fail ("line %d: conversion back to utf8 did not match original\n", line);
183 g_free (utf8_result);
184 g_free (ucs4_result);
189 gunichar2 *utf16_expected_tmp;
190 gunichar2 *utf16_expected;
191 gunichar2 *utf16_from_utf8;
192 gunichar2 *utf16_from_ucs4;
193 gunichar *ucs4_result;
198 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
199 #define TARGET "UTF-16LE"
201 #define TARGET "UTF-16"
204 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
205 NULL, &bytes_written, NULL)))
207 fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
211 /* zero-terminate and remove BOM
213 n_chars = bytes_written / 2;
214 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
217 utf16_expected = g_new (gunichar2, n_chars + 1);
218 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
220 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
222 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
227 utf16_expected = g_new (gunichar2, n_chars + 1);
228 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
231 utf16_expected[n_chars] = '\0';
233 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
235 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
239 if (items_read != strlen (utf8) ||
240 utf16_count (utf16_from_utf8) != items_written)
242 fail ("line %d: length error in conversion to ucs16\n", line);
246 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
248 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
252 if (items_read != ucs4_len ||
253 utf16_count (utf16_from_ucs4) != items_written)
255 fail ("line %d: length error in conversion to ucs16\n", line);
259 if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
260 !utf16_equal (utf16_from_ucs4, utf16_expected))
262 fail ("line %d: results of conversion to ucs16 do not match\n", line);
266 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
268 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
272 if (items_read != utf16_count (utf16_from_utf8) ||
273 items_written != strlen (utf8))
275 fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
279 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
281 fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
285 if (items_read != utf16_count (utf16_from_utf8) ||
286 items_written != ucs4_len)
288 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
292 if (strcmp (utf8, utf8_result) != 0 ||
293 !ucs4_equal (ucs4, ucs4_result))
295 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
299 g_free (utf16_expected_tmp);
300 g_free (utf16_expected);
301 g_free (utf16_from_utf8);
302 g_free (utf16_from_ucs4);
303 g_free (utf8_result);
304 g_free (ucs4_result);
309 main (int argc, char **argv)
314 GError *error = NULL;
319 gint start_line = 0; /* Quiet GCC */
320 gchar *utf8 = NULL; /* Quiet GCC */
322 Status status = VALID; /* Quiet GCC */
324 if (g_getenv ("G_TEST_DATA"))
325 srcdir = g_getenv ("G_TEST_DATA");
329 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL);
331 g_file_get_contents (testfile, &contents, NULL, &error);
333 croak ("Cannot open utf8.txt: %s", error->message);
335 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
339 /* Loop over lines */
342 while (*p && (*p == ' ' || *p == '\t'))
346 while (*end && (*end != '\r' && *end != '\n'))
349 if (!*p || *p == '#' || *p == '\r' || *p == '\n')
352 tmp = g_strstrip (g_strndup (p, end - p));
365 if (!strcmp (tmp, "VALID"))
367 else if (!strcmp (tmp, "INCOMPLETE"))
369 else if (!strcmp (tmp, "NOTUNICODE"))
371 else if (!strcmp (tmp, "OVERLONG"))
373 else if (!strcmp (tmp, "MALFORMED"))
376 croak ("Invalid status on line %d\n", line);
378 if (status != VALID && status != NOTUNICODE)
379 state++; /* No UCS-4 data */
386 p = strtok (tmp, " \t");
391 gunichar ch = strtoul (p, &endptr, 16);
393 croak ("Invalid UCS-4 character on line %d\n", line);
395 g_array_append_val (ucs4, ch);
397 p = strtok (NULL, " \t");
404 state = (state + 1) % 3;
408 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
409 g_array_set_size (ucs4, 0);
415 if (*p && *p == '\r')
417 if (*p && *p == '\n')