1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
22 * file for a list of people on the GLib Team. See the ChangeLog
23 * files for a list of changes. These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
27 #undef G_DISABLE_ASSERT
/* Checks a CP1255 -> UTF-8 conversion done with g_convert(): the call must
 * succeed, consume all 5 input bytes and produce the 10 bytes held in
 * "expected". */
36 test_iconv_state (void)
38 gchar *in = "\xf4\xe5\xf8\xe5\xed";
39 gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
42 gsize bytes_written = 0;
/* Length -1: "in" is passed as a NUL-terminated string. */
45 out = g_convert (in, -1, "UTF-8", "CP1255",
46 &bytes_read, &bytes_written, &error);
/* No error, every input byte read, two output bytes per input byte. */
48 g_assert (error == NULL);
49 g_assert (bytes_read == 5);
50 g_assert (bytes_written == 10);
51 g_assert (strcmp (out, expected) == 0);
54 /* some tests involving "vulgar fraction one half" */
/* The input is the two-byte sequence C2 BD.  Three conversions are checked in
 * turn: one expected to succeed and yield the single byte BD, one to
 * ISO8859-15 expected to fail with G_CONVERT_ERROR_ILLEGAL_SEQUENCE, and one
 * through g_convert_with_fallback() expected to succeed and yield "a". */
58 gchar *in = "\xc2\xbd";
61 gsize bytes_written = 0;
/* First conversion: must succeed, reading 2 bytes and writing 1. */
64 out = g_convert (in, -1,
66 &bytes_read, &bytes_written,
69 g_assert (error == NULL);
70 g_assert (bytes_read == 2);
71 g_assert (bytes_written == 1);
72 g_assert (strcmp (out, "\xbd") == 0);
/* Second conversion, UTF-8 -> ISO8859-15: must fail with an illegal-sequence
 * error, report zero bytes read and written, and return NULL. */
75 out = g_convert (in, -1,
76 "ISO8859-15", "UTF-8",
77 &bytes_read, &bytes_written,
80 g_assert (error && error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
81 g_assert (bytes_read == 0);
82 g_assert (bytes_written == 0);
83 g_assert (out == NULL);
/* Reset the error so the next call starts from a clean state. */
84 g_clear_error (&error);
/* Third conversion, same codesets but with a fallback: must succeed and the
 * two input bytes must become the single byte "a". */
86 out = g_convert_with_fallback (in, -1,
87 "ISO8859-15", "UTF-8",
89 &bytes_read, &bytes_written,
92 g_assert (error == NULL);
93 g_assert (bytes_read == 2);
94 g_assert (bytes_written == 1);
95 g_assert (strcmp (out, "a") == 0);
/* Checks that two 4-byte inputs that differ only in the order of the bytes
 * within each pair both convert to the same 2-byte string in "expected".
 * NOTE(review): the codeset arguments of both g_convert() calls are not
 * visible in this listing; the leading FE FF / FF FE bytes look like UTF-16
 * byte-order marks -- confirm. */
100 test_byte_order (void)
102 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
103 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
104 gchar *expected = "\xce\x93";
106 gsize bytes_read = 0;
107 gsize bytes_written = 0;
108 GError *error = NULL;
/* The arrays carry no terminator, so their length is passed explicitly. */
110 out = g_convert (in_be, sizeof (in_be),
112 &bytes_read, &bytes_written,
115 g_assert (error == NULL);
116 g_assert (bytes_read == 4);
117 g_assert (bytes_written == 2);
118 g_assert (strcmp (out, expected) == 0);
/* Same checks for the byte-swapped input. */
121 out = g_convert (in_le, sizeof (in_le),
123 &bytes_read, &bytes_written,
126 g_assert (error == NULL);
127 g_assert (bytes_read == 4);
128 g_assert (bytes_written == 2);
129 g_assert (strcmp (out, expected) == 0);
134 check_utf8_to_ucs4 (const char *utf8,
136 const gunichar *ucs4,
140 gunichar *result, *result2, *result3;
141 glong items_read, items_read2;
142 glong items_written, items_written2;
143 GError *error, *error2, *error3;
148 /* check the fast conversion */
149 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
151 g_assert (items_written == ucs4_len);
153 for (i = 0; i <= items_written; i++)
154 g_assert (result[i] == ucs4[i]);
160 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
162 if (utf8_len == strlen (utf8))
164 /* check that len == -1 yields identical results */
166 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
167 g_assert (error || items_read2 == items_read);
168 g_assert (error || items_written2 == items_written2);
169 g_assert (!!result == !!result2);
170 g_assert (!!error == !!error2);
172 for (i = 0; i <= items_written; i++)
173 g_assert (result[i] == result2[i]);
177 g_error_free (error2);
181 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
183 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
185 g_assert (error == NULL);
186 g_assert (items_read == error_pos);
187 g_assert (items_written == ucs4_len);
189 for (i = 0; i <= items_written; i++)
190 g_assert (result[i] == ucs4[i]);
194 g_assert (error != NULL);
195 g_assert (result == NULL);
196 g_assert (items_read == error_pos);
197 g_error_free (error);
199 g_assert (error3 != NULL);
200 g_assert (result3 == NULL);
201 g_error_free (error3);
205 g_assert (error == NULL);
206 g_assert (items_read == utf8_len);
207 g_assert (items_written == ucs4_len);
209 for (i = 0; i <= items_written; i++)
210 g_assert (result[i] == ucs4[i]);
212 g_assert (error3 == NULL);
214 for (i = 0; i <= ucs4_len; i++)
215 g_assert (result3[i] == ucs4[i]);
/* Verifies UCS-4 -> UTF-8 conversion of "ucs4" against the expected string
 * "utf8".
 *
 * ucs4_len, utf8_len: lengths compared with the item counts reported by
 *                     g_ucs4_to_utf8().
 * error_pos:          value items_read must have when the conversion fails.
 *
 * The input is converted with the explicit length, with length -1 when the
 * element at index ucs4_len is 0, and once more with NULL count pointers. */
223 check_ucs4_to_utf8 (const gunichar *ucs4,
229 gchar *result, *result2, *result3;
230 glong items_read, items_read2;
231 glong items_written, items_written2;
232 GError *error, *error2, *error3;
/* Reference conversion with the explicit length. */
235 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
/* The len == -1 comparison only applies when the input is zero-terminated
 * exactly at ucs4_len. */
237 if (ucs4[ucs4_len] == 0)
239 /* check that len == -1 yields identical results */
241 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
/* Unless the reference call failed, both counts must match it. */
243 g_assert (error || items_read2 == items_read);
244 g_assert (error || items_written2 == items_written);
/* Both calls must agree on whether they produced a result and an error. */
245 g_assert (!!result == !!result2);
246 g_assert (!!error == !!error2);
248 g_assert (strcmp (result, result2) == 0);
252 g_error_free (error2);
/* Third run without count pointers. */
256 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
/* Failure case: both calls must return NULL and set an error, and the
 * counted call must have stopped at error_pos. */
260 g_assert (error != NULL);
261 g_assert (result == NULL);
262 g_assert (items_read == error_pos);
263 g_error_free (error);
265 g_assert (error3 != NULL);
266 g_assert (result3 == NULL);
267 g_error_free (error3);
/* Success case: all input consumed and both results equal to utf8. */
271 g_assert (error == NULL);
272 g_assert (items_read == ucs4_len);
273 g_assert (items_written == utf8_len);
275 g_assert (strcmp (result, utf8) == 0);
277 g_assert (error3 == NULL);
279 g_assert (strcmp (result3, utf8) == 0);
287 check_utf8_to_utf16 (const char *utf8,
289 const gunichar2 *utf16,
293 gunichar2 *result, *result2, *result3;
294 glong items_read, items_read2;
295 glong items_written, items_written2;
296 GError *error, *error2, *error3;
300 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
302 if (utf8_len == strlen (utf8))
304 /* check that len == -1 yields identical results */
306 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
307 g_assert (error || items_read2 == items_read);
308 g_assert (error || items_written2 == items_written2);
309 g_assert (!!result == !!result2);
310 g_assert (!!error == !!error2);
312 for (i = 0; i <= items_written; i++)
313 g_assert (result[i] == result2[i]);
317 g_error_free (error2);
321 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
323 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
325 g_assert (error == NULL);
326 g_assert (items_read == error_pos);
327 g_assert (items_written == utf16_len);
329 for (i = 0; i <= items_written; i++)
330 g_assert (result[i] == utf16[i]);
334 g_assert (error != NULL);
335 g_assert (result == NULL);
336 g_assert (items_read == error_pos);
337 g_error_free (error);
339 g_assert (error3 != NULL);
340 g_assert (result3 == NULL);
341 g_error_free (error3);
345 g_assert (error == NULL);
346 g_assert (items_read == utf8_len);
347 g_assert (items_written == utf16_len);
349 for (i = 0; i <= items_written; i++)
350 g_assert (result[i] == utf16[i]);
352 g_assert (error3 == NULL);
354 for (i = 0; i <= utf16_len; i++)
355 g_assert (result3[i] == utf16[i]);
/* Verifies UTF-16 -> UTF-8 conversion of "utf16" against the expected string
 * "utf8".
 *
 * utf16_len, utf8_len: lengths compared with the item counts reported by
 *                      g_utf16_to_utf8().
 * error_pos:           value items_read must have when a conversion stops
 *                      early (partial input or error).
 *
 * The input is converted with the explicit length, with length -1 when the
 * element at index utf16_len is 0, and once more with NULL count pointers. */
363 check_utf16_to_utf8 (const gunichar2 *utf16,
369 gchar *result, *result2, *result3;
370 glong items_read, items_read2;
371 glong items_written, items_written2;
372 GError *error, *error2, *error3;
/* Reference conversion with the explicit length. */
375 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
/* The len == -1 comparison only applies when the input is zero-terminated
 * exactly at utf16_len. */
376 if (utf16[utf16_len] == 0)
378 /* check that len == -1 yields identical results */
380 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
/* Unless the reference call failed, both counts must match it. */
382 g_assert (error || items_read2 == items_read);
383 g_assert (error || items_written2 == items_written);
/* Both calls must agree on whether they produced a result and an error. */
384 g_assert (!!result == !!result2);
385 g_assert (!!error == !!error2);
387 g_assert (strcmp (result, result2) == 0);
391 g_error_free (error2);
/* Third run without count pointers. */
395 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
/* The NULL-count call reported partial input: the counted call must have
 * succeeded, stopping at error_pos with exactly one input unit left over. */
397 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
399 g_assert (error == NULL);
400 g_assert (items_read == error_pos);
401 g_assert (items_read + 1 == utf16_len);
402 g_assert (items_written == utf8_len);
404 g_assert (strcmp (result, utf8) == 0);
/* Failure case: both calls must return NULL and set an error, and the
 * counted call must have stopped at error_pos. */
408 g_assert (error != NULL);
409 g_assert (result == NULL);
410 g_assert (items_read == error_pos);
411 g_error_free (error);
413 g_assert (error3 != NULL);
414 g_assert (result3 == NULL);
415 g_error_free (error3);
/* Success case: all input consumed and both results equal to utf8. */
419 g_assert (error == NULL);
420 g_assert (items_read == utf16_len);
421 g_assert (items_written == utf8_len);
423 g_assert (strcmp (result, utf8) == 0);
425 g_assert (error3 == NULL);
427 g_assert (strcmp (result3, utf8) == 0);
/* Verifies UCS-4 -> UTF-16 conversion of "ucs4" against the expected "utf16"
 * array.
 *
 * ucs4_len, utf16_len: lengths compared with the item counts reported by
 *                      g_ucs4_to_utf16().
 * error_pos:           value items_read must have when the conversion fails.
 *
 * The input is converted with the explicit length, with length -1 when the
 * element at index ucs4_len is 0, and once more with NULL count pointers.
 * The comparison loops run up to and including index utf16_len, so the
 * element following the last expected item is compared as well. */
435 check_ucs4_to_utf16 (const gunichar *ucs4,
437 const gunichar2 *utf16,
441 gunichar2 *result, *result2, *result3;
442 glong items_read, items_read2;
443 glong items_written, items_written2;
444 GError *error, *error2, *error3;
/* Reference conversion with the explicit length. */
448 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
/* The len == -1 comparison only applies when the input is zero-terminated
 * exactly at ucs4_len. */
450 if (ucs4[ucs4_len] == 0)
452 /* check that len == -1 yields identical results */
454 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
/* Unless the reference call failed, both counts must match it. */
456 g_assert (error || items_read2 == items_read);
457 g_assert (error || items_written2 == items_written);
/* Both calls must agree on whether they produced a result and an error. */
458 g_assert (!!result == !!result2);
459 g_assert (!!error == !!error2);
461 for (i = 0; i <= utf16_len; i++)
462 g_assert (result[i] == result2[i]);
466 g_error_free (error2);
/* Third run without count pointers, over the same ucs4_len items as the
 * reference call so that both results describe the same input. */
470 result3 = g_ucs4_to_utf16 (ucs4, ucs4_len, NULL, NULL, &error3);
/* Failure case: both calls must return NULL and set an error, and the
 * counted call must have stopped at error_pos. */
474 g_assert (error != NULL);
475 g_assert (result == NULL);
476 g_assert (items_read == error_pos);
477 g_error_free (error);
479 g_assert (error3 != NULL);
480 g_assert (result3 == NULL);
481 g_error_free (error3);
/* Success case: all input consumed and both results equal to utf16. */
485 g_assert (error == NULL);
486 g_assert (items_read == ucs4_len);
487 g_assert (items_written == utf16_len);
489 for (i = 0; i <= utf16_len; i++)
490 g_assert (result[i] == utf16[i]);
492 g_assert (error3 == NULL);
494 for (i = 0; i <= utf16_len; i++)
495 g_assert (result3[i] == utf16[i]);
503 check_utf16_to_ucs4 (const gunichar2 *utf16,
505 const gunichar *ucs4,
509 gunichar *result, *result2, *result3;
510 glong items_read, items_read2;
511 glong items_written, items_written2;
512 GError *error, *error2, *error3;
516 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
517 if (utf16[utf16_len] == 0)
519 /* check that len == -1 yields identical results */
521 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
522 g_assert (error || items_read2 == items_read);
523 g_assert (error || items_written2 == items_written2);
524 g_assert (!!result == !!result2);
525 g_assert (!!error == !!error2);
527 for (i = 0; i <= items_written; i++)
528 g_assert (result[i] == result2[i]);
532 g_error_free (error2);
536 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
538 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
540 g_assert (error == NULL);
541 g_assert (items_read == error_pos);
542 g_assert (items_read + 1 == utf16_len);
543 g_assert (items_written == ucs4_len);
545 for (i = 0; i <= items_written; i++)
546 g_assert (result[i] == ucs4[i]);
550 g_assert (error != NULL);
551 g_assert (result == NULL);
552 g_assert (items_read == error_pos);
553 g_error_free (error);
555 g_assert (error3 != NULL);
556 g_assert (result3 == NULL);
557 g_error_free (error3);
561 g_assert (error == NULL);
562 g_assert (items_read == utf16_len);
563 g_assert (items_written == ucs4_len);
565 for (i = 0; i <= ucs4_len; i++)
566 g_assert (result[i] == ucs4[i]);
568 g_assert (error3 == NULL);
570 for (i = 0; i <= ucs4_len; i++)
571 g_assert (result3[i] == ucs4[i]);
/* Drives the check_* helpers with a series of fixtures.  For each fixture the
 * utf8, ucs4 and utf16 buffers are filled with the same text in the three
 * encodings and every applicable conversion direction is checked.  The
 * integer arguments of each call are the input length, the expected output
 * length and the expected error position (0 when no error is expected). */
579 test_unicode_conversions (void)
583 gunichar2 utf16[100];
/* Fixture: three code units 0x61 0x62 0x63, zero-terminated; all six
 * directions are expected to succeed with length 3. */
586 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
587 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
589 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
590 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
591 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
592 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
593 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
594 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
/* Fixture: three characters (U+03B1..U+03B3) that take two bytes each in
 * UTF-8, so the UTF-8 length is 6 while the other lengths stay 3. */
596 utf8 = "\316\261\316\262\316\263";
597 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
598 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
600 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
601 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
602 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
603 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
604 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
605 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
607 /* partial utf8 character */
/* Only the UTF-8 input directions are checked: 4 input bytes, 3 expected
 * output items, conversion expected to stop at position 3. */
609 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
610 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
612 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
613 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
/* Fixture: "abc" followed by two \316 bytes; no output is expected and the
 * error position is 3. */
616 utf8 = "abc\316\316";
620 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
621 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
623 /* partial utf16 character */
/* The third UTF-16 unit is 0xd801 with nothing after it; two items are
 * expected to convert and the conversion to stop at position 2. */
625 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
626 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
628 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
629 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
/* Fixture: the third UTF-16 unit is 0xdc01; no output is expected and the
 * error position is 2. */
634 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
636 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
637 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
/* Fixture: the third UCS-4 value is 0x80000000; no output is expected and
 * the error position is 2. */
641 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
644 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
645 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
/* Program entry point.  Calls test_unicode_conversions(); any failed
 * g_assert() inside the checks aborts the program. */
649 main (int argc, char *argv[])
654 test_unicode_conversions ();