2 * "$Id: testi18n.c 9793 2011-05-20 03:49:49Z mike $"
4 * Internationalization test for CUPS.
6 * Copyright 2007-2011 by Apple Inc.
7 * Copyright 1997-2006 by Easy Software Products.
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
15 * This file is subject to the Apple OS-Developed Software exception.
19 * main() - Main entry for internationalization test module.
20 * print_utf8() - Print UTF-8 string with (optional) message.
24 * Include necessary headers...
27 #include "string-private.h"
28 #include "language-private.h"
/*
 * Table of character-set names.  main() compares a user-supplied name
 * against each entry case-insensitively and casts the index of the match
 * to cups_encoding_t, so the position of every string is significant.
 */
38 static const char * const lang_encodings[] =
39 { /* Encoding names; array index is used as the cups_encoding_t value */
40 "us-ascii", "iso-8859-1",
41 "iso-8859-2", "iso-8859-3",
42 "iso-8859-4", "iso-8859-5",
43 "iso-8859-6", "iso-8859-7",
44 "iso-8859-8", "iso-8859-9",
45 "iso-8859-10", "utf-8",
46 "iso-8859-13", "iso-8859-14",
47 "iso-8859-15", "windows-874",
48 "windows-1250", "windows-1251",
49 "windows-1252", "windows-1253",
50 "windows-1254", "windows-1255",
51 "windows-1256", "windows-1257",
52 "windows-1258", "koi8-r",
53 "koi8-u", "iso-8859-11",
54 "iso-8859-16", "mac-roman",
72 "windows-932", "windows-936",
73 "windows-949", "windows-950",
74 "windows-1361", "unknown",
100 "unknown", "unknown",
101 "unknown", "unknown",
102 "unknown", "unknown",
103 "unknown", "unknown",
/* Forward declaration; print_utf8() is defined after main(). */
114 static void print_utf8(const char *msg, const cups_utf8_t *src);
118 * 'main()' - Main entry for internationalization test module.
/*
 * Test driver for the CUPS transcoding functions.
 *
 * The visible code has two parts: a conversion mode that reads argv[1],
 * converts each line from the character set named by argv[2] to UTF-8 and
 * writes it to stdout, and a series of built-in round-trip tests between
 * UTF-8 and several legacy character sets.
 * NOTE(review): the argc test that selects between the two parts is not
 * visible in this excerpt - confirm against the full file.
 */
121 int /* O - Exit code */
122 main(int argc, /* I - Argument Count */
123 char *argv[]) /* I - Arguments */
125 FILE *fp; /* File pointer */
126 int count; /* File line counter */
127 int status, /* Status of current test */
128 errors; /* Error count */
129 char line[1024]; /* File line source string */
130 int len; /* Length (count) of string */
131 char legsrc[1024], /* Legacy source string */
132 legdest[1024], /* Legacy destination string */
133 *legptr; /* Pointer into legacy string */
134 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */
135 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
136 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
137 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */
138 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
139 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
140 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */
141 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
142 /* "A != <ALPHA>." - use ISO 8859-7 */
143 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */
144 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
145 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
146 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */
147 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
148 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
149 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */
150 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */
155 int i; /* Looping var */
156 cups_encoding_t encoding; /* Source encoding */
161 puts("Usage: ./testi18n [filename charset]");
165 if ((fp = fopen(argv[1], "rb")) == NULL)
/* Look argv[2] up in lang_encodings[]; the matching index is the encoding. */
171 for (i = 0, encoding = CUPS_AUTO_ENCODING;
172 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
174 if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
176 encoding = (cups_encoding_t)i;
/* Still the CUPS_AUTO_ENCODING sentinel: no table entry matched argv[2]. */
180 if (encoding == CUPS_AUTO_ENCODING)
182 fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
/* Convert the input file to UTF-8 one line at a time. */
186 while (fgets(line, sizeof(line), fp))
188 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
190 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
194 fputs((char *)utf8dest, stdout);
202 * Start with some conversion tests from a UTF-8 test file.
207 if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
209 perror("utf8demo.txt");
/* Progress messages name the file actually opened above (utf8demo.txt). */
217 fputs("cupsUTF8ToUTF32 of utf8demo.txt: ", stdout);
219 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
223 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
225 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
236 * cupsUTF8ToCharset(CUPS_EUC_JP)
239 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utf8demo.txt: ", stdout);
243 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
247 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
250 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
263 * Test UTF-8 to legacy charset (ISO 8859-1)...
266 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
270 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
273 printf("FAIL (len=%d)\n", len);
283 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
285 strcpy(legsrc, legdest);
287 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
/* len is an int; cast strlen() so the comparison stays signed, as in the message below. */
288 if (len != (int)strlen((char *)utf8latin))
290 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
291 print_utf8(" utf8latin", utf8latin);
292 print_utf8(" utf8dest", utf8dest);
295 else if (memcmp(utf8latin, utf8dest, len))
297 puts("FAIL (results do not match)");
298 print_utf8(" utf8latin", utf8latin);
299 print_utf8(" utf8dest", utf8dest);
302 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
304 puts("FAIL (replacement characters do not work!)");
311 * Test UTF-8 to/from legacy charset (ISO 8859-7)...
314 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
316 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
/* Walk legdest up to the first '?' or the terminating NUL. */
323 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
327 puts("FAIL (unknown character)");
334 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
336 strcpy(legsrc, legdest);
338 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
339 if (len != (int)strlen((char *)utf8greek))
341 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
342 print_utf8(" utf8greek", utf8greek);
343 print_utf8(" utf8dest", utf8dest);
346 else if (memcmp(utf8greek, utf8dest, len))
348 puts("FAIL (results do not match)");
349 print_utf8(" utf8greek", utf8greek);
350 print_utf8(" utf8dest", utf8dest);
357 * Test UTF-8 to/from legacy charset (Windows 932)...
360 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
362 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
369 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
373 puts("FAIL (unknown character)");
380 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
382 strcpy(legsrc, legdest);
384 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
385 if (len != (int)strlen((char *)utf8japan))
387 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
388 print_utf8(" utf8japan", utf8japan);
389 print_utf8(" utf8dest", utf8dest);
392 else if (memcmp(utf8japan, utf8dest, len))
394 puts("FAIL (results do not match)");
395 print_utf8(" utf8japan", utf8japan);
396 print_utf8(" utf8dest", utf8dest);
403 * Test UTF-8 to/from legacy charset (EUC-JP)...
406 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
408 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
415 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
419 puts("FAIL (unknown character)");
/* The EUC-JP to UTF-8 round trip is compiled out when __linux__ or __GLIBC__ is defined. */
426 #if !defined(__linux__) && !defined(__GLIBC__)
427 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
429 strcpy(legsrc, legdest);
431 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
432 if (len != (int)strlen((char *)utf8japan))
434 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
435 print_utf8(" utf8japan", utf8japan);
436 print_utf8(" utf8dest", utf8dest);
439 else if (memcmp(utf8japan, utf8dest, len))
441 puts("FAIL (results do not match)");
442 print_utf8(" utf8japan", utf8japan);
443 print_utf8(" utf8dest", utf8dest);
448 #endif /* !__linux__ && !__GLIBC__ */
451 * Test UTF-8 to/from legacy charset (Windows 950)...
454 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
456 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
463 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
467 puts("FAIL (unknown character)");
474 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
476 strcpy(legsrc, legdest);
478 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
479 if (len != (int)strlen((char *)utf8taiwan))
481 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
482 print_utf8(" utf8taiwan", utf8taiwan);
483 print_utf8(" utf8dest", utf8dest);
486 else if (memcmp(utf8taiwan, utf8dest, len))
488 puts("FAIL (results do not match)");
489 print_utf8(" utf8taiwan", utf8taiwan);
490 print_utf8(" utf8dest", utf8dest);
497 * Test UTF-8 to/from legacy charset (EUC-TW)...
500 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
502 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
509 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
513 puts("FAIL (unknown character)");
520 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
522 strcpy(legsrc, legdest);
524 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
525 if (len != (int)strlen((char *)utf8taiwan))
527 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
528 print_utf8(" utf8taiwan", utf8taiwan);
529 print_utf8(" utf8dest", utf8dest);
532 else if (memcmp(utf8taiwan, utf8dest, len))
534 puts("FAIL (results do not match)");
535 print_utf8(" utf8taiwan", utf8taiwan);
536 print_utf8(" utf8dest", utf8dest);
544 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
547 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
548 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
553 print_utf8(" utf8good ", utf8good);
554 print_utf32(" utf32dest", utf32dest);
556 memcpy (utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
557 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
560 if (len != (int)strlen ((char *) utf8good))
562 if (memcmp(utf8good, utf8dest, len) != 0)
566 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
569 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
570 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
574 print_utf8(" utf8bad ", utf8bad);
577 * Test _cupsCharmapFlush()...
580 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
590 * 'print_utf8()' - Print UTF-8 string with (optional) message.
/*
 * Dumps the bytes of 'src' as two-digit lowercase hexadecimal values,
 * stopping at the terminating NUL byte.  Each value is preceded by the
 * current 'prefix' string.
 */
594 print_utf8(const char *msg, /* I - Message String */
595 const cups_utf8_t *src) /* I - UTF-8 Source String */
597 const char *prefix; /* Prefix string */
/* 'prefix' starts as a single space; advance one byte per iteration. */
603 for (prefix = " "; *src; src ++)
605 printf("%s%02x", prefix, *src);
/*
 * True when this byte and the next one both have the high bit set, i.e.
 * both are non-ASCII bytes.  NOTE(review): the prefix update chosen by this
 * test is not visible in this excerpt - confirm against the full file.
 */
607 if ((src[0] & 0x80) && (src[1] & 0x80))
618 * End of "$Id: testi18n.c 9793 2011-05-20 03:49:49Z mike $"