Bump to cups 2.3.3
[platform/upstream/cups.git] / cups / testi18n.c
1 /*
2  * Internationalization test for CUPS.
3  *
4  * Copyright 2007-2014 by Apple Inc.
5  * Copyright 1997-2006 by Easy Software Products.
6  *
7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
8  */
9
10 /*
11  * Include necessary headers...
12  */
13
14 #include "string-private.h"
15 #include "language-private.h"
16 #include <stdlib.h>
17 #include <time.h>
18 #include <unistd.h>
19
20
21 /*
22  * Local globals...
23  */
24
/*
 * Character set names, indexed by encoding number.  Numbers that have no
 * name assigned in this table hold the placeholder string "unknown".
 */

static const char * const lang_encodings[] =
{
  /*   0 */ "us-ascii",     "iso-8859-1",   "iso-8859-2",   "iso-8859-3",   "iso-8859-4",
  /*   5 */ "iso-8859-5",   "iso-8859-6",   "iso-8859-7",   "iso-8859-8",   "iso-8859-9",
  /*  10 */ "iso-8859-10",  "utf-8",        "iso-8859-13",  "iso-8859-14",  "iso-8859-15",
  /*  15 */ "windows-874",  "windows-1250", "windows-1251", "windows-1252", "windows-1253",
  /*  20 */ "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258",
  /*  25 */ "koi8-r",       "koi8-u",       "iso-8859-11",  "iso-8859-16",  "mac-roman",

  /*  30 */ "unknown", "unknown", "unknown", "unknown",
  /*  34 */ "unknown", "unknown", "unknown", "unknown",
  /*  38 */ "unknown", "unknown", "unknown", "unknown",
  /*  42 */ "unknown", "unknown", "unknown", "unknown",
  /*  46 */ "unknown", "unknown", "unknown", "unknown",
  /*  50 */ "unknown", "unknown", "unknown", "unknown",
  /*  54 */ "unknown", "unknown", "unknown", "unknown",
  /*  58 */ "unknown", "unknown", "unknown", "unknown",
  /*  62 */ "unknown", "unknown",

  /*  64 */ "windows-932",  "windows-936",  "windows-949",  "windows-950",
  /*  68 */ "windows-1361",

  /*  69 */ "unknown", "unknown", "unknown",
  /*  72 */ "unknown", "unknown", "unknown", "unknown",
  /*  76 */ "unknown", "unknown", "unknown", "unknown",
  /*  80 */ "unknown", "unknown", "unknown", "unknown",
  /*  84 */ "unknown", "unknown", "unknown", "unknown",
  /*  88 */ "unknown", "unknown", "unknown", "unknown",
  /*  92 */ "unknown", "unknown", "unknown", "unknown",
  /*  96 */ "unknown", "unknown", "unknown", "unknown",
  /* 100 */ "unknown", "unknown", "unknown", "unknown",
  /* 104 */ "unknown", "unknown", "unknown", "unknown",
  /* 108 */ "unknown", "unknown", "unknown", "unknown",
  /* 112 */ "unknown", "unknown", "unknown", "unknown",
  /* 116 */ "unknown", "unknown", "unknown", "unknown",
  /* 120 */ "unknown", "unknown", "unknown", "unknown",
  /* 124 */ "unknown", "unknown", "unknown", "unknown",

  /* 128 */ "euc-cn",       "euc-jp",       "euc-kr",       "euc-tw",
  /* 132 */ "jis-x0213"
};
95
96
97 /*
98  * Local functions...
99  */
100
101 static void     print_utf8(const char *msg, const cups_utf8_t *src);
102
103
104 /*
105  * 'main()' - Main entry for internationalization test module.
106  */
107
108 int                                     /* O - Exit code */
109 main(int  argc,                         /* I - Argument Count */
110      char *argv[])                      /* I - Arguments */
111 {
112   FILE          *fp;                    /* File pointer */
113   int           count;                  /* File line counter */
114   int           status,                 /* Status of current test */
115                 errors;                 /* Error count */
116   char          line[1024];             /* File line source string */
117   int           len;                    /* Length (count) of string */
118   char          legsrc[1024],           /* Legacy source string */
119                 legdest[1024],          /* Legacy destination string */
120                 *legptr;                /* Pointer into legacy string */
121   cups_utf8_t   utf8latin[] =           /* UTF-8 Latin-1 source */
122     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
123     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
124   cups_utf8_t   utf8repla[] =           /* UTF-8 Latin-1 replacement */
125     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
126     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
127   cups_utf8_t   utf8greek[] =           /* UTF-8 Greek source string */
128     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
129     /* "A != <ALPHA>." - use ISO 8859-7 */
130   cups_utf8_t   utf8japan[] =           /* UTF-8 Japanese source */
131     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
132     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
133   cups_utf8_t   utf8taiwan[] =          /* UTF-8 Chinese source */
134     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
135     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
136   cups_utf8_t   utf8dest[1024];         /* UTF-8 destination string */
137   cups_utf32_t  utf32dest[1024];        /* UTF-32 destination string */
138
139
140   if (argc > 1)
141   {
142     int                 i;              /* Looping var */
143     cups_encoding_t     encoding;       /* Source encoding */
144
145
146     if (argc != 3)
147     {
148       puts("Usage: ./testi18n [filename charset]");
149       return (1);
150     }
151
152     if ((fp = fopen(argv[1], "rb")) == NULL)
153     {
154       perror(argv[1]);
155       return (1);
156     }
157
158     for (i = 0, encoding = CUPS_AUTO_ENCODING;
159          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
160          i ++)
161       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
162       {
163         encoding = (cups_encoding_t)i;
164         break;
165       }
166
167     if (encoding == CUPS_AUTO_ENCODING)
168     {
169       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
170       return (1);
171     }
172
173     while (fgets(line, sizeof(line), fp))
174     {
175       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
176       {
177         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
178         return (1);
179       }
180
181       fputs((char *)utf8dest, stdout);
182     }
183
184     fclose(fp);
185     return (0);
186   }
187
188  /*
189   * Start with some conversion tests from a UTF-8 test file.
190   */
191
192   errors = 0;
193
194   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
195   {
196     perror("utf8demo.txt");
197     return (1);
198   }
199
200  /*
201   * cupsUTF8ToUTF32
202   */
203
204   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
205
206   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
207   {
208     count ++;
209
210     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
211     {
212       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
213       errors ++;
214       status = 1;
215       break;
216     }
217   }
218
219   if (!status)
220     puts("PASS");
221
222  /*
223   * cupsUTF8ToCharset(CUPS_EUC_JP)
224   */
225
226   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
227
228   rewind(fp);
229
230   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
231   {
232     count ++;
233
234     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
235     if (len < 0)
236     {
237       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
238       errors ++;
239       status = 1;
240       break;
241     }
242   }
243
244   if (!status)
245     puts("PASS");
246
247   fclose(fp);
248
249  /*
250   * Test UTF-8 to legacy charset (ISO 8859-1)...
251   */
252
253   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
254
255   legdest[0] = 0;
256
257   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
258   if (len < 0)
259   {
260     printf("FAIL (len=%d)\n", len);
261     errors ++;
262   }
263   else
264     puts("PASS");
265
266  /*
267   * cupsCharsetToUTF8
268   */
269
270   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
271
272   strlcpy(legsrc, legdest, sizeof(legsrc));
273
274   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
275   if ((size_t)len != strlen((char *)utf8latin))
276   {
277     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
278     print_utf8("    utf8latin", utf8latin);
279     print_utf8("    utf8dest", utf8dest);
280     errors ++;
281   }
282   else if (memcmp(utf8latin, utf8dest, (size_t)len))
283   {
284     puts("FAIL (results do not match)");
285     print_utf8("    utf8latin", utf8latin);
286     print_utf8("    utf8dest", utf8dest);
287     errors ++;
288   }
289   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
290   {
291     puts("FAIL (replacement characters do not work!)");
292     errors ++;
293   }
294   else
295     puts("PASS");
296
297  /*
298   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
299   */
300
301   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
302
303   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
304   {
305     puts("FAIL");
306     errors ++;
307   }
308   else
309   {
310     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
311
312     if (*legptr)
313     {
314       puts("FAIL (unknown character)");
315       errors ++;
316     }
317     else
318       puts("PASS");
319   }
320
321   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
322
323   strlcpy(legsrc, legdest, sizeof(legsrc));
324
325   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
326   if ((size_t)len != strlen((char *)utf8greek))
327   {
328     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
329     print_utf8("    utf8greek", utf8greek);
330     print_utf8("    utf8dest", utf8dest);
331     errors ++;
332   }
333   else if (memcmp(utf8greek, utf8dest, (size_t)len))
334   {
335     puts("FAIL (results do not match)");
336     print_utf8("    utf8greek", utf8greek);
337     print_utf8("    utf8dest", utf8dest);
338     errors ++;
339   }
340   else
341     puts("PASS");
342
343  /*
344   * Test UTF-8 to/from legacy charset (Windows 932)...
345   */
346
347   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
348
349   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
350   {
351     puts("FAIL");
352     errors ++;
353   }
354   else
355   {
356     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
357
358     if (*legptr)
359     {
360       puts("FAIL (unknown character)");
361       errors ++;
362     }
363     else
364       puts("PASS");
365   }
366
367   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
368
369   strlcpy(legsrc, legdest, sizeof(legsrc));
370
371   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
372   if ((size_t)len != strlen((char *)utf8japan))
373   {
374     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
375     print_utf8("    utf8japan", utf8japan);
376     print_utf8("    utf8dest", utf8dest);
377     errors ++;
378   }
379   else if (memcmp(utf8japan, utf8dest, (size_t)len))
380   {
381     puts("FAIL (results do not match)");
382     print_utf8("    utf8japan", utf8japan);
383     print_utf8("    utf8dest", utf8dest);
384     errors ++;
385   }
386   else
387     puts("PASS");
388
389  /*
390   * Test UTF-8 to/from legacy charset (EUC-JP)...
391   */
392
393   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
394
395   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
396   {
397     puts("FAIL");
398     errors ++;
399   }
400   else
401   {
402     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
403
404     if (*legptr)
405     {
406       puts("FAIL (unknown character)");
407       errors ++;
408     }
409     else
410       puts("PASS");
411   }
412
413 #ifndef __linux
414   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
415
416   strlcpy(legsrc, legdest, sizeof(legsrc));
417
418   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
419   if ((size_t)len != strlen((char *)utf8japan))
420   {
421     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
422     print_utf8("    utf8japan", utf8japan);
423     print_utf8("    utf8dest", utf8dest);
424     errors ++;
425   }
426   else if (memcmp(utf8japan, utf8dest, (size_t)len))
427   {
428     puts("FAIL (results do not match)");
429     print_utf8("    utf8japan", utf8japan);
430     print_utf8("    utf8dest", utf8dest);
431     errors ++;
432   }
433   else
434     puts("PASS");
435 #endif /* !__linux */
436
437  /*
438   * Test UTF-8 to/from legacy charset (Windows 950)...
439   */
440
441   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
442
443   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
444   {
445     puts("FAIL");
446     errors ++;
447   }
448   else
449   {
450     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
451
452     if (*legptr)
453     {
454       puts("FAIL (unknown character)");
455       errors ++;
456     }
457     else
458       puts("PASS");
459   }
460
461   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
462
463   strlcpy(legsrc, legdest, sizeof(legsrc));
464
465   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
466   if ((size_t)len != strlen((char *)utf8taiwan))
467   {
468     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
469     print_utf8("    utf8taiwan", utf8taiwan);
470     print_utf8("    utf8dest", utf8dest);
471     errors ++;
472   }
473   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
474   {
475     puts("FAIL (results do not match)");
476     print_utf8("    utf8taiwan", utf8taiwan);
477     print_utf8("    utf8dest", utf8dest);
478     errors ++;
479   }
480   else
481     puts("PASS");
482
483  /*
484   * Test UTF-8 to/from legacy charset (EUC-TW)...
485   */
486
487   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
488
489   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
490   {
491     puts("FAIL");
492     errors ++;
493   }
494   else
495   {
496     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
497
498     if (*legptr)
499     {
500       puts("FAIL (unknown character)");
501       errors ++;
502     }
503     else
504       puts("PASS");
505   }
506
507   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
508
509   strlcpy(legsrc, legdest, sizeof(legsrc));
510
511   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
512   if ((size_t)len != strlen((char *)utf8taiwan))
513   {
514     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
515     print_utf8("    utf8taiwan", utf8taiwan);
516     print_utf8("    utf8dest", utf8dest);
517     errors ++;
518   }
519   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
520   {
521     puts("FAIL (results do not match)");
522     print_utf8("    utf8taiwan", utf8taiwan);
523     print_utf8("    utf8dest", utf8dest);
524     errors ++;
525   }
526   else
527     puts("PASS");
528
529 #if 0
530  /*
531   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
532   */
533   if (verbose)
534     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
535   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
536   if (len < 0)
537     return (1);
538   if (verbose)
539   {
540     print_utf8(" utf8good ", utf8good);
541     print_utf32(" utf32dest", utf32dest);
542   }
543   memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
544   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
545   if (len < 0)
546     return (1);
547   if (len != strlen ((char *) utf8good))
548     return (1);
549   if (memcmp(utf8good, utf8dest, len) != 0)
550     return (1);
551
552  /*
553   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
554   */
555   if (verbose)
556     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
557   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
558   if (len >= 0)
559     return (1);
560   if (verbose)
561     print_utf8(" utf8bad  ", utf8bad);
562
563  /*
564   * Test _cupsCharmapFlush()...
565   */
566   if (verbose)
567     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
568   _cupsCharmapFlush();
569   return (0);
570 #endif /* 0 */
571
572   return (errors > 0);
573 }
574
575
576 /*
577  * 'print_utf8()' - Print UTF-8 string with (optional) message.
578  */
579
580 static void
581 print_utf8(const char        *msg,      /* I - Message String */
582            const cups_utf8_t *src)      /* I - UTF-8 Source String */
583 {
584   const char    *prefix;                /* Prefix string */
585
586
587   if (msg)
588     printf("%s:", msg);
589
590   for (prefix = " "; *src; src ++)
591   {
592     printf("%s%02x", prefix, *src);
593
594     if ((src[0] & 0x80) && (src[1] & 0x80))
595       prefix = "";
596     else
597       prefix = " ";
598   }
599
600   putchar('\n');
601 }