Revert manifest to default one
[external/cups.git] / cups / testi18n.c
1 /*
2  * "$Id: testi18n.c 9793 2011-05-20 03:49:49Z mike $"
3  *
4  *   Internationalization test for CUPS.
5  *
6  *   Copyright 2007-2011 by Apple Inc.
7  *   Copyright 1997-2006 by Easy Software Products.
8  *
9  *   These coded instructions, statements, and computer programs are the
10  *   property of Apple Inc. and are protected by Federal copyright
11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
12  *   which should have been included with this file.  If this
13  *   file is missing or damaged, see the license at "http://www.cups.org/".
14  *
15  *   This file is subject to the Apple OS-Developed Software exception.
16  *
17  * Contents:
18  *
19  *   main()       - Main entry for internationalization test module.
20  *   print_utf8() - Print UTF-8 string with (optional) message.
21  */
22
23 /*
24  * Include necessary headers...
25  */
26
27 #include "string-private.h"
28 #include "language-private.h"
29 #include <stdlib.h>
30 #include <time.h>
31 #include <unistd.h>
32
33
34 /*
35  * Local globals...
36  */
37
38 static const char * const lang_encodings[] =
39                         {               /* Encoding strings */
40                           "us-ascii",           "iso-8859-1",
41                           "iso-8859-2",         "iso-8859-3",
42                           "iso-8859-4",         "iso-8859-5",
43                           "iso-8859-6",         "iso-8859-7",
44                           "iso-8859-8",         "iso-8859-9",
45                           "iso-8859-10",        "utf-8",
46                           "iso-8859-13",        "iso-8859-14",
47                           "iso-8859-15",        "windows-874",
48                           "windows-1250",       "windows-1251",
49                           "windows-1252",       "windows-1253",
50                           "windows-1254",       "windows-1255",
51                           "windows-1256",       "windows-1257",
52                           "windows-1258",       "koi8-r",
53                           "koi8-u",             "iso-8859-11",
54                           "iso-8859-16",        "mac-roman",
55                           "unknown",            "unknown",
56                           "unknown",            "unknown",
57                           "unknown",            "unknown",
58                           "unknown",            "unknown",
59                           "unknown",            "unknown",
60                           "unknown",            "unknown",
61                           "unknown",            "unknown",
62                           "unknown",            "unknown",
63                           "unknown",            "unknown",
64                           "unknown",            "unknown",
65                           "unknown",            "unknown",
66                           "unknown",            "unknown",
67                           "unknown",            "unknown",
68                           "unknown",            "unknown",
69                           "unknown",            "unknown",
70                           "unknown",            "unknown",
71                           "unknown",            "unknown",
72                           "windows-932",        "windows-936",
73                           "windows-949",        "windows-950",
74                           "windows-1361",       "unknown",
75                           "unknown",            "unknown",
76                           "unknown",            "unknown",
77                           "unknown",            "unknown",
78                           "unknown",            "unknown",
79                           "unknown",            "unknown",
80                           "unknown",            "unknown",
81                           "unknown",            "unknown",
82                           "unknown",            "unknown",
83                           "unknown",            "unknown",
84                           "unknown",            "unknown",
85                           "unknown",            "unknown",
86                           "unknown",            "unknown",
87                           "unknown",            "unknown",
88                           "unknown",            "unknown",
89                           "unknown",            "unknown",
90                           "unknown",            "unknown",
91                           "unknown",            "unknown",
92                           "unknown",            "unknown",
93                           "unknown",            "unknown",
94                           "unknown",            "unknown",
95                           "unknown",            "unknown",
96                           "unknown",            "unknown",
97                           "unknown",            "unknown",
98                           "unknown",            "unknown",
99                           "unknown",            "unknown",
100                           "unknown",            "unknown",
101                           "unknown",            "unknown",
102                           "unknown",            "unknown",
103                           "unknown",            "unknown",
104                           "euc-cn",             "euc-jp",
105                           "euc-kr",             "euc-tw",
106                           "jis-x0213"
107                         };
108
109
110 /*
111  * Local functions...
112  */
113
114 static void     print_utf8(const char *msg, const cups_utf8_t *src);
115
116
117 /*
118  * 'main()' - Main entry for internationalization test module.
119  */
120
121 int                                     /* O - Exit code */
122 main(int  argc,                         /* I - Argument Count */
123      char *argv[])                      /* I - Arguments */
124 {
125   FILE          *fp;                    /* File pointer */
126   int           count;                  /* File line counter */
127   int           status,                 /* Status of current test */
128                 errors;                 /* Error count */
129   char          line[1024];             /* File line source string */
130   int           len;                    /* Length (count) of string */
131   char          legsrc[1024],           /* Legacy source string */
132                 legdest[1024],          /* Legacy destination string */
133                 *legptr;                /* Pointer into legacy string */
134   cups_utf8_t   utf8latin[] =           /* UTF-8 Latin-1 source */
135     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
136     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
137   cups_utf8_t   utf8repla[] =           /* UTF-8 Latin-1 replacement */
138     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
139     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
140   cups_utf8_t   utf8greek[] =           /* UTF-8 Greek source string */
141     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
142     /* "A != <ALPHA>." - use ISO 8859-7 */
143   cups_utf8_t   utf8japan[] =           /* UTF-8 Japanese source */
144     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
145     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
146   cups_utf8_t   utf8taiwan[] =          /* UTF-8 Chinese source */
147     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
148     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
149   cups_utf8_t   utf8dest[1024];         /* UTF-8 destination string */
150   cups_utf32_t  utf32dest[1024];        /* UTF-32 destination string */
151
152
153   if (argc > 1)
154   {
155     int                 i;              /* Looping var */
156     cups_encoding_t     encoding;       /* Source encoding */
157
158
159     if (argc != 3)
160     {
161       puts("Usage: ./testi18n [filename charset]");
162       return (1);
163     }
164
165     if ((fp = fopen(argv[1], "rb")) == NULL)
166     {
167       perror(argv[1]);
168       return (1);
169     }
170
171     for (i = 0, encoding = CUPS_AUTO_ENCODING;
172          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
173          i ++)
174       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
175       {
176         encoding = (cups_encoding_t)i;
177         break;
178       }
179
180     if (encoding == CUPS_AUTO_ENCODING)
181     {
182       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
183       return (1);
184     }
185
186     while (fgets(line, sizeof(line), fp))
187     {
188       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
189       {
190         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
191         return (1);
192       }
193
194       fputs((char *)utf8dest, stdout);
195     }
196
197     fclose(fp);
198     return (0);
199   }
200
201  /*
202   * Start with some conversion tests from a UTF-8 test file.
203   */
204
205   errors = 0;
206
207   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
208   {
209     perror("utf8demo.txt");
210     return (1);
211   }
212
213  /*
214   * cupsUTF8ToUTF32
215   */
216
217   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
218
219   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
220   {
221     count ++;
222
223     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
224     {
225       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
226       errors ++;
227       status = 1;
228       break;
229     }
230   }
231
232   if (!status)
233     puts("PASS");
234
235  /*
236   * cupsUTF8ToCharset(CUPS_EUC_JP)
237   */
238
239   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
240
241   rewind(fp);
242
243   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
244   {
245     count ++;
246
247     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
248     if (len < 0)
249     {
250       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
251       errors ++;
252       status = 1;
253       break;
254     }
255   }
256
257   if (!status)
258     puts("PASS");
259
260   fclose(fp);
261
262  /*
263   * Test UTF-8 to legacy charset (ISO 8859-1)...
264   */
265
266   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
267
268   legdest[0] = 0;
269
270   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
271   if (len < 0)
272   {
273     printf("FAIL (len=%d)\n", len);
274     errors ++;
275   }
276   else
277     puts("PASS");
278
279  /*
280   * cupsCharsetToUTF8
281   */
282
283   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
284
285   strcpy(legsrc, legdest);
286
287   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
288   if (len != strlen((char *)utf8latin))
289   {
290     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
291     print_utf8("    utf8latin", utf8latin);
292     print_utf8("    utf8dest", utf8dest);
293     errors ++;
294   }
295   else if (memcmp(utf8latin, utf8dest, len))
296   {
297     puts("FAIL (results do not match)");
298     print_utf8("    utf8latin", utf8latin);
299     print_utf8("    utf8dest", utf8dest);
300     errors ++;
301   }
302   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
303   {
304     puts("FAIL (replacement characters do not work!)");
305     errors ++;
306   }
307   else
308     puts("PASS");
309
310  /*
311   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
312   */
313
314   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
315
316   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
317   {
318     puts("FAIL");
319     errors ++;
320   }
321   else
322   {
323     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
324
325     if (*legptr)
326     {
327       puts("FAIL (unknown character)");
328       errors ++;
329     }
330     else
331       puts("PASS");
332   }
333
334   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
335
336   strcpy(legsrc, legdest);
337
338   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
339   if (len != strlen((char *)utf8greek))
340   {
341     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
342     print_utf8("    utf8greek", utf8greek);
343     print_utf8("    utf8dest", utf8dest);
344     errors ++;
345   }
346   else if (memcmp(utf8greek, utf8dest, len))
347   {
348     puts("FAIL (results do not match)");
349     print_utf8("    utf8greek", utf8greek);
350     print_utf8("    utf8dest", utf8dest);
351     errors ++;
352   }
353   else
354     puts("PASS");
355
356  /*
357   * Test UTF-8 to/from legacy charset (Windows 932)...
358   */
359
360   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
361
362   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
363   {
364     puts("FAIL");
365     errors ++;
366   }
367   else
368   {
369     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
370
371     if (*legptr)
372     {
373       puts("FAIL (unknown character)");
374       errors ++;
375     }
376     else
377       puts("PASS");
378   }
379
380   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
381
382   strcpy(legsrc, legdest);
383
384   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
385   if (len != strlen((char *)utf8japan))
386   {
387     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
388     print_utf8("    utf8japan", utf8japan);
389     print_utf8("    utf8dest", utf8dest);
390     errors ++;
391   }
392   else if (memcmp(utf8japan, utf8dest, len))
393   {
394     puts("FAIL (results do not match)");
395     print_utf8("    utf8japan", utf8japan);
396     print_utf8("    utf8dest", utf8dest);
397     errors ++;
398   }
399   else
400     puts("PASS");
401
402  /*
403   * Test UTF-8 to/from legacy charset (EUC-JP)...
404   */
405
406   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
407
408   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
409   {
410     puts("FAIL");
411     errors ++;
412   }
413   else
414   {
415     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
416
417     if (*legptr)
418     {
419       puts("FAIL (unknown character)");
420       errors ++;
421     }
422     else
423       puts("PASS");
424   }
425
426 #if !defined(__linux__) && !defined(__GLIBC__)
427   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
428
429   strcpy(legsrc, legdest);
430
431   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
432   if (len != strlen((char *)utf8japan))
433   {
434     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
435     print_utf8("    utf8japan", utf8japan);
436     print_utf8("    utf8dest", utf8dest);
437     errors ++;
438   }
439   else if (memcmp(utf8japan, utf8dest, len))
440   {
441     puts("FAIL (results do not match)");
442     print_utf8("    utf8japan", utf8japan);
443     print_utf8("    utf8dest", utf8dest);
444     errors ++;
445   }
446   else
447     puts("PASS");
448 #endif /* !__linux */
449
450  /*
451   * Test UTF-8 to/from legacy charset (Windows 950)...
452   */
453
454   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
455
456   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
457   {
458     puts("FAIL");
459     errors ++;
460   }
461   else
462   {
463     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
464
465     if (*legptr)
466     {
467       puts("FAIL (unknown character)");
468       errors ++;
469     }
470     else
471       puts("PASS");
472   }
473
474   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
475
476   strcpy(legsrc, legdest);
477
478   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
479   if (len != strlen((char *)utf8taiwan))
480   {
481     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
482     print_utf8("    utf8taiwan", utf8taiwan);
483     print_utf8("    utf8dest", utf8dest);
484     errors ++;
485   }
486   else if (memcmp(utf8taiwan, utf8dest, len))
487   {
488     puts("FAIL (results do not match)");
489     print_utf8("    utf8taiwan", utf8taiwan);
490     print_utf8("    utf8dest", utf8dest);
491     errors ++;
492   }
493   else
494     puts("PASS");
495
496  /*
497   * Test UTF-8 to/from legacy charset (EUC-TW)...
498   */
499
500   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
501
502   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
503   {
504     puts("FAIL");
505     errors ++;
506   }
507   else
508   {
509     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
510
511     if (*legptr)
512     {
513       puts("FAIL (unknown character)");
514       errors ++;
515     }
516     else
517       puts("PASS");
518   }
519
520   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
521
522   strcpy(legsrc, legdest);
523
524   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
525   if (len != strlen((char *)utf8taiwan))
526   {
527     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
528     print_utf8("    utf8taiwan", utf8taiwan);
529     print_utf8("    utf8dest", utf8dest);
530     errors ++;
531   }
532   else if (memcmp(utf8taiwan, utf8dest, len))
533   {
534     puts("FAIL (results do not match)");
535     print_utf8("    utf8taiwan", utf8taiwan);
536     print_utf8("    utf8dest", utf8dest);
537     errors ++;
538   }
539   else
540     puts("PASS");
541
542 #if 0
543  /*
544   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
545   */
546   if (verbose)
547     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
548   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
549   if (len < 0)
550     return (1);
551   if (verbose)
552   {
553     print_utf8(" utf8good ", utf8good);
554     print_utf32(" utf32dest", utf32dest);
555   }
556   memcpy (utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
557   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
558   if (len < 0)
559     return (1);
560   if (len != strlen ((char *) utf8good))
561     return (1);
562   if (memcmp(utf8good, utf8dest, len) != 0)
563     return (1);
564
565  /*
566   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
567   */
568   if (verbose)
569     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
570   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
571   if (len >= 0)
572     return (1);
573   if (verbose)
574     print_utf8(" utf8bad  ", utf8bad);
575
576  /*
577   * Test _cupsCharmapFlush()...
578   */
579   if (verbose)
580     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
581   _cupsCharmapFlush();
582   return (0);
583 #endif /* 0 */
584
585   return (errors > 0);
586 }
587
588
589 /*
590  * 'print_utf8()' - Print UTF-8 string with (optional) message.
591  */
592
593 static void
594 print_utf8(const char        *msg,      /* I - Message String */
595            const cups_utf8_t *src)      /* I - UTF-8 Source String */
596 {
597   const char    *prefix;                /* Prefix string */
598
599
600   if (msg)
601     printf("%s:", msg);
602
603   for (prefix = " "; *src; src ++)
604   {
605     printf("%s%02x", prefix, *src);
606
607     if ((src[0] & 0x80) && (src[1] & 0x80))
608       prefix = "";
609     else
610       prefix = " ";
611   }
612
613   putchar('\n');
614 }
615
616
617 /*
618  * End of "$Id: testi18n.c 9793 2011-05-20 03:49:49Z mike $"
619  */