1 /* Test of conversion of multibyte character to wide character.
2 Copyright (C) 2008-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
32 test_one_locale (const char *name, int codepage)
39 /* Portable code to set the locale. */
41 char name_with_codepage[1024];
43 sprintf (name_with_codepage, "%s.%d", name, codepage);
46 if (setlocale (LC_ALL, name_with_codepage) == NULL)
50 /* Hacky way to set a locale.codepage combination that setlocale() refuses to set. */
53 /* Codepage of the current locale, set with setlocale().
54 Not necessarily the same as GetACP(). */
55 extern __declspec(dllimport) unsigned int __lc_codepage;
58 if (setlocale (LC_ALL, name) == NULL)
61 /* Clobber the codepage and MB_CUR_MAX, both set by setlocale(). */
62 __lc_codepage = codepage;
80 /* Test whether the codepage is really available. */
81 memset (&state, '\0', sizeof (mbstate_t));
82 if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
87 /* Test zero-length input. */
89 memset (&state, '\0', sizeof (mbstate_t));
90 wc = (wchar_t) 0xBADFACE;
91 ret = mbrtowc (&wc, "x", 0, &state);
92 /* gnulib's implementation returns (size_t)(-2).
93 The AIX 5.1 implementation returns (size_t)(-1).
94 glibc's implementation returns 0. */
95 ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
96 ASSERT (mbsinit (&state));
99 /* Test NUL byte input. */
101 memset (&state, '\0', sizeof (mbstate_t));
102 wc = (wchar_t) 0xBADFACE;
103 ret = mbrtowc (&wc, "", 1, &state);
106 ASSERT (mbsinit (&state));
107 ret = mbrtowc (NULL, "", 1, &state);
109 ASSERT (mbsinit (&state));
112 /* Test single-byte input. */
117 memset (&state, '\0', sizeof (mbstate_t));
118 for (c = 0; c < 0x100; c++)
121 case '\t': case '\v': case '\f':
122 case ' ': case '!': case '"': case '#': case '%':
123 case '&': case '\'': case '(': case ')': case '*':
124 case '+': case ',': case '-': case '.': case '/':
125 case '0': case '1': case '2': case '3': case '4':
126 case '5': case '6': case '7': case '8': case '9':
127 case ':': case ';': case '<': case '=': case '>':
129 case 'A': case 'B': case 'C': case 'D': case 'E':
130 case 'F': case 'G': case 'H': case 'I': case 'J':
131 case 'K': case 'L': case 'M': case 'N': case 'O':
132 case 'P': case 'Q': case 'R': case 'S': case 'T':
133 case 'U': case 'V': case 'W': case 'X': case 'Y':
135 case '[': case '\\': case ']': case '^': case '_':
136 case 'a': case 'b': case 'c': case 'd': case 'e':
137 case 'f': case 'g': case 'h': case 'i': case 'j':
138 case 'k': case 'l': case 'm': case 'n': case 'o':
139 case 'p': case 'q': case 'r': case 's': case 't':
140 case 'u': case 'v': case 'w': case 'x': case 'y':
141 case 'z': case '{': case '|': case '}': case '~':
142 /* c is in the ISO C "basic character set". */
144 wc = (wchar_t) 0xBADFACE;
145 ret = mbrtowc (&wc, buf, 1, &state);
148 ASSERT (mbsinit (&state));
149 ret = mbrtowc (NULL, buf, 1, &state);
151 ASSERT (mbsinit (&state));
156 /* Test special calling convention, passing a NULL pointer. */
158 memset (&state, '\0', sizeof (mbstate_t));
159 wc = (wchar_t) 0xBADFACE;
160 ret = mbrtowc (&wc, NULL, 5, &state);
162 ASSERT (wc == (wchar_t) 0xBADFACE);
163 ASSERT (mbsinit (&state));
169 /* Locale encoding is CP1252, an extension of ISO-8859-1. */
171 char input[] = "B\374\337er"; /* "Büßer" */
172 memset (&state, '\0', sizeof (mbstate_t));
174 wc = (wchar_t) 0xBADFACE;
175 ret = mbrtowc (&wc, input, 1, &state);
178 ASSERT (mbsinit (&state));
181 wc = (wchar_t) 0xBADFACE;
182 ret = mbrtowc (&wc, input + 1, 1, &state);
184 ASSERT (wctob (wc) == (unsigned char) '\374');
185 ASSERT (wc == 0x00FC);
186 ASSERT (mbsinit (&state));
189 /* Test support of NULL first argument. */
190 ret = mbrtowc (NULL, input + 2, 3, &state);
192 ASSERT (mbsinit (&state));
194 wc = (wchar_t) 0xBADFACE;
195 ret = mbrtowc (&wc, input + 2, 3, &state);
197 ASSERT (wctob (wc) == (unsigned char) '\337');
198 ASSERT (wc == 0x00DF);
199 ASSERT (mbsinit (&state));
202 wc = (wchar_t) 0xBADFACE;
203 ret = mbrtowc (&wc, input + 3, 2, &state);
206 ASSERT (mbsinit (&state));
209 wc = (wchar_t) 0xBADFACE;
210 ret = mbrtowc (&wc, input + 4, 1, &state);
213 ASSERT (mbsinit (&state));
218 /* Locale encoding is CP1256, not the same as ISO-8859-6. */
220 char input[] = "x\302\341\346y"; /* "xآلوy" */
221 memset (&state, '\0', sizeof (mbstate_t));
223 wc = (wchar_t) 0xBADFACE;
224 ret = mbrtowc (&wc, input, 1, &state);
227 ASSERT (mbsinit (&state));
230 wc = (wchar_t) 0xBADFACE;
231 ret = mbrtowc (&wc, input + 1, 1, &state);
233 ASSERT (wctob (wc) == (unsigned char) '\302');
234 ASSERT (wc == 0x0622);
235 ASSERT (mbsinit (&state));
238 /* Test support of NULL first argument. */
239 ret = mbrtowc (NULL, input + 2, 3, &state);
241 ASSERT (mbsinit (&state));
243 wc = (wchar_t) 0xBADFACE;
244 ret = mbrtowc (&wc, input + 2, 3, &state);
246 ASSERT (wctob (wc) == (unsigned char) '\341');
247 ASSERT (wc == 0x0644);
248 ASSERT (mbsinit (&state));
251 wc = (wchar_t) 0xBADFACE;
252 ret = mbrtowc (&wc, input + 3, 2, &state);
254 ASSERT (wctob (wc) == (unsigned char) '\346');
255 ASSERT (wc == 0x0648);
256 ASSERT (mbsinit (&state));
259 wc = (wchar_t) 0xBADFACE;
260 ret = mbrtowc (&wc, input + 4, 1, &state);
263 ASSERT (mbsinit (&state));
268 /* Locale encoding is CP932, similar to Shift_JIS. */
270 char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
271 memset (&state, '\0', sizeof (mbstate_t));
273 wc = (wchar_t) 0xBADFACE;
274 ret = mbrtowc (&wc, input, 1, &state);
277 ASSERT (mbsinit (&state));
280 wc = (wchar_t) 0xBADFACE;
281 ret = mbrtowc (&wc, input + 1, 2, &state);
283 ASSERT (wctob (wc) == EOF);
284 ASSERT (wc == 0x65E5);
285 ASSERT (mbsinit (&state));
289 wc = (wchar_t) 0xBADFACE;
290 ret = mbrtowc (&wc, input + 3, 1, &state);
291 ASSERT (ret == (size_t)(-2));
292 ASSERT (wc == (wchar_t) 0xBADFACE);
293 ASSERT (!mbsinit (&state));
296 wc = (wchar_t) 0xBADFACE;
297 ret = mbrtowc (&wc, input + 4, 4, &state);
299 ASSERT (wctob (wc) == EOF);
300 ASSERT (wc == 0x672C);
301 ASSERT (mbsinit (&state));
304 /* Test support of NULL first argument. */
305 ret = mbrtowc (NULL, input + 5, 3, &state);
307 ASSERT (mbsinit (&state));
309 wc = (wchar_t) 0xBADFACE;
310 ret = mbrtowc (&wc, input + 5, 3, &state);
312 ASSERT (wctob (wc) == EOF);
313 ASSERT (wc == 0x8A9E);
314 ASSERT (mbsinit (&state));
318 wc = (wchar_t) 0xBADFACE;
319 ret = mbrtowc (&wc, input + 7, 1, &state);
322 ASSERT (mbsinit (&state));
324 /* Test some invalid input. */
325 memset (&state, '\0', sizeof (mbstate_t));
326 wc = (wchar_t) 0xBADFACE;
327 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
328 ASSERT (ret == (size_t)-1);
329 ASSERT (errno == EILSEQ);
331 memset (&state, '\0', sizeof (mbstate_t));
332 wc = (wchar_t) 0xBADFACE;
333 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
334 ASSERT (ret == (size_t)-1);
335 ASSERT (errno == EILSEQ);
340 /* Locale encoding is CP950, similar to Big5. */
342 char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
343 memset (&state, '\0', sizeof (mbstate_t));
345 wc = (wchar_t) 0xBADFACE;
346 ret = mbrtowc (&wc, input, 1, &state);
349 ASSERT (mbsinit (&state));
352 wc = (wchar_t) 0xBADFACE;
353 ret = mbrtowc (&wc, input + 1, 2, &state);
355 ASSERT (wctob (wc) == EOF);
356 ASSERT (wc == 0x65E5);
357 ASSERT (mbsinit (&state));
361 wc = (wchar_t) 0xBADFACE;
362 ret = mbrtowc (&wc, input + 3, 1, &state);
363 ASSERT (ret == (size_t)(-2));
364 ASSERT (wc == (wchar_t) 0xBADFACE);
365 ASSERT (!mbsinit (&state));
368 wc = (wchar_t) 0xBADFACE;
369 ret = mbrtowc (&wc, input + 4, 4, &state);
371 ASSERT (wctob (wc) == EOF);
372 ASSERT (wc == 0x672C);
373 ASSERT (mbsinit (&state));
376 /* Test support of NULL first argument. */
377 ret = mbrtowc (NULL, input + 5, 3, &state);
379 ASSERT (mbsinit (&state));
381 wc = (wchar_t) 0xBADFACE;
382 ret = mbrtowc (&wc, input + 5, 3, &state);
384 ASSERT (wctob (wc) == EOF);
385 ASSERT (wc == 0x8A9E);
386 ASSERT (mbsinit (&state));
390 wc = (wchar_t) 0xBADFACE;
391 ret = mbrtowc (&wc, input + 7, 1, &state);
394 ASSERT (mbsinit (&state));
396 /* Test some invalid input. */
397 memset (&state, '\0', sizeof (mbstate_t));
398 wc = (wchar_t) 0xBADFACE;
399 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
400 ASSERT (ret == (size_t)-1);
401 ASSERT (errno == EILSEQ);
403 memset (&state, '\0', sizeof (mbstate_t));
404 wc = (wchar_t) 0xBADFACE;
405 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
406 ASSERT (ret == (size_t)-1);
407 ASSERT (errno == EILSEQ);
412 /* Locale encoding is CP936 = GBK, an extension of GB2312. */
414 char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
415 memset (&state, '\0', sizeof (mbstate_t));
417 wc = (wchar_t) 0xBADFACE;
418 ret = mbrtowc (&wc, input, 1, &state);
421 ASSERT (mbsinit (&state));
424 wc = (wchar_t) 0xBADFACE;
425 ret = mbrtowc (&wc, input + 1, 2, &state);
427 ASSERT (wctob (wc) == EOF);
428 ASSERT (wc == 0x65E5);
429 ASSERT (mbsinit (&state));
433 wc = (wchar_t) 0xBADFACE;
434 ret = mbrtowc (&wc, input + 3, 1, &state);
435 ASSERT (ret == (size_t)(-2));
436 ASSERT (wc == (wchar_t) 0xBADFACE);
437 ASSERT (!mbsinit (&state));
440 wc = (wchar_t) 0xBADFACE;
441 ret = mbrtowc (&wc, input + 4, 4, &state);
443 ASSERT (wctob (wc) == EOF);
444 ASSERT (wc == 0x672C);
445 ASSERT (mbsinit (&state));
448 /* Test support of NULL first argument. */
449 ret = mbrtowc (NULL, input + 5, 3, &state);
451 ASSERT (mbsinit (&state));
453 wc = (wchar_t) 0xBADFACE;
454 ret = mbrtowc (&wc, input + 5, 3, &state);
456 ASSERT (wctob (wc) == EOF);
457 ASSERT (wc == 0x8A9E);
458 ASSERT (mbsinit (&state));
462 wc = (wchar_t) 0xBADFACE;
463 ret = mbrtowc (&wc, input + 7, 1, &state);
466 ASSERT (mbsinit (&state));
468 /* Test some invalid input. */
469 memset (&state, '\0', sizeof (mbstate_t));
470 wc = (wchar_t) 0xBADFACE;
471 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
472 ASSERT (ret == (size_t)-1);
473 ASSERT (errno == EILSEQ);
475 memset (&state, '\0', sizeof (mbstate_t));
476 wc = (wchar_t) 0xBADFACE;
477 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
478 ASSERT (ret == (size_t)-1);
479 ASSERT (errno == EILSEQ);
484 /* Locale encoding is CP54936 = GB18030. */
486 char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
487 memset (&state, '\0', sizeof (mbstate_t));
489 wc = (wchar_t) 0xBADFACE;
490 ret = mbrtowc (&wc, input, 1, &state);
493 ASSERT (mbsinit (&state));
496 wc = (wchar_t) 0xBADFACE;
497 ret = mbrtowc (&wc, input + 1, 1, &state);
498 ASSERT (ret == (size_t)(-2));
499 ASSERT (wc == (wchar_t) 0xBADFACE);
500 ASSERT (!mbsinit (&state));
503 wc = (wchar_t) 0xBADFACE;
504 ret = mbrtowc (&wc, input + 2, 7, &state);
506 ASSERT (wctob (wc) == EOF);
507 ASSERT (wc == 0x00FC);
508 ASSERT (mbsinit (&state));
511 /* Test support of NULL first argument. */
512 ret = mbrtowc (NULL, input + 3, 6, &state);
514 ASSERT (mbsinit (&state));
516 wc = (wchar_t) 0xBADFACE;
517 ret = mbrtowc (&wc, input + 3, 6, &state);
519 ASSERT (wctob (wc) == EOF);
520 ASSERT (wc == 0x00DF);
521 ASSERT (mbsinit (&state));
527 wc = (wchar_t) 0xBADFACE;
528 ret = mbrtowc (&wc, input + 7, 2, &state);
531 ASSERT (mbsinit (&state));
534 wc = (wchar_t) 0xBADFACE;
535 ret = mbrtowc (&wc, input + 8, 1, &state);
538 ASSERT (mbsinit (&state));
540 /* Test some invalid input. */
541 memset (&state, '\0', sizeof (mbstate_t));
542 wc = (wchar_t) 0xBADFACE;
543 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
544 ASSERT (ret == (size_t)-1);
545 ASSERT (errno == EILSEQ);
547 memset (&state, '\0', sizeof (mbstate_t));
548 wc = (wchar_t) 0xBADFACE;
549 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
550 ASSERT (ret == (size_t)-1);
551 ASSERT (errno == EILSEQ);
553 memset (&state, '\0', sizeof (mbstate_t));
554 wc = (wchar_t) 0xBADFACE;
555 ret = mbrtowc (&wc, "\201\045", 2, &state); /* 0x81 0x25 */
556 ASSERT (ret == (size_t)-1);
557 ASSERT (errno == EILSEQ);
559 memset (&state, '\0', sizeof (mbstate_t));
560 wc = (wchar_t) 0xBADFACE;
561 ret = mbrtowc (&wc, "\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
562 ASSERT (ret == (size_t)-1);
563 ASSERT (errno == EILSEQ);
565 memset (&state, '\0', sizeof (mbstate_t));
566 wc = (wchar_t) 0xBADFACE;
567 ret = mbrtowc (&wc, "\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
568 ASSERT (ret == (size_t)-1);
569 ASSERT (errno == EILSEQ);
571 memset (&state, '\0', sizeof (mbstate_t));
572 wc = (wchar_t) 0xBADFACE;
573 ret = mbrtowc (&wc, "\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
574 ASSERT (ret == (size_t)-1);
575 ASSERT (errno == EILSEQ);
580 /* Locale encoding is CP65001 = UTF-8. */
582 char input[] = "B\303\274\303\237er"; /* "Büßer" */
583 memset (&state, '\0', sizeof (mbstate_t));
585 wc = (wchar_t) 0xBADFACE;
586 ret = mbrtowc (&wc, input, 1, &state);
589 ASSERT (mbsinit (&state));
592 wc = (wchar_t) 0xBADFACE;
593 ret = mbrtowc (&wc, input + 1, 1, &state);
594 ASSERT (ret == (size_t)(-2));
595 ASSERT (wc == (wchar_t) 0xBADFACE);
596 ASSERT (!mbsinit (&state));
599 wc = (wchar_t) 0xBADFACE;
600 ret = mbrtowc (&wc, input + 2, 5, &state);
602 ASSERT (wctob (wc) == EOF);
603 ASSERT (wc == 0x00FC);
604 ASSERT (mbsinit (&state));
607 /* Test support of NULL first argument. */
608 ret = mbrtowc (NULL, input + 3, 4, &state);
610 ASSERT (mbsinit (&state));
612 wc = (wchar_t) 0xBADFACE;
613 ret = mbrtowc (&wc, input + 3, 4, &state);
615 ASSERT (wctob (wc) == EOF);
616 ASSERT (wc == 0x00DF);
617 ASSERT (mbsinit (&state));
621 wc = (wchar_t) 0xBADFACE;
622 ret = mbrtowc (&wc, input + 5, 2, &state);
625 ASSERT (mbsinit (&state));
628 wc = (wchar_t) 0xBADFACE;
629 ret = mbrtowc (&wc, input + 6, 1, &state);
632 ASSERT (mbsinit (&state));
634 /* Test some invalid input. */
635 memset (&state, '\0', sizeof (mbstate_t));
636 wc = (wchar_t) 0xBADFACE;
637 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
638 ASSERT (ret == (size_t)-1);
639 ASSERT (errno == EILSEQ);
641 memset (&state, '\0', sizeof (mbstate_t));
642 wc = (wchar_t) 0xBADFACE;
643 ret = mbrtowc (&wc, "\303\300", 2, &state); /* 0xC3 0xC0 */
644 ASSERT (ret == (size_t)-1);
645 ASSERT (errno == EILSEQ);
647 memset (&state, '\0', sizeof (mbstate_t));
648 wc = (wchar_t) 0xBADFACE;
649 ret = mbrtowc (&wc, "\343\300", 2, &state); /* 0xE3 0xC0 */
650 ASSERT (ret == (size_t)-1);
651 ASSERT (errno == EILSEQ);
653 memset (&state, '\0', sizeof (mbstate_t));
654 wc = (wchar_t) 0xBADFACE;
655 ret = mbrtowc (&wc, "\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
656 ASSERT (ret == (size_t)-1);
657 ASSERT (errno == EILSEQ);
659 memset (&state, '\0', sizeof (mbstate_t));
660 wc = (wchar_t) 0xBADFACE;
661 ret = mbrtowc (&wc, "\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
662 ASSERT (ret == (size_t)-1);
663 ASSERT (errno == EILSEQ);
665 memset (&state, '\0', sizeof (mbstate_t));
666 wc = (wchar_t) 0xBADFACE;
667 ret = mbrtowc (&wc, "\363\300", 2, &state); /* 0xF3 0xC0 */
668 ASSERT (ret == (size_t)-1);
669 ASSERT (errno == EILSEQ);
671 memset (&state, '\0', sizeof (mbstate_t));
672 wc = (wchar_t) 0xBADFACE;
673 ret = mbrtowc (&wc, "\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
674 ASSERT (ret == (size_t)-1);
675 ASSERT (errno == EILSEQ);
677 memset (&state, '\0', sizeof (mbstate_t));
678 wc = (wchar_t) 0xBADFACE;
679 ret = mbrtowc (&wc, "\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
680 ASSERT (ret == (size_t)-1);
681 ASSERT (errno == EILSEQ);
683 memset (&state, '\0', sizeof (mbstate_t));
684 wc = (wchar_t) 0xBADFACE;
685 ret = mbrtowc (&wc, "\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
686 ASSERT (ret == (size_t)-1);
687 ASSERT (errno == EILSEQ);
689 memset (&state, '\0', sizeof (mbstate_t));
690 wc = (wchar_t) 0xBADFACE;
691 ret = mbrtowc (&wc, "\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
692 ASSERT (ret == (size_t)-1);
693 ASSERT (errno == EILSEQ);
703 main (int argc, char *argv[])
705 int codepage = atoi (argv[argc - 1]);
710 for (i = 1; i < argc - 1; i++)
712 int ret = test_one_locale (argv[i], codepage);
720 fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
729 main (int argc, char *argv[])
731 fputs ("Skipping test: not a native Windows system\n", stderr);