1 /* Test of conversion of multibyte character to wide character.
2 Copyright (C) 2008-2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
27 #include "localcharset.h"
30 #if defined _WIN32 && !defined __CYGWIN__
33 test_one_locale (const char *name, int codepage)
40 /* Portable code to set the locale. */
42 char name_with_codepage[1024];
44 sprintf (name_with_codepage, "%s.%d", name, codepage);
47 if (setlocale (LC_ALL, name_with_codepage) == NULL)
51 /* Hacky way to set a locale.codepage combination that setlocale() refuses to set. */
54 /* Codepage of the current locale, set with setlocale().
55 Not necessarily the same as GetACP(). */
56 extern __declspec(dllimport) unsigned int __lc_codepage;
59 if (setlocale (LC_ALL, name) == NULL)
62 /* Clobber the codepage and MB_CUR_MAX, both set by setlocale(). */
63 __lc_codepage = codepage;
81 /* Test whether the codepage is really available. */
82 memset (&state, '\0', sizeof (mbstate_t));
83 if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
88 /* Test zero-length input. */
90 memset (&state, '\0', sizeof (mbstate_t));
91 wc = (wchar_t) 0xBADFACE;
92 ret = mbrtowc (&wc, "x", 0, &state);
93 /* gnulib's implementation returns (size_t)(-2).
94 The AIX 5.1 implementation returns (size_t)(-1).
95 glibc's implementation returns 0. */
96 ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
97 ASSERT (mbsinit (&state));
100 /* Test NUL byte input. */
102 memset (&state, '\0', sizeof (mbstate_t));
103 wc = (wchar_t) 0xBADFACE;
104 ret = mbrtowc (&wc, "", 1, &state);
107 ASSERT (mbsinit (&state));
108 ret = mbrtowc (NULL, "", 1, &state);
110 ASSERT (mbsinit (&state));
113 /* Test single-byte input. */
118 memset (&state, '\0', sizeof (mbstate_t));
119 for (c = 0; c < 0x100; c++)
122 case '\t': case '\v': case '\f':
123 case ' ': case '!': case '"': case '#': case '%':
124 case '&': case '\'': case '(': case ')': case '*':
125 case '+': case ',': case '-': case '.': case '/':
126 case '0': case '1': case '2': case '3': case '4':
127 case '5': case '6': case '7': case '8': case '9':
128 case ':': case ';': case '<': case '=': case '>':
130 case 'A': case 'B': case 'C': case 'D': case 'E':
131 case 'F': case 'G': case 'H': case 'I': case 'J':
132 case 'K': case 'L': case 'M': case 'N': case 'O':
133 case 'P': case 'Q': case 'R': case 'S': case 'T':
134 case 'U': case 'V': case 'W': case 'X': case 'Y':
136 case '[': case '\\': case ']': case '^': case '_':
137 case 'a': case 'b': case 'c': case 'd': case 'e':
138 case 'f': case 'g': case 'h': case 'i': case 'j':
139 case 'k': case 'l': case 'm': case 'n': case 'o':
140 case 'p': case 'q': case 'r': case 's': case 't':
141 case 'u': case 'v': case 'w': case 'x': case 'y':
142 case 'z': case '{': case '|': case '}': case '~':
143 /* c is in the ISO C "basic character set". */
145 wc = (wchar_t) 0xBADFACE;
146 ret = mbrtowc (&wc, buf, 1, &state);
149 ASSERT (mbsinit (&state));
150 ret = mbrtowc (NULL, buf, 1, &state);
152 ASSERT (mbsinit (&state));
157 /* Test special calling convention, passing a NULL pointer. */
159 memset (&state, '\0', sizeof (mbstate_t));
160 wc = (wchar_t) 0xBADFACE;
161 ret = mbrtowc (&wc, NULL, 5, &state);
163 ASSERT (wc == (wchar_t) 0xBADFACE);
164 ASSERT (mbsinit (&state));
170 /* Locale encoding is CP1252, an extension of ISO-8859-1. */
172 char input[] = "B\374\337er"; /* "Büßer" */
173 memset (&state, '\0', sizeof (mbstate_t));
175 wc = (wchar_t) 0xBADFACE;
176 ret = mbrtowc (&wc, input, 1, &state);
179 ASSERT (mbsinit (&state));
182 wc = (wchar_t) 0xBADFACE;
183 ret = mbrtowc (&wc, input + 1, 1, &state);
185 ASSERT (wctob (wc) == (unsigned char) '\374');
186 ASSERT (wc == 0x00FC);
187 ASSERT (mbsinit (&state));
190 /* Test support of NULL first argument. */
191 ret = mbrtowc (NULL, input + 2, 3, &state);
193 ASSERT (mbsinit (&state));
195 wc = (wchar_t) 0xBADFACE;
196 ret = mbrtowc (&wc, input + 2, 3, &state);
198 ASSERT (wctob (wc) == (unsigned char) '\337');
199 ASSERT (wc == 0x00DF);
200 ASSERT (mbsinit (&state));
203 wc = (wchar_t) 0xBADFACE;
204 ret = mbrtowc (&wc, input + 3, 2, &state);
207 ASSERT (mbsinit (&state));
210 wc = (wchar_t) 0xBADFACE;
211 ret = mbrtowc (&wc, input + 4, 1, &state);
214 ASSERT (mbsinit (&state));
219 /* Locale encoding is CP1256, not the same as ISO-8859-6. */
221 char input[] = "x\302\341\346y"; /* "xآلوy" */
222 memset (&state, '\0', sizeof (mbstate_t));
224 wc = (wchar_t) 0xBADFACE;
225 ret = mbrtowc (&wc, input, 1, &state);
228 ASSERT (mbsinit (&state));
231 wc = (wchar_t) 0xBADFACE;
232 ret = mbrtowc (&wc, input + 1, 1, &state);
234 ASSERT (wctob (wc) == (unsigned char) '\302');
235 ASSERT (wc == 0x0622);
236 ASSERT (mbsinit (&state));
239 /* Test support of NULL first argument. */
240 ret = mbrtowc (NULL, input + 2, 3, &state);
242 ASSERT (mbsinit (&state));
244 wc = (wchar_t) 0xBADFACE;
245 ret = mbrtowc (&wc, input + 2, 3, &state);
247 ASSERT (wctob (wc) == (unsigned char) '\341');
248 ASSERT (wc == 0x0644);
249 ASSERT (mbsinit (&state));
252 wc = (wchar_t) 0xBADFACE;
253 ret = mbrtowc (&wc, input + 3, 2, &state);
255 ASSERT (wctob (wc) == (unsigned char) '\346');
256 ASSERT (wc == 0x0648);
257 ASSERT (mbsinit (&state));
260 wc = (wchar_t) 0xBADFACE;
261 ret = mbrtowc (&wc, input + 4, 1, &state);
264 ASSERT (mbsinit (&state));
269 /* Locale encoding is CP932, similar to Shift_JIS. */
271 char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
272 memset (&state, '\0', sizeof (mbstate_t));
274 wc = (wchar_t) 0xBADFACE;
275 ret = mbrtowc (&wc, input, 1, &state);
278 ASSERT (mbsinit (&state));
281 wc = (wchar_t) 0xBADFACE;
282 ret = mbrtowc (&wc, input + 1, 2, &state);
284 ASSERT (wctob (wc) == EOF);
285 ASSERT (wc == 0x65E5);
286 ASSERT (mbsinit (&state));
290 wc = (wchar_t) 0xBADFACE;
291 ret = mbrtowc (&wc, input + 3, 1, &state);
292 ASSERT (ret == (size_t)(-2));
293 ASSERT (wc == (wchar_t) 0xBADFACE);
294 ASSERT (!mbsinit (&state));
297 wc = (wchar_t) 0xBADFACE;
298 ret = mbrtowc (&wc, input + 4, 4, &state);
300 ASSERT (wctob (wc) == EOF);
301 ASSERT (wc == 0x672C);
302 ASSERT (mbsinit (&state));
305 /* Test support of NULL first argument. */
306 ret = mbrtowc (NULL, input + 5, 3, &state);
308 ASSERT (mbsinit (&state));
310 wc = (wchar_t) 0xBADFACE;
311 ret = mbrtowc (&wc, input + 5, 3, &state);
313 ASSERT (wctob (wc) == EOF);
314 ASSERT (wc == 0x8A9E);
315 ASSERT (mbsinit (&state));
319 wc = (wchar_t) 0xBADFACE;
320 ret = mbrtowc (&wc, input + 7, 1, &state);
323 ASSERT (mbsinit (&state));
325 /* Test some invalid input. */
326 memset (&state, '\0', sizeof (mbstate_t));
327 wc = (wchar_t) 0xBADFACE;
328 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
329 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
331 memset (&state, '\0', sizeof (mbstate_t));
332 wc = (wchar_t) 0xBADFACE;
333 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
334 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || (ret == 2 && wc == 0x30FB));
339 /* Locale encoding is CP950, similar to Big5. */
341 char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
342 memset (&state, '\0', sizeof (mbstate_t));
344 wc = (wchar_t) 0xBADFACE;
345 ret = mbrtowc (&wc, input, 1, &state);
348 ASSERT (mbsinit (&state));
351 wc = (wchar_t) 0xBADFACE;
352 ret = mbrtowc (&wc, input + 1, 2, &state);
354 ASSERT (wctob (wc) == EOF);
355 ASSERT (wc == 0x65E5);
356 ASSERT (mbsinit (&state));
360 wc = (wchar_t) 0xBADFACE;
361 ret = mbrtowc (&wc, input + 3, 1, &state);
362 ASSERT (ret == (size_t)(-2));
363 ASSERT (wc == (wchar_t) 0xBADFACE);
364 ASSERT (!mbsinit (&state));
367 wc = (wchar_t) 0xBADFACE;
368 ret = mbrtowc (&wc, input + 4, 4, &state);
370 ASSERT (wctob (wc) == EOF);
371 ASSERT (wc == 0x672C);
372 ASSERT (mbsinit (&state));
375 /* Test support of NULL first argument. */
376 ret = mbrtowc (NULL, input + 5, 3, &state);
378 ASSERT (mbsinit (&state));
380 wc = (wchar_t) 0xBADFACE;
381 ret = mbrtowc (&wc, input + 5, 3, &state);
383 ASSERT (wctob (wc) == EOF);
384 ASSERT (wc == 0x8A9E);
385 ASSERT (mbsinit (&state));
389 wc = (wchar_t) 0xBADFACE;
390 ret = mbrtowc (&wc, input + 7, 1, &state);
393 ASSERT (mbsinit (&state));
395 /* Test some invalid input. */
396 memset (&state, '\0', sizeof (mbstate_t));
397 wc = (wchar_t) 0xBADFACE;
398 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
399 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
401 memset (&state, '\0', sizeof (mbstate_t));
402 wc = (wchar_t) 0xBADFACE;
403 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
404 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || (ret == 2 && wc == '?'));
409 /* Locale encoding is CP936 = GBK, an extension of GB2312. */
411 char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
412 memset (&state, '\0', sizeof (mbstate_t));
414 wc = (wchar_t) 0xBADFACE;
415 ret = mbrtowc (&wc, input, 1, &state);
418 ASSERT (mbsinit (&state));
421 wc = (wchar_t) 0xBADFACE;
422 ret = mbrtowc (&wc, input + 1, 2, &state);
424 ASSERT (wctob (wc) == EOF);
425 ASSERT (wc == 0x65E5);
426 ASSERT (mbsinit (&state));
430 wc = (wchar_t) 0xBADFACE;
431 ret = mbrtowc (&wc, input + 3, 1, &state);
432 ASSERT (ret == (size_t)(-2));
433 ASSERT (wc == (wchar_t) 0xBADFACE);
434 ASSERT (!mbsinit (&state));
437 wc = (wchar_t) 0xBADFACE;
438 ret = mbrtowc (&wc, input + 4, 4, &state);
440 ASSERT (wctob (wc) == EOF);
441 ASSERT (wc == 0x672C);
442 ASSERT (mbsinit (&state));
445 /* Test support of NULL first argument. */
446 ret = mbrtowc (NULL, input + 5, 3, &state);
448 ASSERT (mbsinit (&state));
450 wc = (wchar_t) 0xBADFACE;
451 ret = mbrtowc (&wc, input + 5, 3, &state);
453 ASSERT (wctob (wc) == EOF);
454 ASSERT (wc == 0x8A9E);
455 ASSERT (mbsinit (&state));
459 wc = (wchar_t) 0xBADFACE;
460 ret = mbrtowc (&wc, input + 7, 1, &state);
463 ASSERT (mbsinit (&state));
465 /* Test some invalid input. */
466 memset (&state, '\0', sizeof (mbstate_t));
467 wc = (wchar_t) 0xBADFACE;
468 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
469 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
471 memset (&state, '\0', sizeof (mbstate_t));
472 wc = (wchar_t) 0xBADFACE;
473 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
474 ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || (ret == 2 && wc == '?'));
479 /* Locale encoding is CP54936 = GB18030. */
480 if (strcmp (locale_charset (), "GB18030") != 0)
483 char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
484 memset (&state, '\0', sizeof (mbstate_t));
486 wc = (wchar_t) 0xBADFACE;
487 ret = mbrtowc (&wc, input, 1, &state);
490 ASSERT (mbsinit (&state));
493 wc = (wchar_t) 0xBADFACE;
494 ret = mbrtowc (&wc, input + 1, 1, &state);
495 ASSERT (ret == (size_t)(-2));
496 ASSERT (wc == (wchar_t) 0xBADFACE);
497 ASSERT (!mbsinit (&state));
500 wc = (wchar_t) 0xBADFACE;
501 ret = mbrtowc (&wc, input + 2, 7, &state);
503 ASSERT (wctob (wc) == EOF);
504 ASSERT (wc == 0x00FC);
505 ASSERT (mbsinit (&state));
508 /* Test support of NULL first argument. */
509 ret = mbrtowc (NULL, input + 3, 6, &state);
511 ASSERT (mbsinit (&state));
513 wc = (wchar_t) 0xBADFACE;
514 ret = mbrtowc (&wc, input + 3, 6, &state);
516 ASSERT (wctob (wc) == EOF);
517 ASSERT (wc == 0x00DF);
518 ASSERT (mbsinit (&state));
524 wc = (wchar_t) 0xBADFACE;
525 ret = mbrtowc (&wc, input + 7, 2, &state);
528 ASSERT (mbsinit (&state));
531 wc = (wchar_t) 0xBADFACE;
532 ret = mbrtowc (&wc, input + 8, 1, &state);
535 ASSERT (mbsinit (&state));
537 /* Test some invalid input. */
538 memset (&state, '\0', sizeof (mbstate_t));
539 wc = (wchar_t) 0xBADFACE;
540 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
541 ASSERT (ret == (size_t)-1);
542 ASSERT (errno == EILSEQ);
544 memset (&state, '\0', sizeof (mbstate_t));
545 wc = (wchar_t) 0xBADFACE;
546 ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
547 ASSERT (ret == (size_t)-1);
548 ASSERT (errno == EILSEQ);
550 memset (&state, '\0', sizeof (mbstate_t));
551 wc = (wchar_t) 0xBADFACE;
552 ret = mbrtowc (&wc, "\201\045", 2, &state); /* 0x81 0x25 */
553 ASSERT (ret == (size_t)-1);
554 ASSERT (errno == EILSEQ);
556 memset (&state, '\0', sizeof (mbstate_t));
557 wc = (wchar_t) 0xBADFACE;
558 ret = mbrtowc (&wc, "\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
559 ASSERT (ret == (size_t)-1);
560 ASSERT (errno == EILSEQ);
562 memset (&state, '\0', sizeof (mbstate_t));
563 wc = (wchar_t) 0xBADFACE;
564 ret = mbrtowc (&wc, "\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
565 ASSERT (ret == (size_t)-1);
566 ASSERT (errno == EILSEQ);
568 memset (&state, '\0', sizeof (mbstate_t));
569 wc = (wchar_t) 0xBADFACE;
570 ret = mbrtowc (&wc, "\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
571 ASSERT (ret == (size_t)-1);
572 ASSERT (errno == EILSEQ);
577 /* Locale encoding is CP65001 = UTF-8. */
578 if (strcmp (locale_charset (), "UTF-8") != 0)
581 char input[] = "B\303\274\303\237er"; /* "Büßer" */
582 memset (&state, '\0', sizeof (mbstate_t));
584 wc = (wchar_t) 0xBADFACE;
585 ret = mbrtowc (&wc, input, 1, &state);
588 ASSERT (mbsinit (&state));
591 wc = (wchar_t) 0xBADFACE;
592 ret = mbrtowc (&wc, input + 1, 1, &state);
593 ASSERT (ret == (size_t)(-2));
594 ASSERT (wc == (wchar_t) 0xBADFACE);
595 ASSERT (!mbsinit (&state));
598 wc = (wchar_t) 0xBADFACE;
599 ret = mbrtowc (&wc, input + 2, 5, &state);
601 ASSERT (wctob (wc) == EOF);
602 ASSERT (wc == 0x00FC);
603 ASSERT (mbsinit (&state));
606 /* Test support of NULL first argument. */
607 ret = mbrtowc (NULL, input + 3, 4, &state);
609 ASSERT (mbsinit (&state));
611 wc = (wchar_t) 0xBADFACE;
612 ret = mbrtowc (&wc, input + 3, 4, &state);
614 ASSERT (wctob (wc) == EOF);
615 ASSERT (wc == 0x00DF);
616 ASSERT (mbsinit (&state));
620 wc = (wchar_t) 0xBADFACE;
621 ret = mbrtowc (&wc, input + 5, 2, &state);
624 ASSERT (mbsinit (&state));
627 wc = (wchar_t) 0xBADFACE;
628 ret = mbrtowc (&wc, input + 6, 1, &state);
631 ASSERT (mbsinit (&state));
633 /* Test some invalid input. */
634 memset (&state, '\0', sizeof (mbstate_t));
635 wc = (wchar_t) 0xBADFACE;
636 ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
637 ASSERT (ret == (size_t)-1);
638 ASSERT (errno == EILSEQ);
640 memset (&state, '\0', sizeof (mbstate_t));
641 wc = (wchar_t) 0xBADFACE;
642 ret = mbrtowc (&wc, "\303\300", 2, &state); /* 0xC3 0xC0 */
643 ASSERT (ret == (size_t)-1);
644 ASSERT (errno == EILSEQ);
646 memset (&state, '\0', sizeof (mbstate_t));
647 wc = (wchar_t) 0xBADFACE;
648 ret = mbrtowc (&wc, "\343\300", 2, &state); /* 0xE3 0xC0 */
649 ASSERT (ret == (size_t)-1);
650 ASSERT (errno == EILSEQ);
652 memset (&state, '\0', sizeof (mbstate_t));
653 wc = (wchar_t) 0xBADFACE;
654 ret = mbrtowc (&wc, "\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
655 ASSERT (ret == (size_t)-1);
656 ASSERT (errno == EILSEQ);
658 memset (&state, '\0', sizeof (mbstate_t));
659 wc = (wchar_t) 0xBADFACE;
660 ret = mbrtowc (&wc, "\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
661 ASSERT (ret == (size_t)-1);
662 ASSERT (errno == EILSEQ);
664 memset (&state, '\0', sizeof (mbstate_t));
665 wc = (wchar_t) 0xBADFACE;
666 ret = mbrtowc (&wc, "\363\300", 2, &state); /* 0xF3 0xC0 */
667 ASSERT (ret == (size_t)-1);
668 ASSERT (errno == EILSEQ);
670 memset (&state, '\0', sizeof (mbstate_t));
671 wc = (wchar_t) 0xBADFACE;
672 ret = mbrtowc (&wc, "\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
673 ASSERT (ret == (size_t)-1);
674 ASSERT (errno == EILSEQ);
676 memset (&state, '\0', sizeof (mbstate_t));
677 wc = (wchar_t) 0xBADFACE;
678 ret = mbrtowc (&wc, "\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
679 ASSERT (ret == (size_t)-1);
680 ASSERT (errno == EILSEQ);
682 memset (&state, '\0', sizeof (mbstate_t));
683 wc = (wchar_t) 0xBADFACE;
684 ret = mbrtowc (&wc, "\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
685 ASSERT (ret == (size_t)-1);
686 ASSERT (errno == EILSEQ);
688 memset (&state, '\0', sizeof (mbstate_t));
689 wc = (wchar_t) 0xBADFACE;
690 ret = mbrtowc (&wc, "\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
691 ASSERT (ret == (size_t)-1);
692 ASSERT (errno == EILSEQ);
702 main (int argc, char *argv[])
704 int codepage = atoi (argv[argc - 1]);
709 for (i = 1; i < argc - 1; i++)
711 int ret = test_one_locale (argv[i], codepage);
719 fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
728 main (int argc, char *argv[])
730 fputs ("Skipping test: not a native Windows system\n", stderr);