1 /* winduni.c -- unicode support for the windres program.
2 Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
3 Free Software Foundation, Inc.
4 Written by Ian Lance Taylor, Cygnus Support.
5 Rewritten by Kai Tietz, Onevision.
7 This file is part of GNU Binutils.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
25 /* This file contains unicode support routines for the windres
26 program. Ideally, we would have generic unicode support which
27 would work on all systems. However, we don't. Instead, on a
28 Windows host, we are prepared to call some Windows routines. This
29 means that we will generate different output on Windows and Unix
30 hosts, but that seems better than not really supporting unicode at
35 #include "libiberty.h" /* for xstrdup */
37 /* Must be included before windows.h and winnls.h. */
38 #if defined (_WIN32) || defined (__CYGWIN__)
43 #include "safe-ctype.h"
49 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
50 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
51 static int unichar_isascii (const unichar *, rc_uint_type);
53 /* Convert an ASCII string to a unicode string. We just copy it,
54 expanding chars to shorts, rather than doing something intelligent. */
56 #if !defined (_WIN32) && !defined (__CYGWIN__)
58 /* Codepages mapped. Each entry pairs a codepage number with an encoding
   name; wind_iconv_cp reads that name through the iconv_name member. The
   table ends with a sentinel entry whose codepage is (rc_uint_type) -1, and
   wind_find_codepage_info stops its search there. */
59 static local_iconv_map codepages[] =
62 { 1, "WINDOWS-1252" },
65 { 775, "WINBALTRIM" },
72 { 874, "WINDOWS-874" },
77 { 1250, "WINDOWS-1250" },
78 { 1251, "WINDOWS-1251" },
79 { 1252, "WINDOWS-1252" },
80 { 1253, "WINDOWS-1253" },
81 { 1254, "WINDOWS-1254" },
82 { 1255, "WINDOWS-1255" },
83 { 1256, "WINDOWS-1256" },
84 { 1257, "WINDOWS-1257" },
85 { 1258, "WINDOWS-1258" },
88 { CP_UTF16, "UTF-16" },
/* Sentinel: marks the end of the table. */
89 { (rc_uint_type) -1, NULL }
92 /* Languages supported. */
93 static const wind_language_t languages[] =
95 { 0x0000, 437, 1252, "Neutral", "Neutral" },
96 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
97 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
98 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
99 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
100 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
101 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
102 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
103 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
104 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
105 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
106 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
107 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
108 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
109 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
110 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
111 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
112 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
113 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
114 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
115 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
116 { 0x042D, 850, 1252, "Basque", "Spain" },
117 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
118 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
119 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
120 { 0x043C, 437, 1252, "Irish", "Ireland" },
121 { 0x043E, 850, 1252, "Malay", "Malaysia" },
122 { 0x0801, 864, 1256, "Arabic", "Iraq" },
123 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
124 { 0x0807, 850, 1252, "German", "Switzerland" },
125 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
126 { 0x080C, 850, 1252, "French", "Belgium" },
127 { 0x0810, 850, 1252, "Italian", "Switzerland" },
128 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
129 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
/* NOTE(review): every other visible entry of this table whose second field
   is 852 has 1250 as its third field (presumably the DOS and the Windows
   codepage respectively); 1252 here looks inconsistent -- confirm against
   the Microsoft locale data. */
130 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
131 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
132 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
133 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
134 { 0x0C07, 850, 1252, "German", "Austria" },
135 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
136 { 0x0C0C, 850, 1252, "French", "Canada"},
137 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
138 { 0x1001, 864, 1256, "Arabic", "Libya" },
139 { 0x1004, 936, 936, "Chinese", "Singapore" },
140 { 0x1007, 850, 1252, "German", "Luxembourg" },
141 { 0x1009, 850, 1252, "English", "Canada" },
142 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
143 { 0x100C, 850, 1252, "French", "Switzerland" },
144 { 0x1401, 864, 1256, "Arabic", "Algeria" },
145 { 0x1407, 850, 1252, "German", "Liechtenstein" },
146 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
147 { 0x140C, 850, 1252, "French", "Luxembourg" },
148 { 0x1801, 864, 1256, "Arabic", "Morocco" },
149 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
150 { 0x180C, 850, 1252, "French", "Monaco" },
151 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
152 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
153 { 0x2001, 864, 1256, "Arabic", "Oman" },
154 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
155 { 0x2401, 864, 1256, "Arabic", "Yemen" },
156 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
157 { 0x2801, 864, 1256, "Arabic", "Syria" },
158 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
159 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
160 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
161 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
162 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
163 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
164 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
165 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
166 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
167 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
168 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
169 { 0x4001, 864, 1256, "Arabic", "Qatar" },
170 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
171 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
172 { 0x480A, 850, 1252, "Spanish", "Honduras" },
173 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
174 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
175 { (unsigned) -1, 0, 0, NULL, NULL }
180 /* Specifies the default codepage to be used for unicode
181 transformations. By default this is CP_ACP. */
182 rc_uint_type wind_default_codepage = CP_ACP;
184 /* Specifies the currently used codepage for unicode
185 transformations. By default this is CP_ACP. */
186 rc_uint_type wind_current_codepage = CP_ACP;
188 /* Convert the string ASCII to a unicode string. This is a thin wrapper that
189 forwards to unicode_from_codepage with wind_current_codepage as the codepage. */
192 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
194 unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
197 /* Convert a unicode string to an ASCII string. This is a thin wrapper that
198 forwards to codepage_from_unicode with wind_current_codepage as the codepage.
199 NOTE(review): shorts that are not within the char range are said to be
   replaced by '_'; that replacement is not visible in this function --
   confirm it in wind_WideCharToMultiByte. */
202 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
204 codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
207 /* Print the unicode string UNICODE to the file E. LENGTH is the
208 number of characters to print, or -1 if we should print until the
209 end of the string. FIXME: On a Windows host, we should be calling
210 some Windows function, probably WideCharToMultiByte. */
213 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
/* LENGTH is viewed as a signed value here so that a real count can be told
   apart from the -1 request described above. */
221 if ((bfd_signed_vma) length > 0)
/* A zero character is only special when no explicit count was given. */
226 if (ch == 0 && (bfd_signed_vma) length < 0)
/* Values that fit in seven bits get the ASCII treatment below. */
231 if ((ch & 0x7f) == ch)
237 else if (ISPRINT (ch))
/* Three-digit octal escape. */
272 fprintf (e, "\\%03o", (unsigned int) ch);
/* The value fits in eight bits: also a three-digit octal escape. */
277 else if ((ch & 0xff) == ch)
278 fprintf (e, "\\%03o", (unsigned int) ch);
/* Backslash-x followed by at least four hex digits. Presumably this is the
   final else branch; the line in between is not visible in this view. */
280 fprintf (e, "\\x%04x", (unsigned int) ch);
284 /* Print the ASCII string S to the file E. LENGTH is put through the same
   signed tests as in unicode_print, so it presumably follows the same
   convention: a character count, or -1 to print up to the terminating
   zero. */
287 ascii_print (FILE *e, const char *s, rc_uint_type length)
295 if ((bfd_signed_vma) length > 0)
300 if (ch == 0 && (bfd_signed_vma) length < 0)
/* Values that fit in seven bits get the ASCII treatment below. */
305 if ((ch & 0x7f) == ch)
311 else if (ISPRINT (ch))
346 fprintf (e, "\\%03o", (unsigned int) ch);
/* Mask to the low eight bits so that the octal escape cannot exceed three
   digits. */
352 fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
/* Scan UNICODE for its terminating zero unichar. The index reached is
   presumably returned as the length; the return statement is not visible
   in this view. */
357 unichar_len (const unichar *unicode)
362 while (unicode[r] != 0)
/* Duplicate the zero-terminated string UNICODE into memory obtained from
   res_alloc. */
370 unichar_dup (const unichar *unicode)
/* Count the unichars that precede the terminating zero. */
377 for (len = 0; unicode[len] != 0; ++len)
/* NOTE(review): the copy below only includes the terminating zero if len is
   increased by one on a line that is not visible here -- confirm. */
380 r = ((unichar *) res_alloc (len * sizeof (unichar)));
381 memcpy (r, unicode, len * sizeof (unichar));
/* Make a copy of U with unichar_dup and walk it up to its terminating zero.
   Judging by the range test in the loop and by the function name, the ASCII
   letters a-z of the copy are upper-cased and the copy is returned; neither
   the assignment nor the return is visible in this view. */
386 unichar_dup_uppercase (const unichar *u)
388 unichar *r = unichar_dup (u);
394 for (i = 0; r[i] != 0; ++i)
396 if (r[i] >= 'a' && r[i] <= 'z')
/* Test whether the LEN unichars at U are all seven-bit ASCII, that is,
   whether none of them has a bit of the mask 0xff80 set. The return
   statements are not visible in this view. */
403 unichar_isascii (const unichar *u, rc_uint_type len)
/* A LEN that is negative when viewed as a signed value means that the
   length is taken from unichar_len instead. */
407 if ((bfd_signed_vma) len < 0)
410 len = (rc_uint_type) unichar_len (u);
415 for (i = 0; i < len; i++)
416 if ((u[i] & 0xff80) != 0)
/* Print the LEN unichars at U to the file E by way of unicode_print.
   Strings that are not pure ASCII according to unichar_isascii get some
   extra handling on a line that is not visible in this view; the quoting
   that the name implies is likewise not shown. */
422 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
424 if (! unichar_isascii (u, len))
427 unicode_print (e, u, len);
/* Decide whether CP can be used as a codepage. The outcome of each test
   below is on lines that are not visible in this view. */
432 unicode_is_valid_codepage (rc_uint_type cp)
/* Values that do not fit in 16 bits are singled out first. */
434 if ((cp & 0xffff) != cp)
/* CP_UTF16 and CP_ACP are special-cased before any lookup. */
436 if (cp == CP_UTF16 || cp == CP_ACP)
439 #if !defined (_WIN32) && !defined (__CYGWIN__)
/* Without the Windows API the answer comes from the local codepage table. */
440 if (! wind_find_codepage_info (cp))
/* Presumably the Windows branch: ask IsValidCodePage; the double negation
   turns its result into 0 or 1. */
444 return !! IsValidCodePage ((UINT) cp);
448 #if defined (_WIN32) || defined (__CYGWIN__)
450 #define max_cp_string_len 6
/* Ask Windows, through GetLocaleInfoA with LOCALE_IDEFAULTANSICODEPAGE, for
   the codepage of the language LANGID. The answer arrives as a decimal
   string in cp_string and is converted with strtoul. */
453 codepage_from_langid (unsigned short langid)
/* max_cp_string_len is 6: room for five digits and the terminating NUL. */
455 char cp_string [max_cp_string_len];
458 memset (cp_string, 0, max_cp_string_len);
459 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
460 but is unavailable on Win95. */
461 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
462 LOCALE_IDEFAULTANSICODEPAGE,
463 cp_string, max_cp_string_len);
464 /* If codepage data for an LCID is not installed on the user's system,
465 GetLocaleInfo returns an empty string. Fall back to system ANSI
469 return strtoul (cp_string, 0, 10);
/* Ask Windows, through GetLocaleInfoA with LOCALE_IDEFAULTCODEPAGE, for the
   codepage of the language LANGID. The answer arrives as a decimal string
   in cp_string and is converted with strtoul.
   NOTE(review): Microsoft documents LOCALE_IDEFAULTCODEPAGE as the OEM
   codepage, which sits oddly with the win prefix of this function --
   confirm. */
473 wincodepage_from_langid (unsigned short langid)
475 char cp_string [max_cp_string_len];
478 memset (cp_string, 0, max_cp_string_len);
479 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
480 but is unavailable on Win95. */
481 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
482 LOCALE_IDEFAULTCODEPAGE,
483 cp_string, max_cp_string_len);
484 /* If codepage data for an LCID is not installed on the user's system,
485 GetLocaleInfo returns an empty string. Fall back to system ANSI
489 return strtoul (cp_string, 0, 10);
/* Return a copy, made with xstrdup, of the text that GetLocaleInfoA reports
   for LANGID; judging by the function name this is the language name (the
   LCTYPE argument is on a line not visible here). The literal string
   Neutral is substituted under a condition that is likewise not visible,
   presumably when the query yields nothing. */
493 lang_from_langid (unsigned short langid)
/* 261 is presumably the size of cp_string, which is declared on a line not
   visible in this view. */
498 memset (cp_string, 0, 261);
499 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
502 /* If codepage data for an LCID is not installed on the user's system,
503 GetLocaleInfo returns an empty string. Fall back to system ANSI
506 strcpy (cp_string, "Neutral");
507 return xstrdup (cp_string);
/* Return a copy, made with xstrdup, of the text that GetLocaleInfoA reports
   for LANGID; judging by the function name this is the country name (the
   LCTYPE argument is on a line not visible here). The literal string
   Neutral is substituted under a condition that is likewise not visible,
   presumably when the query yields nothing. */
511 country_from_langid (unsigned short langid)
/* 261 is presumably the size of cp_string, which is declared on a line not
   visible in this view. */
516 memset (cp_string, 0, 261);
517 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
520 /* If codepage data for an LCID is not installed on the user's system,
521 GetLocaleInfo returns an empty string. Fall back to system ANSI
524 strcpy (cp_string, "Neutral");
525 return xstrdup (cp_string);
/* Look up the language whose identifier is ID. On hosts other than Windows
   and Cygwin the static languages[] table is searched. Otherwise one static
   wind_language_t record is filled in from the Windows locale helpers, so
   its contents are overwritten by the next call. */
530 const wind_language_t *
531 wind_find_language_by_id (unsigned id)
533 #if !defined (_WIN32) && !defined (__CYGWIN__)
/* Linear search; the entry with id (unsigned) -1 is the end marker. */
538 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
/* The loop can also stop on the end marker, hence the re-check. */
540 if (languages[i].id == id)
541 return &languages[i];
544 static wind_language_t wl;
/* NOTE(review): codepage_from_langid queries LOCALE_IDEFAULTANSICODEPAGE and
   wincodepage_from_langid queries LOCALE_IDEFAULTCODEPAGE, which Microsoft
   documents as the OEM codepage; that looks like the reverse of the
   doscp/wincp pairing -- confirm. */
547 wl.doscp = codepage_from_langid ((unsigned short) id);
548 wl.wincp = wincodepage_from_langid ((unsigned short) id);
/* NOTE(review): both helpers return memory from xstrdup on every call, and
   nothing visible here releases the strings stored by an earlier call. */
549 wl.name = lang_from_langid ((unsigned short) id);
550 wl.country = country_from_langid ((unsigned short) id);
/* Look up the mapping record for codepage CP. On hosts other than Windows
   and Cygwin the static codepages[] table is searched; the remaining lines
   use a single static record instead. */
556 const local_iconv_map *
557 wind_find_codepage_info (unsigned cp)
559 #if !defined (_WIN32) && !defined (__CYGWIN__)
/* Linear search; the entry with codepage (rc_uint_type) -1 ends the table. */
562 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
/* Stopped on the end marker: CP is not in the table. */
564 if (codepages[i].codepage == (rc_uint_type) -1)
566 return &codepages[i];
/* Presumably the Windows branch, after an else directive that is not
   visible in this view: CP is validated before the static record is used. */
568 static local_iconv_map lim;
569 if (!unicode_is_valid_codepage (cp))
577 /* Convert a codepage string to a unicode string. SRC is interpreted in
   codepage CP, the converted string is stored in *U, and its length in
   unichars is stored in *LENGTH. */
580 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
/* Sizing call: a NULL buffer of length 0 asks for the space needed. The
   helper scales its result by sizeof (unichar), i.e. reports bytes, so len
   is handed to res_alloc as it is. */
584 len = wind_MultiByteToWideChar (cp, src, NULL, 0);
587 *u = ((unichar *) res_alloc (len));
588 wind_MultiByteToWideChar (cp, src, *u, len);
590 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
591 this will set *length to -1.
   NOTE(review): that only holds if the arithmetic is signed. If
   rc_uint_type is unsigned, a len of 0 wraps on the subtraction and the
   division by sizeof (unichar) below yields a large positive value rather
   than -1 -- confirm. */
592 len -= sizeof (unichar);
595 *length = len / sizeof (unichar);
598 /* Convert a unicode string to a codepage string. UNICODE is converted to
   codepage CP and the result is stored in *ASCII. */
601 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
/* Sizing call: a NULL buffer of length 0 asks for the space needed. */
605 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
608 *ascii = (char *) res_alloc (len * sizeof (char));
609 wind_WideCharToMultiByte (cp, unicode, *ascii, len);
611 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
612 this will set *length to -1. The statements that do so are not visible
   in this view. */
619 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
/* Try to convert one character from S into D, which has room for D_LEN
   bytes, using the conversion descriptor CD. N_S and N_D presumably
   receive the advanced source and destination pointers; the lines that
   store them are not visible in this view. */
621 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
/* The byte length of one input character is not known in advance, so iconv
   is tried with 1, 2, ... up to 32 input bytes. */
625 for (i = 1; i <= 32; i++)
/* Every attempt restarts from the original source position. */
628 ICONV_CONST char *tmp_s = s;
630 size_t s_left = (size_t) i;
631 size_t d_left = (size_t) d_len;
633 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
/* iconv reports failure as (size_t) -1; anything else is a success. */
635 if (ret != (size_t) -1)
/* Return the iconv encoding name recorded for codepage CP, as found by
   wind_find_codepage_info. A check of lim may sit on the lines between the
   lookup and the dereference; they are not visible in this view. */
647 wind_iconv_cp (rc_uint_type cp)
649 const local_iconv_map *lim = wind_find_codepage_info (cp);
653 return lim->iconv_name;
655 #endif /* HAVE_ICONV */
/* Convert the multibyte string MB, encoded in codepage CP, to unichars
   stored at U. Passing U == NULL with U_LEN == 0 is how
   unicode_from_codepage asks for the space needed. One of three
   implementations is compiled in: the Windows API, iconv, or (presumably as
   the final alternative) a plain widening of each byte. */
658 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
659 unichar *u, rc_uint_type u_len)
661 rc_uint_type ret = 0;
663 #if defined (_WIN32) || defined (__CYGWIN__)
664 rc_uint_type conv_flags = MB_PRECOMPOSED;
666 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
667 MultiByteToWideChar will set the last error to
668 ERROR_INVALID_FLAGS if we do. */
669 if (cp == CP_UTF8 || cp == CP_UTF7)
672 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
674 /* Convert to bytes. */
675 ret *= sizeof (unichar);
/* iconv branch: the numeric codepage is first mapped to an encoding name. */
677 #elif defined (HAVE_ICONV)
681 const char *iconv_name = wind_iconv_cp (cp);
683 if (!mb || !iconv_name)
685 iconv_t cd = iconv_open ("UTF-16", iconv_name);
690 const char *n_mb = "";
/* One input character is converted per call; 32 is the output capacity
   handed to iconv_onechar. */
694 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
/* Number of bytes produced for this character. */
702 size_t l_tmp = (size_t) (n_tmp - p_tmp);
/* Compare the room left at U with the size of this character. */
706 if ((size_t) u_len < l_tmp)
708 memcpy (u, tmp, l_tmp);
/* Two zero bytes: a zero unichar has just been produced. */
716 if (tmp[0] == 0 && tmp[1] == 0)
/* No conversion support: one unichar per input byte, terminator included,
   scaled to bytes as in the Windows branch. */
724 ret = strlen (mb) + 1;
725 ret *= sizeof (unichar);
726 if (u != NULL && u_len != 0)
/* The mask keeps only the low eight bits, presumably so that a negative
   char is not sign-extended. */
730 *u++ = ((unichar) *mb) & 0xff;
/* mb[-1] is the byte handled by the iteration that just ran, assuming mb is
   advanced on a line that is not visible in this view. */
733 while (u_len != 0 && mb[-1] != 0);
735 if (u != NULL && u_len != 0)
742 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
744 rc_uint_type ret = 0;
745 #if defined (_WIN32) || defined (__CYGWIN__)
746 WINBOOL used_def = FALSE;
748 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
750 #elif defined (HAVE_ICONV)
754 const char *iconv_name = wind_iconv_cp (cp);
756 if (!u || !iconv_name)
758 iconv_t cd = iconv_open (iconv_name, "UTF-16");
763 const char *n_u = "";
767 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
775 size_t l_tmp = (size_t) (n_tmp - p_tmp);
779 if ((size_t) mb_len < l_tmp)
781 memcpy (mb, tmp, l_tmp);
791 u = (const unichar *) n_u;
805 while (*u != 0 && mb_len != 0)
807 if (u[0] == (u[0] & 0x7f))