1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003, 2005-2009 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "msgl-iconv.h"
38 #include "po-charset.h"
39 #include "xstriconv.h"
40 #include "xstriconveh.h"
41 #include "msgl-ascii.h"
45 #include "xvasprintf.h"
46 #include "po-xerror.h"
49 #define _(str) gettext (str)
/* Report a fatal conversion failure for the message recorded in CONTEXT.
   The forward declaration carries __attribute__ ((noreturn)) when built
   with GCC 2.5 or newer.
   Two diagnostics are visible, both issued through po_xerror with
   PO_SEVERITY_FATAL_ERROR and context->message:
     - when context->to_code is po_charset_utf8, the input itself is
       blamed (it is not valid in the source encoding);
     - a second, more general message names both the source and the
       target encoding.
   The test against po_charset_utf8 is a pointer comparison; presumably
   this relies on charset names having been canonicalized to unique
   pointers -- TODO confirm against po-charset.h.
   NOTE(review): this listing has lost lines (the #endif, braces, the
   else keyword and the end of the second po_xerror call), so the exact
   branching is not shown here.  */
54 static void conversion_error (const struct conversion_context* context)
55 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
56 __attribute__ ((noreturn))
60 conversion_error (const struct conversion_context* context)
62 if (context->to_code == po_charset_utf8)
63 /* If a conversion to UTF-8 fails, the problem lies in the input. */
64 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
65 xasprintf (_("%s: input is not valid in \"%s\" encoding"),
66 context->from_filename, context->from_code));
68 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
70 %s: error while converting from \"%s\" encoding to \"%s\" encoding"),
71 context->from_filename, context->from_code,
/* Convert the NUL-terminated STRING through the plain iconv descriptor CD
   using xmem_cd_iconv.
   The terminating NUL is converted together with the text (the length
   passed is strlen + 1).  The output is accepted only when it contains
   exactly one NUL byte and that byte is the last one.
   conversion_error (context) is called after these checks; presumably it
   is reached only when the conversion or the verification fails.
   NOTE(review): the return type, braces, local declarations and the
   success path are missing from this listing; presumably the converted
   buffer is returned on success -- confirm against the full file.  */
78 convert_string_directly (iconv_t cd, const char *string,
79 const struct conversion_context* context)
81 size_t len = strlen (string) + 1;
85 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0)
86 /* Verify the result has exactly one NUL byte, at the end. */
87 if (resultlen > 0 && result[resultlen - 1] == '\0'
88 && strlen (result) == resultlen - 1)
91 conversion_error (context);
/* Convert the NUL-terminated STRING through the iconveh descriptor CD
   using xmem_cd_iconveh, passing iconveh_error and NULL as its fourth
   and fifth arguments (presumably the error handler and the offsets
   array -- see the gnulib striconveh documentation to confirm).
   The terminating NUL is part of the converted input (strlen + 1), and
   the output is accepted only when its single NUL byte is the last byte.
   conversion_error (context) follows the checks; presumably it is the
   failure path.
   NOTE(review): the declaration of result, the success return and the
   braces are not present in this listing.  */
97 convert_string (const iconveh_t *cd, const char *string,
98 const struct conversion_context* context)
100 size_t len = strlen (string) + 1;
102 size_t resultlen = 0;
104 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
105 &result, &resultlen) == 0)
106 /* Verify the result has exactly one NUL byte, at the end. */
107 if (resultlen > 0 && result[resultlen - 1] == '\0'
108 && strlen (result) == resultlen - 1)
111 conversion_error (context);
/* Convert every string of the list SLP in place: each slp->item[i] is
   replaced by the result of convert_string on it.
   NOTE(review): the visible lines neither test SLP against NULL nor
   release the previous item; lines are missing from this listing, so
   check the full file before relying on either.  */
117 convert_string_list (const iconveh_t *cd, string_list_ty *slp,
118 const struct conversion_context* context)
123 for (i = 0; i < slp->nitems; i++)
124 slp->item[i] = convert_string (cd, slp->item[i], context);
/* Convert the "previous" identification fields of the message MP in
   place: prev_msgctxt, prev_msgid and prev_msgid_plural.  Each field is
   optional and is converted with convert_string only when non-NULL.  */
128 convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
129 const struct conversion_context* context)
131 if (mp->prev_msgctxt != NULL)
132 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
133 if (mp->prev_msgid != NULL)
134 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
135 if (mp->prev_msgid_plural != NULL)
136 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
/* Convert the identification fields of the message MP in place.
   msgctxt and msgid_plural are optional and converted only when
   non-NULL; msgid is converted unconditionally, so it must not be
   NULL here.  */
140 convert_msgid (const iconveh_t *cd, message_ty *mp,
141 const struct conversion_context* context)
143 if (mp->msgctxt != NULL)
144 mp->msgctxt = convert_string (cd, mp->msgctxt, context);
145 mp->msgid = convert_string (cd, mp->msgid, context);
146 if (mp->msgid_plural != NULL)
147 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
/* Convert the translation buffer of the message MP (mp->msgstr, of
   mp->msgstr_len bytes) through CD in one xmem_cd_iconveh call.
   The buffer may hold several NUL-terminated strings back to back, so
   the checks differ from convert_string:
     - before converting, the buffer must be non-empty and end in a NUL
       (the action taken when it does not is on a line missing here);
     - after converting, the result must end in a NUL and contain the
       same number of NUL-terminated segments as the original.
   On success mp->msgstr_len is set to the new length; presumably
   mp->msgstr is updated on a neighbouring line that this listing lacks.
   conversion_error (context) follows; presumably it is the failure
   path.  */
151 convert_msgstr (const iconveh_t *cd, message_ty *mp,
152 const struct conversion_context* context)
155 size_t resultlen = 0;
157 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
160 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
161 &result, &resultlen) == 0)
162 /* Verify the result has a NUL byte at the end. */
163 if (resultlen > 0 && result[resultlen - 1] == '\0')
164 /* Verify the result has the same number of NUL bytes. */
/* Each loop hops from one string to the next (strlen + 1 bytes at a
   time) and counts the hops; the loop conditions are on lines missing
   from this listing.  */
171 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
173 p += strlen (p) + 1, nulcount1++);
174 for (p = result, pend = p + resultlen, nulcount2 = 0;
176 p += strlen (p) + 1, nulcount2++);
178 if (nulcount1 == nulcount2)
181 mp->msgstr_len = resultlen;
186 conversion_error (context);
/* Convert all messages of the list MLP from CANON_FROM_CODE to
   CANON_TO_CODE and return msgids_changed.

   CANON_FROM_CODE may be NULL; the source encoding is then taken from
   the charset= field of the header entry.  FROM_FILENAME is used in
   diagnostics and in the POT file test.
   NOTE(review): one parameter line between canon_to_code and
   from_filename is missing from this listing.  The callers visible in
   this file pass a boolean there (true, or update_header), so it is
   presumably the flag that guards the header rewrite -- confirm.

   Visible steps:
     1. An empty list needs no work.
     2. For each non-obsolete header entry, locate charset= in its msgstr
        and canonicalize the name with po_charset_canonicalize.  If the
        name is not recognized and no source encoding was supplied, the
        placeholder CHARSET in a file whose name ends in .pot is treated
        as ASCII; a fatal error about a non-portable encoding name is
        also visible (the connecting if/else lines are missing).
        Otherwise the header charset becomes the source encoding, or
        must be the same pointer as the one already known; a mismatch is
        a fatal error.
     3. A new header is built by splicing canon_to_code in place of the
        old charset name (len1 = text before the name, len2 = new name,
        len3 = text after it) and stored with its new length.
     4. If no source encoding is known after the scan, a list that is
        pure ASCII is treated as ASCII; a fatal error about a missing
        charset specification is also visible.
     5. When source and target differ (pointer comparison), an iconveh
        descriptor is opened and, for every message, the comments, dot
        comments, previous msgid fields, msgid fields and msgstr are
        converted.  msgids_changed becomes true as soon as one msgctxt
        or msgid is not pure ASCII.
     6. If message_list_msgids_changed (mlp) reports true, the
        conversion made different msgids equal, which is a fatal error.
   A further fatal error covers builds without iconv(); the
   preprocessor lines selecting it are not present in this listing.  */
193 iconv_message_list_internal (message_list_ty *mlp,
194 const char *canon_from_code,
195 const char *canon_to_code,
197 const char *from_filename)
/* Record whether the caller supplied a source encoding; it is checked
   again below when the header charset cannot be canonicalized.  */
199 bool canon_from_code_overridden = (canon_from_code != NULL);
203 /* If the list is empty, nothing to do. */
204 if (mlp->nitems == 0)
207 /* Search the header entry, and extract and replace the charset name. */
208 for (j = 0; j < mlp->nitems; j++)
209 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
211 const char *header = mlp->item[j]->msgstr;
215 const char *charsetstr = c_strstr (header, "charset=");
217 if (charsetstr != NULL)
221 const char *canon_charset;
/* Step over the field name; the charset name runs up to the next
   space, tab or newline.  */
223 charsetstr += strlen ("charset=");
224 len = strcspn (charsetstr, " \t\n");
/* Copy the name into a temporary buffer from xmalloca.  The line that
   NUL-terminates it and the matching release are presumably on lines
   missing from this listing -- confirm.  */
225 charset = (char *) xmalloca (len + 1);
226 memcpy (charset, charsetstr, len);
229 canon_charset = po_charset_canonicalize (charset);
230 if (canon_charset == NULL)
232 if (!canon_from_code_overridden)
234 /* Don't give an error for POT files, because POT
235 files usually contain only ASCII msgids. */
236 const char *filename = from_filename;
240 && (filenamelen = strlen (filename)) >= 4
241 && memcmp (filename + filenamelen - 4, ".pot", 4)
243 && strcmp (charset, "CHARSET") == 0)
244 canon_charset = po_charset_ascii;
246 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
247 false, xasprintf (_("\
248 present charset \"%s\" is not a portable encoding name"),
254 if (canon_from_code == NULL)
255 canon_from_code = canon_charset;
256 else if (canon_from_code != canon_charset)
257 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
260 two different charsets \"%s\" and \"%s\" in input file"),
261 canon_from_code, canon_charset));
267 size_t len1, len2, len3;
270 len1 = charsetstr - header;
271 len2 = strlen (canon_to_code);
272 len3 = (header + strlen (header)) - (charsetstr + len);
273 new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
274 memcpy (new_header, header, len1);
275 memcpy (new_header + len1, canon_to_code, len2);
276 memcpy (new_header + len1 + len2, charsetstr + len,
278 mlp->item[j]->msgstr = new_header;
279 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
284 if (canon_from_code == NULL)
286 if (is_ascii_message_list (mlp))
287 canon_from_code = po_charset_ascii;
289 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
291 input file doesn't contain a header entry with a charset specification"));
294 msgids_changed = false;
296 /* If the two encodings are the same, nothing to do. */
297 if (canon_from_code != canon_to_code)
301 struct conversion_context context;
303 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
304 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
306 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
307 and iconv() does not support this conversion."),
308 canon_from_code, canon_to_code,
309 basename (program_name)));
311 context.from_code = canon_from_code;
312 context.to_code = canon_to_code;
313 context.from_filename = from_filename;
315 for (j = 0; j < mlp->nitems; j++)
317 message_ty *mp = mlp->item[j];
319 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
320 || !is_ascii_string (mp->msgid))
321 msgids_changed = true;
322 context.message = mp;
323 convert_string_list (&cd, mp->comment, &context);
324 convert_string_list (&cd, mp->comment_dot, &context);
325 convert_prev_msgid (&cd, mp, &context);
326 convert_msgid (&cd, mp, &context);
327 convert_msgstr (&cd, mp, &context);
333 if (message_list_msgids_changed (mlp))
334 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
336 Conversion from \"%s\" to \"%s\" introduces duplicates: \
337 some different msgids become equal."),
338 canon_from_code, canon_to_code));
340 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
342 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
343 This version was built without iconv()."),
344 canon_from_code, canon_to_code,
345 basename (program_name)));
349 return msgids_changed;
/* Wrapper around iconv_message_list_internal.  It forwards MLP,
   CANON_FROM_CODE and CANON_TO_CODE and passes true as the fourth
   argument, then returns the callee's result.
   NOTE(review): the fourth parameter of the callee is not visible in
   this listing; presumably it is the header-update flag.  The end of
   the call (presumably forwarding FROM_FILENAME) is also missing.  */
353 iconv_message_list (message_list_ty *mlp,
354 const char *canon_from_code, const char *canon_to_code,
355 const char *from_filename)
357 return iconv_message_list_internal (mlp,
358 canon_from_code, canon_to_code, true,
/* Convert every message list of the domain list MDLP to the encoding
   named to_code.
   The target name is first canonicalized with po_charset_canonicalize;
   a name it does not recognize is a fatal error.  Each domain's message
   list is then converted with iconv_message_list_internal, using
   mdlp->encoding as the source encoding, and afterwards mdlp->encoding
   is set to the canonical target name.
   NOTE(review): to_code and update_header are used below, but their
   parameter lines are missing from this listing, as are the return
   type and any return statement.  */
363 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
366 const char *from_filename)
368 const char *canon_to_code;
371 /* Canonicalize target encoding. */
372 canon_to_code = po_charset_canonicalize (to_code);
373 if (canon_to_code == NULL)
374 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
376 target charset \"%s\" is not a portable encoding name."),
379 for (k = 0; k < mdlp->nitems; k++)
380 iconv_message_list_internal (mdlp->item[k]->messages,
381 mdlp->encoding, canon_to_code, update_header,
384 mdlp->encoding = canon_to_code;
/* Test whether the NUL-terminated STRING can be converted through CD.
   The string is converted with xmem_cd_iconveh, including its
   terminating NUL (strlen + 1), using the same arguments as
   convert_string.  The boolean ok is true when the output has exactly
   one NUL byte and it is the last byte.
   No error is reported in the visible lines, in contrast to
   convert_string.
   NOTE(review): the return statements and the release of the
   temporary result are on lines missing from this listing; presumably
   ok is returned on success and false otherwise -- confirm.  */
391 iconvable_string (const iconveh_t *cd, const char *string)
393 size_t len = strlen (string) + 1;
395 size_t resultlen = 0;
397 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
398 &result, &resultlen) == 0)
400 /* Test if the result has exactly one NUL byte, at the end. */
401 bool ok = (resultlen > 0 && result[resultlen - 1] == '\0'
402 && strlen (result) == resultlen - 1);
/* Test every string of the list SLP with iconvable_string.
   NOTE(review): the statements executed when a string fails, and the
   final result, are missing from this listing; presumably the function
   returns false at the first failure and true otherwise -- confirm.  */
410 iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
415 for (i = 0; i < slp->nitems; i++)
416 if (!iconvable_string (cd, slp->item[i]))
/* Test whether the "previous" identification fields of MP
   (prev_msgctxt, prev_msgid, prev_msgid_plural) can be converted
   through CD.  Only non-NULL fields are tested, each with
   iconvable_string.
   NOTE(review): the return statements are missing from this listing;
   presumably a failing field yields false and the fall-through result
   is true -- confirm.  */
422 iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
424 if (mp->prev_msgctxt != NULL)
425 if (!iconvable_string (cd, mp->prev_msgctxt))
427 if (mp->prev_msgid != NULL)
428 if (!iconvable_string (cd, mp->prev_msgid))
430 if (mp->prev_msgid_plural != NULL)
431 if (!iconvable_string (cd, mp->prev_msgid_plural))
/* Test whether the identification fields of MP can be converted
   through CD.  msgctxt and msgid_plural are tested only when non-NULL;
   msgid is always tested.
   NOTE(review): the return statements are missing from this listing;
   presumably a failing field yields false and the fall-through result
   is true -- confirm.  */
437 iconvable_msgid (const iconveh_t *cd, message_ty *mp)
439 if (mp->msgctxt != NULL)
440 if (!iconvable_string (cd, mp->msgctxt))
442 if (!iconvable_string (cd, mp->msgid))
444 if (mp->msgid_plural != NULL)
445 if (!iconvable_string (cd, mp->msgid_plural))
/* Test whether the translation buffer of MP (mp->msgstr, of
   mp->msgstr_len bytes) can be converted through CD.
   The checks mirror those of convert_msgstr: the buffer must be
   non-empty and end in a NUL, the converted result must end in a NUL,
   and both must contain the same number of NUL-terminated segments.
   No assignment to MP is visible in these lines.
   NOTE(review): the action for a malformed buffer, the release of the
   temporary result and the return statements are on lines missing
   from this listing.  */
451 iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
454 size_t resultlen = 0;
456 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
459 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
460 &result, &resultlen) == 0)
464 /* Test if the result has a NUL byte at the end. */
465 if (resultlen > 0 && result[resultlen - 1] == '\0')
466 /* Test if the result has the same number of NUL bytes. */
/* Count the NUL-terminated segments of the original and of the result
   by stepping strlen + 1 bytes at a time; the loop conditions are on
   lines missing from this listing.  */
473 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
475 p += strlen (p) + 1, nulcount1++);
476 for (p = result, pend = p + resultlen, nulcount2 = 0;
478 p += strlen (p) + 1, nulcount2++);
480 if (nulcount1 == nulcount2)
/* Check whether the message list MLP could be converted from
   CANON_FROM_CODE to CANON_TO_CODE.
   The structure follows iconv_message_list_internal, but no po_xerror
   call and no conversion function is visible here; each failure point
   is marked by a comment only.

   CANON_FROM_CODE may be NULL, in which case the source encoding is
   taken from the charset= field of the header entry.

   Visible steps:
     1. An empty list needs no check.
     2. For each non-obsolete header entry, locate charset= in its
        msgstr and canonicalize the name.  If it is not recognized and
        no source encoding was supplied, the placeholder CHARSET is
        treated as ASCII.  Otherwise the header charset becomes the
        source encoding or must be the same pointer as the known one.
     3. If no source encoding is known after the scan, a pure ASCII
        list is treated as ASCII.
     4. When source and target differ (pointer comparison), an iconveh
        descriptor is opened and the comments, dot comments, previous
        msgid fields, msgid fields and msgstr of every message are
        tested with the iconvable_* helpers.
   NOTE(review): the return statements, braces and preprocessor lines
   are missing from this listing, and the end of the function is not
   visible; presumably each commented failure point returns false and
   the function returns true otherwise -- confirm.  */
493 is_message_list_iconvable (message_list_ty *mlp,
494 const char *canon_from_code,
495 const char *canon_to_code)
/* Record whether the caller supplied a source encoding; it decides
   below whether an unrecognized header charset matters.  */
497 bool canon_from_code_overridden = (canon_from_code != NULL);
500 /* If the list is empty, nothing to check. */
501 if (mlp->nitems == 0)
504 /* Search the header entry, and extract the charset name. */
505 for (j = 0; j < mlp->nitems; j++)
506 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
508 const char *header = mlp->item[j]->msgstr;
512 const char *charsetstr = c_strstr (header, "charset=");
514 if (charsetstr != NULL)
518 const char *canon_charset;
/* Step over the field name; the charset name runs up to the next
   space, tab or newline.  */
520 charsetstr += strlen ("charset=");
521 len = strcspn (charsetstr, " \t\n");
522 charset = (char *) xmalloca (len + 1);
523 memcpy (charset, charsetstr, len);
526 canon_charset = po_charset_canonicalize (charset);
527 if (canon_charset == NULL)
529 if (!canon_from_code_overridden)
531 /* Don't give an error for POT files, because POT
532 files usually contain only ASCII msgids. */
/* This function takes no file name, so the placeholder is accepted
   without the .pot suffix test made in iconv_message_list_internal.  */
533 if (strcmp (charset, "CHARSET") == 0)
534 canon_charset = po_charset_ascii;
537 /* charset is not a portable encoding name. */
545 if (canon_from_code == NULL)
546 canon_from_code = canon_charset;
547 else if (canon_from_code != canon_charset)
549 /* Two different charsets in input file. */
558 if (canon_from_code == NULL)
560 if (is_ascii_message_list (mlp))
561 canon_from_code = po_charset_ascii;
563 /* Input file lacks a header entry with a charset specification. */
567 /* If the two encodings are the same, nothing to check. */
568 if (canon_from_code != canon_to_code)
573 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
574 /* iconv() doesn't support this conversion. */
577 for (j = 0; j < mlp->nitems; j++)
579 message_ty *mp = mlp->item[j];
/* The && chain stops at the first field that cannot be converted.  */
581 if (!(iconvable_string_list (&cd, mp->comment)
582 && iconvable_string_list (&cd, mp->comment_dot)
583 && iconvable_prev_msgid (&cd, mp)
584 && iconvable_msgid (&cd, mp)
585 && iconvable_msgstr (&cd, mp)))
591 /* This version was built without iconv(). */