1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003, 2005-2009, 2015 Free Software Foundation,
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "msgl-iconv.h"
39 #include "po-charset.h"
40 #include "xstriconv.h"
41 #include "xstriconveh.h"
42 #include "msgl-ascii.h"
46 #include "xvasprintf.h"
47 #include "po-xerror.h"
/* Shorthand used around the message strings in this file:
   _(str) expands to gettext (str).  */
50 #define _(str) gettext (str)
/* Report a failed conversion as a fatal error through po_xerror
   (PO_SEVERITY_FATAL_ERROR), attributing it to CONTEXT->message.
   CONTEXT supplies the fields read here: to_code, from_code,
   from_filename and message.
   The prototype carries __attribute__ ((noreturn)) when compiled with
   GCC 2.5 or newer.
   When CONTEXT->to_code is po_charset_utf8 the message blames the input
   as not valid in the from_code encoding; the other po_xerror call
   reports a generic error while converting from from_code to to_code.
   NOTE(review): the lines that separate the two po_xerror calls, close
   the preprocessor conditional and end the function are not visible in
   this view -- confirm against the full file.  */
55 static void conversion_error (const struct conversion_context* context)
56 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
57 __attribute__ ((noreturn))
61 conversion_error (const struct conversion_context* context)
63 if (context->to_code == po_charset_utf8)
64 /* If a conversion to UTF-8 fails, the problem lies in the input. */
65 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
66 xasprintf (_("%s: input is not valid in \"%s\" encoding"),
67 context->from_filename, context->from_code));
69 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
71 %s: error while converting from \"%s\" encoding to \"%s\" encoding"),
72 context->from_filename, context->from_code,
/* Convert the NUL-terminated STRING through the iconv descriptor CD
   using xmem_cd_iconv.  The terminating NUL is part of the converted
   input (len = strlen (string) + 1).
   After a successful call (return value 0) the result is accepted only
   if it is non-empty, ends in a NUL byte and contains no other NUL byte
   (strlen (result) == resultlen - 1).
   conversion_error (CONTEXT) is the failure path.
   NOTE(review): the return type, the declarations of result/resultlen
   and the statement that hands the result back are not visible in this
   view; presumably the converted buffer is returned -- confirm.  */
79 convert_string_directly (iconv_t cd, const char *string,
80 const struct conversion_context* context)
82 size_t len = strlen (string) + 1;
86 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0)
87 /* Verify the result has exactly one NUL byte, at the end. */
88 if (resultlen > 0 && result[resultlen - 1] == '\0'
89 && strlen (result) == resultlen - 1)
92 conversion_error (context);
/* Convert the NUL-terminated STRING through the iconveh descriptor CD
   using xmem_cd_iconveh, passing iconveh_error and NULL as its fourth
   and fifth arguments.  The terminating NUL is part of the converted
   input (len = strlen (string) + 1).
   After a successful call (return value 0) the result is accepted only
   if it is non-empty, ends in a NUL byte and contains no other NUL byte
   (strlen (result) == resultlen - 1).
   conversion_error (CONTEXT) is the failure path.
   NOTE(review): the return type, the declaration of result and the
   statement that hands the result back are not visible in this view;
   callers in this file assign the return value to char pointers.  */
98 convert_string (const iconveh_t *cd, const char *string,
99 const struct conversion_context* context)
101 size_t len = strlen (string) + 1;
103 size_t resultlen = 0;
105 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
106 &result, &resultlen) == 0)
107 /* Verify the result has exactly one NUL byte, at the end. */
108 if (resultlen > 0 && result[resultlen - 1] == '\0'
109 && strlen (result) == resultlen - 1)
112 conversion_error (context);
/* Convert every item of the string list SLP with convert_string, for
   indices 0 .. slp->nitems - 1.  Each slot is overwritten with the
   converted string; the previous pointer is not freed in the lines
   visible here.
   NOTE(review): at least one line between the signature and the loop
   is not visible in this view, so any guard on SLP cannot be confirmed
   from here.  */
118 convert_string_list (const iconveh_t *cd, string_list_ty *slp,
119 const struct conversion_context* context)
124 for (i = 0; i < slp->nitems; i++)
125 slp->item[i] = convert_string (cd, slp->item[i], context);
/* Convert the previous-message fields of MP in place with
   convert_string: prev_msgctxt, prev_msgid and prev_msgid_plural.
   Each field is converted only when it is not NULL; the field is
   overwritten with the converted string.  */
129 convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
130 const struct conversion_context* context)
132 if (mp->prev_msgctxt != NULL)
133 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
134 if (mp->prev_msgid != NULL)
135 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
136 if (mp->prev_msgid_plural != NULL)
137 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
/* Convert the identifying fields of MP in place with convert_string.
   msgctxt and msgid_plural are converted only when not NULL; msgid is
   converted unconditionally.  Each field is overwritten with the
   converted string.  */
141 convert_msgid (const iconveh_t *cd, message_ty *mp,
142 const struct conversion_context* context)
144 if (mp->msgctxt != NULL)
145 mp->msgctxt = convert_string (cd, mp->msgctxt, context);
146 mp->msgid = convert_string (cd, mp->msgid, context);
147 if (mp->msgid_plural != NULL)
148 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
/* Convert the msgstr buffer of MP (msgstr_len bytes) through CD with
   xmem_cd_iconveh, passing iconveh_error and NULL as its fourth and
   fifth arguments.
   Steps visible here:
   - a precondition test that msgstr is non-empty and ends in a NUL byte
     (the action taken when it fails is not visible in this view);
   - after a successful conversion (return value 0), the result must be
     non-empty and end in a NUL byte;
   - both the original buffer and the result are walked in steps of
     strlen (p) + 1 starting at the buffer start, counting the steps in
     nulcount1 and nulcount2; pend marks the end of each buffer;
   - when the two counts are equal, mp->msgstr_len is set to resultlen;
   - conversion_error (CONTEXT) is the failure path.
   NOTE(review): the loop conditions, the store of the converted buffer
   into MP and the early exit after success are not visible in this
   view -- confirm against the full file.  */
152 convert_msgstr (const iconveh_t *cd, message_ty *mp,
153 const struct conversion_context* context)
156 size_t resultlen = 0;
158 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
161 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
162 &result, &resultlen) == 0)
163 /* Verify the result has a NUL byte at the end. */
164 if (resultlen > 0 && result[resultlen - 1] == '\0')
165 /* Verify the result has the same number of NUL bytes. */
172 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
174 p += strlen (p) + 1, nulcount1++);
175 for (p = result, pend = p + resultlen, nulcount2 = 0;
177 p += strlen (p) + 1, nulcount2++);
179 if (nulcount1 == nulcount2)
182 mp->msgstr_len = resultlen;
187 conversion_error (context);
/* Convert all messages of MLP from CANON_FROM_CODE to CANON_TO_CODE and
   return msgids_changed.

   Outline of the visible logic:
   1. An empty list needs no work.
   2. For every non-obsolete header entry, the msgstr is searched with
      c_strstr for the charset key.  The charset name following the key
      is copied into a temporary buffer and canonicalized with
      po_charset_canonicalize.
      - If canonicalization fails and the caller did not supply
        CANON_FROM_CODE, a test on FROM_FILENAME (length at least 4,
        memcmp of the last four bytes against the .pot suffix) together
        with a strcmp of the charset against the CHARSET placeholder
        selects po_charset_ascii; a fatal po_xerror about a non-portable
        encoding name is also present on this path.
      - The canonical charset becomes CANON_FROM_CODE when none is known
        yet; when it differs from the known one, a fatal po_xerror about
        two different charsets is raised.  The comparison is on the
        pointers themselves, which presumes canonical names are unique
        pointers -- TODO confirm against po_charset_canonicalize.
      - A new header is built in which the old charset name is replaced
        by CANON_TO_CODE: len1 bytes before the name, len2 bytes of the
        new name, len3 bytes after the old name, plus one terminating
        byte.  It is stored as the entry msgstr with the matching
        msgstr_len.
   3. If no source encoding is known after that, po_charset_ascii is
      used when is_ascii_message_list (mlp) holds; a fatal po_xerror
      about a missing charset specification is the other outcome.
   4. When source and target encodings differ (pointer comparison), an
      iconveh descriptor is opened with iconveh_open; failure is a fatal
      po_xerror.  For each message, msgids_changed is set when msgctxt
      or msgid is not pure ASCII, and then comments, dot comments,
      previous msgid fields, msgid fields and msgstr are converted.
      message_list_msgids_changed (mlp) guards a fatal po_xerror about
      conversions that make different msgids equal.
   5. A further po_xerror covers a build without iconv().

   NOTE(review): several lines of this definition (return type, one
   parameter, local declarations, else branches, preprocessor
   conditionals, string openers) are not visible in this view; the
   outline above only covers what can be read here.  */
194 iconv_message_list_internal (message_list_ty *mlp,
195 const char *canon_from_code,
196 const char *canon_to_code,
198 const char *from_filename)
/* Remember whether the caller supplied a source encoding.  */
200 bool canon_from_code_overridden = (canon_from_code != NULL);
204 /* If the list is empty, nothing to do. */
205 if (mlp->nitems == 0)
208 /* Search the header entry, and extract and replace the charset name. */
209 for (j = 0; j < mlp->nitems; j++)
210 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
212 const char *header = mlp->item[j]->msgstr;
216 const char *charsetstr = c_strstr (header, "charset=");
218 if (charsetstr != NULL)
222 const char *canon_charset;
/* Skip past the key; the charset name runs up to the first space,
   tab or newline.  */
224 charsetstr += strlen ("charset=");
225 len = strcspn (charsetstr, " \t\n");
226 charset = (char *) xmalloca (len + 1);
227 memcpy (charset, charsetstr, len);
230 canon_charset = po_charset_canonicalize (charset);
231 if (canon_charset == NULL)
233 if (!canon_from_code_overridden)
235 /* Don't give an error for POT files, because POT
236 files usually contain only ASCII msgids. */
237 const char *filename = from_filename;
241 && (filenamelen = strlen (filename)) >= 4
242 && memcmp (filename + filenamelen - 4, ".pot", 4)
244 && strcmp (charset, "CHARSET") == 0)
245 canon_charset = po_charset_ascii;
247 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
248 false, xasprintf (_("\
249 present charset \"%s\" is not a portable encoding name"),
255 if (canon_from_code == NULL)
256 canon_from_code = canon_charset;
257 else if (canon_from_code != canon_charset)
258 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
261 two different charsets \"%s\" and \"%s\" in input file"),
262 canon_from_code, canon_charset));
268 size_t len1, len2, len3;
271 len1 = charsetstr - header;
272 len2 = strlen (canon_to_code);
273 len3 = (header + strlen (header)) - (charsetstr + len);
274 new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
275 memcpy (new_header, header, len1);
276 memcpy (new_header + len1, canon_to_code, len2);
277 memcpy (new_header + len1 + len2, charsetstr + len,
279 mlp->item[j]->msgstr = new_header;
280 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
285 if (canon_from_code == NULL)
287 if (is_ascii_message_list (mlp))
288 canon_from_code = po_charset_ascii;
290 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
292 input file doesn't contain a header entry with a charset specification"));
295 msgids_changed = false;
297 /* If the two encodings are the same, nothing to do. */
298 if (canon_from_code != canon_to_code)
302 struct conversion_context context;
304 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
305 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
307 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
308 and iconv() does not support this conversion."),
309 canon_from_code, canon_to_code,
310 basename (program_name)));
312 context.from_code = canon_from_code;
313 context.to_code = canon_to_code;
314 context.from_filename = from_filename;
316 for (j = 0; j < mlp->nitems; j++)
318 message_ty *mp = mlp->item[j];
320 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
321 || !is_ascii_string (mp->msgid))
322 msgids_changed = true;
323 context.message = mp;
324 convert_string_list (&cd, mp->comment, &context);
325 convert_string_list (&cd, mp->comment_dot, &context);
326 convert_prev_msgid (&cd, mp, &context);
327 convert_msgid (&cd, mp, &context);
328 convert_msgstr (&cd, mp, &context);
334 if (message_list_msgids_changed (mlp))
335 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
337 Conversion from \"%s\" to \"%s\" introduces duplicates: \
338 some different msgids become equal."),
339 canon_from_code, canon_to_code));
341 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
343 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
344 This version was built without iconv()."),
345 canon_from_code, canon_to_code,
346 basename (program_name)));
350 return msgids_changed;
/* Entry point for converting a single message list: forwards MLP,
   CANON_FROM_CODE and CANON_TO_CODE to iconv_message_list_internal,
   passing true as the fourth argument, and returns that call's result.
   NOTE(review): the return type and the final argument line of the
   call are not visible in this view; FROM_FILENAME is presumably the
   last argument -- confirm.  */
354 iconv_message_list (message_list_ty *mlp,
355 const char *canon_from_code, const char *canon_to_code,
356 const char *from_filename)
358 return iconv_message_list_internal (mlp,
359 canon_from_code, canon_to_code, true,
/* Convert every message list of the domain list MDLP to the target
   encoding.
   The target name to_code is canonicalized with
   po_charset_canonicalize; a NULL result leads to a fatal po_xerror
   about a non-portable encoding name.  Each mdlp->item[k]->messages is
   then passed to iconv_message_list_internal with mdlp->encoding as the
   source encoding, the canonical target and update_header.  Finally
   mdlp->encoding is set to the canonical target.
   NOTE(review): the return type, the to_code and update_header
   parameter lines, the last argument of the inner call and the return
   statement are not visible in this view.  */
364 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
367 const char *from_filename)
369 const char *canon_to_code;
372 /* Canonicalize target encoding. */
373 canon_to_code = po_charset_canonicalize (to_code);
374 if (canon_to_code == NULL)
375 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
377 target charset \"%s\" is not a portable encoding name."),
380 for (k = 0; k < mdlp->nitems; k++)
381 iconv_message_list_internal (mdlp->item[k]->messages,
382 mdlp->encoding, canon_to_code, update_header,
385 mdlp->encoding = canon_to_code;
/* Test whether the NUL-terminated STRING can be converted through CD.
   The string, including its terminating NUL (len = strlen (string) + 1),
   is run through xmem_cd_iconveh with iconveh_error and NULL as the
   fourth and fifth arguments.  After a successful call (return value 0)
   the flag ok records whether the result is non-empty, ends in a NUL
   byte and contains no other NUL byte.
   NOTE(review): the return type, the declaration of result, any release
   of the result buffer and the return statements are not visible in
   this view; callers in this file use the return value as a boolean.  */
392 iconvable_string (const iconveh_t *cd, const char *string)
394 size_t len = strlen (string) + 1;
396 size_t resultlen = 0;
398 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
399 &result, &resultlen) == 0)
401 /* Test if the result has exactly one NUL byte, at the end. */
402 bool ok = (resultlen > 0 && result[resultlen - 1] == '\0'
403 && strlen (result) == resultlen - 1);
/* Check the items of the string list SLP, indices 0 .. slp->nitems - 1,
   with iconvable_string; the visible test singles out the first item
   that is not convertible.
   NOTE(review): the return type, any guard on SLP and the return
   statements are not visible in this view; presumably a failing item
   yields false and a clean pass yields true -- confirm.  */
411 iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
416 for (i = 0; i < slp->nitems; i++)
417 if (!iconvable_string (cd, slp->item[i]))
/* Check the previous-message fields of MP with iconvable_string:
   prev_msgctxt, prev_msgid and prev_msgid_plural.  A field is tested
   only when it is not NULL.
   NOTE(review): the return statements are not visible in this view;
   presumably a failing field yields false and otherwise true is
   returned -- confirm.  */
423 iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
425 if (mp->prev_msgctxt != NULL)
426 if (!iconvable_string (cd, mp->prev_msgctxt))
428 if (mp->prev_msgid != NULL)
429 if (!iconvable_string (cd, mp->prev_msgid))
431 if (mp->prev_msgid_plural != NULL)
432 if (!iconvable_string (cd, mp->prev_msgid_plural))
/* Check the identifying fields of MP with iconvable_string.  msgctxt
   and msgid_plural are tested only when not NULL; msgid is tested
   unconditionally.
   NOTE(review): the return statements are not visible in this view;
   presumably a failing field yields false and otherwise true is
   returned -- confirm.  */
438 iconvable_msgid (const iconveh_t *cd, message_ty *mp)
440 if (mp->msgctxt != NULL)
441 if (!iconvable_string (cd, mp->msgctxt))
443 if (!iconvable_string (cd, mp->msgid))
445 if (mp->msgid_plural != NULL)
446 if (!iconvable_string (cd, mp->msgid_plural))
/* Test whether the msgstr buffer of MP (msgstr_len bytes) can be
   converted through CD.
   Steps visible here:
   - a precondition test that msgstr is non-empty and ends in a NUL byte
     (the action taken when it fails is not visible in this view);
   - the buffer is run through xmem_cd_iconveh with iconveh_error and
     NULL as the fourth and fifth arguments;
   - after a successful call (return value 0), the result must be
     non-empty and end in a NUL byte;
   - both the original buffer and the result are walked in steps of
     strlen (p) + 1, counting the steps in nulcount1 and nulcount2, and
     the two counts are compared for equality.
   MP itself is not modified in the lines visible here.
   NOTE(review): the loop conditions, any release of the result buffer
   and the return statements are not visible in this view.  */
452 iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
455 size_t resultlen = 0;
457 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
460 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
461 &result, &resultlen) == 0)
465 /* Test if the result has a NUL byte at the end. */
466 if (resultlen > 0 && result[resultlen - 1] == '\0')
467 /* Test if the result has the same number of NUL bytes. */
474 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
476 p += strlen (p) + 1, nulcount1++);
477 for (p = result, pend = p + resultlen, nulcount2 = 0;
479 p += strlen (p) + 1, nulcount2++);
481 if (nulcount1 == nulcount2)
/* Check whether the messages of MLP could be converted from
   CANON_FROM_CODE to CANON_TO_CODE.  No conversion call that writes
   back into MLP appears in the lines visible here.

   Outline of the visible logic:
   1. An empty list needs no check.
   2. For every non-obsolete header entry, the msgstr is searched with
      c_strstr for the charset key; the charset name following the key
      is copied into a temporary buffer and canonicalized with
      po_charset_canonicalize.  If that fails and the caller did not
      supply CANON_FROM_CODE, a charset equal to the CHARSET placeholder
      is treated as po_charset_ascii; here, unlike in
      iconv_message_list_internal, no file name is consulted.  The
      canonical charset becomes CANON_FROM_CODE when none is known yet;
      a differing one (pointer comparison) is the two-charsets case.
   3. If no source encoding is known after that, po_charset_ascii is
      used when is_ascii_message_list (mlp) holds.
   4. When source and target encodings differ (pointer comparison), an
      iconveh descriptor is opened with iconveh_open and every message
      is checked with iconvable_string_list (comment, comment_dot),
      iconvable_prev_msgid, iconvable_msgid and iconvable_msgstr.

   The existing comments mark the failure cases: non-portable charset
   name, two different charsets, missing charset specification,
   unsupported conversion, and a build without iconv().
   NOTE(review): the return type, the return statements and the end of
   this definition are not visible in this view; presumably each failure
   case yields false and a clean pass yields true -- confirm against the
   full file.  */
494 is_message_list_iconvable (message_list_ty *mlp,
495 const char *canon_from_code,
496 const char *canon_to_code)
/* Remember whether the caller supplied a source encoding.  */
498 bool canon_from_code_overridden = (canon_from_code != NULL);
501 /* If the list is empty, nothing to check. */
502 if (mlp->nitems == 0)
505 /* Search the header entry, and extract the charset name. */
506 for (j = 0; j < mlp->nitems; j++)
507 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
509 const char *header = mlp->item[j]->msgstr;
513 const char *charsetstr = c_strstr (header, "charset=");
515 if (charsetstr != NULL)
519 const char *canon_charset;
521 charsetstr += strlen ("charset=");
522 len = strcspn (charsetstr, " \t\n");
523 charset = (char *) xmalloca (len + 1);
524 memcpy (charset, charsetstr, len);
527 canon_charset = po_charset_canonicalize (charset);
528 if (canon_charset == NULL)
530 if (!canon_from_code_overridden)
532 /* Don't give an error for POT files, because POT
533 files usually contain only ASCII msgids. */
534 if (strcmp (charset, "CHARSET") == 0)
535 canon_charset = po_charset_ascii;
538 /* charset is not a portable encoding name. */
546 if (canon_from_code == NULL)
547 canon_from_code = canon_charset;
548 else if (canon_from_code != canon_charset)
550 /* Two different charsets in input file. */
559 if (canon_from_code == NULL)
561 if (is_ascii_message_list (mlp))
562 canon_from_code = po_charset_ascii;
564 /* Input file lacks a header entry with a charset specification. */
568 /* If the two encodings are the same, nothing to check. */
569 if (canon_from_code != canon_to_code)
574 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
575 /* iconv() doesn't support this conversion. */
578 for (j = 0; j < mlp->nitems; j++)
580 message_ty *mp = mlp->item[j];
582 if (!(iconvable_string_list (&cd, mp->comment)
583 && iconvable_string_list (&cd, mp->comment_dot)
584 && iconvable_prev_msgid (&cd, mp)
585 && iconvable_msgid (&cd, mp)
586 && iconvable_msgstr (&cd, mp)))
592 /* This version was built without iconv(). */