gettext-tools/src/write-po.c

   1 /* GNU gettext - internationalization aids
   2    Copyright (C) 1995-1998, 2000-2010, 2012 Free Software Foundation, Inc.
   3
   4    This file was written by Peter Miller <millerp@canb.auug.org.au>
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22 #include <alloca.h>
  23
  24 /* Specification.  */
  25 #include "write-po.h"
  26
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #if HAVE_ICONV
  34 # include <iconv.h>
  35 #endif
  36
  37 #include "c-ctype.h"
  38 #include "po-charset.h"
  39 #include "format.h"
  40 #include "unilbrk.h"
  41 #include "msgl-ascii.h"
  42 #include "write-catalog.h"
  43 #include "xalloc.h"
  44 #include "xmalloca.h"
  45 #include "c-strstr.h"
  46 #include "ostream.h"
  47 #ifdef GETTEXTDATADIR
  48 # include "styled-ostream.h"
  49 #endif
  50 #include "xvasprintf.h"
  51 #include "po-xerror.h"
  52 #include "gettext.h"
  53
  54 /* Our regular abbreviation.  */
  55 #define _(str) gettext (str)
  56
  57 #if HAVE_DECL_PUTC_UNLOCKED
  58 # undef putc
  59 # define putc putc_unlocked
  60 #endif
  61
  62
  63 /* =================== Putting together a #, flags line. =================== */
  64
  65
  66 /* Convert IS_FORMAT in the context of programming language LANG to a flag
  67    string for use in #, flags.  */
  68
  69 const char *
  70 make_format_description_string (enum is_format is_format, const char *lang,
  71                                 bool debug)
  72 {
  73   static char result[100];
  74
  75   switch (is_format)
  76     {
  77     case possible:
  78       if (debug)
  79         {
  80           sprintf (result, "possible-%s-format", lang);
  81           break;
  82         }
  83       /* FALLTHROUGH */
  84     case yes_according_to_context:
  85     case yes:
  86       sprintf (result, "%s-format", lang);
  87       break;
  88     case no:
  89       sprintf (result, "no-%s-format", lang);
  90       break;
  91     default:
  92       /* The others have already been filtered out by significant_format_p.  */
  93       abort ();
  94     }
  95
  96   return result;
  97 }
  98
  99
 100 /* Return true if IS_FORMAT is worth mentioning in a #, flags list.  */
 101
 102 bool
 103 significant_format_p (enum is_format is_format)
 104 {
 105   return is_format != undecided && is_format != impossible;
 106 }
 107
 108
 109 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list.  */
 110
 111 static bool
 112 has_significant_format_p (const enum is_format is_format[NFORMATS])
 113 {
 114   size_t i;
 115
 116   for (i = 0; i < NFORMATS; i++)
 117     if (significant_format_p (is_format[i]))
 118       return true;
 119   return false;
 120 }
 121
 122
 123 /* Convert a RANGE to a freshly allocated string for use in #, flags.  */
 124
 125 char *
 126 make_range_description_string (struct argument_range range)
 127 {
 128   return xasprintf ("range: %d..%d", range.min, range.max);
 129 }
 130
 131
 132 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags.  */
 133
 134 static const char *
 135 make_c_width_description_string (enum is_wrap do_wrap)
 136 {
 137   const char *result = NULL;
 138
 139   switch (do_wrap)
 140     {
 141     case yes:
 142       result = "wrap";
 143       break;
 144     case no:
 145       result = "no-wrap";
 146       break;
 147     default:
 148       abort ();
 149     }
 150
 151   return result;
 152 }
 153
 154
 155 /* ========================== Styling primitives. ========================== */
 156
 157
 158 /* When compiled in src, enable styling support.
 159    When compiled in libgettextpo, don't enable styling support.  */
 160 #ifdef GETTEXTDATADIR
 161
 162 /* Return true if the stream is an instance of styled_ostream_t.  */
 163 static inline bool
 164 is_stylable (ostream_t stream)
 165 {
 166   return IS_INSTANCE (stream, ostream, styled_ostream);
 167 }
 168
 169 /* Start a run of text belonging to a given CSS class.  */
 170 static void
 171 begin_css_class (ostream_t stream, const char *classname)
 172 {
 173   if (is_stylable (stream))
 174     styled_ostream_begin_use_class ((styled_ostream_t) stream, classname);
 175 }
 176
 177 /* End a run of text belonging to a given CSS class.  */
 178 static void
 179 end_css_class (ostream_t stream, const char *classname)
 180 {
 181   if (is_stylable (stream))
 182     styled_ostream_end_use_class ((styled_ostream_t) stream, classname);
 183 }
 184
 185 #else
 186
/* Fallback used when GETTEXTDATADIR is not defined: no stream is treated
   as stylable, and the CSS class markers expand to nothing.  */
#define is_stylable(stream) false
#define begin_css_class(stream,classname) /* empty */
#define end_css_class(stream,classname) /* empty */
 190
 191 #endif
 192
/* The strings below are CSS class names.  They are meant to be passed as
   the CLASSNAME argument of begin_css_class and end_css_class.  */

/* CSS classes at message level.  */
static const char class_header[] = "header";
static const char class_translated[] = "translated";
static const char class_untranslated[] = "untranslated";
static const char class_fuzzy[] = "fuzzy";
static const char class_obsolete[] = "obsolete";

/* CSS classes describing the parts of a message.  */
static const char class_comment[] = "comment";
static const char class_translator_comment[] = "translator-comment";
static const char class_extracted_comment[] = "extracted-comment";
static const char class_reference_comment[] = "reference-comment";
static const char class_reference[] = "reference";
static const char class_flag_comment[] = "flag-comment";
static const char class_flag[] = "flag";
static const char class_fuzzy_flag[] = "fuzzy-flag";
static const char class_previous_comment[] = "previous-comment";
static const char class_previous[] = "previous";
static const char class_msgid[] = "msgid";
static const char class_msgstr[] = "msgstr";
static const char class_keyword[] = "keyword";
static const char class_string[] = "string";

/* CSS classes for the contents of strings.  */
static const char class_text[] = "text";
static const char class_escape_sequence[] = "escape-sequence";
static const char class_format_directive[] = "format-directive";
static const char class_invalid_format_directive[] = "invalid-format-directive";
/* The following three classes are currently compiled out.  */
#if 0
static const char class_added[] = "added";
static const char class_changed[] = "changed";
static const char class_removed[] = "removed";
#endif
 226
/* Per-character attributes.  Each constant occupies its own bit, so the
   values can be combined with '|'.  */
enum
{
  /* The character is part of an escape sequence.  */
  ATTR_ESCAPE_SEQUENCE          = 1 << 0,
  /* The following two are exclusive.  */
  /* The character is part of a format directive.  */
  ATTR_FORMAT_DIRECTIVE         = 1 << 1,
  /* The character is part of an invalid format directive.  */
  ATTR_INVALID_FORMAT_DIRECTIVE = 1 << 2
};
 235
 236
 237 /* ================ Output parts of a message, as comments. ================ */
 238
 239
 240 /* Output mp->comment as a set of comment lines.  */
 241
 242 void
 243 message_print_comment (const message_ty *mp, ostream_t stream)
 244 {
 245   if (mp->comment != NULL)
 246     {
 247       size_t j;
 248
 249       begin_css_class (stream, class_translator_comment);
 250
 251       for (j = 0; j < mp->comment->nitems; ++j)
 252         {
 253           const char *s = mp->comment->item[j];
 254           do
 255             {
 256               const char *e;
 257               ostream_write_str (stream, "#");
 258               if (*s != '\0')
 259                 ostream_write_str (stream, " ");
 260               e = strchr (s, '\n');
 261               if (e == NULL)
 262                 {
 263                   ostream_write_str (stream, s);
 264                   s = NULL;
 265                 }
 266               else
 267                 {
 268                   ostream_write_mem (stream, s, e - s);
 269                   s = e + 1;
 270                 }
 271               ostream_write_str (stream, "\n");
 272             }
 273           while (s != NULL);
 274         }
 275
 276       end_css_class (stream, class_translator_comment);
 277     }
 278 }
 279
 280
 281 /* Output mp->comment_dot as a set of comment lines.  */
 282
 283 void
 284 message_print_comment_dot (const message_ty *mp, ostream_t stream)
 285 {
 286   if (mp->comment_dot != NULL)
 287     {
 288       size_t j;
 289
 290       begin_css_class (stream, class_extracted_comment);
 291
 292       for (j = 0; j < mp->comment_dot->nitems; ++j)
 293         {
 294           const char *s = mp->comment_dot->item[j];
 295           ostream_write_str (stream, "#.");
 296           if (*s != '\0')
 297             ostream_write_str (stream, " ");
 298           ostream_write_str (stream, s);
 299           ostream_write_str (stream, "\n");
 300         }
 301
 302       end_css_class (stream, class_extracted_comment);
 303     }
 304 }
 305
 306
 307 /* Output mp->filepos as a set of comment lines.  */
 308
 309 void
 310 message_print_comment_filepos (const message_ty *mp, ostream_t stream,
 311                                bool uniforum, size_t page_width)
 312 {
 313   if (mp->filepos_count != 0)
 314     {
 315       begin_css_class (stream, class_reference_comment);
 316
 317       if (uniforum)
 318         {
 319           size_t j;
 320
 321           for (j = 0; j < mp->filepos_count; ++j)
 322             {
 323               lex_pos_ty *pp = &mp->filepos[j];
 324               const char *cp = pp->file_name;
 325               char *str;
 326
 327               while (cp[0] == '.' && cp[1] == '/')
 328                 cp += 2;
 329               ostream_write_str (stream, "# ");
 330               begin_css_class (stream, class_reference);
 331               /* There are two Sun formats to choose from: SunOS and
 332                  Solaris.  Use the Solaris form here.  */
 333               str = xasprintf ("File: %s, line: %ld",
 334                                cp, (long) pp->line_number);
 335               ostream_write_str (stream, str);
 336               end_css_class (stream, class_reference);
 337               ostream_write_str (stream, "\n");
 338               free (str);
 339             }
 340         }
 341       else
 342         {
 343           size_t column;
 344           size_t j;
 345
 346           ostream_write_str (stream, "#:");
 347           column = 2;
 348           for (j = 0; j < mp->filepos_count; ++j)
 349             {
 350               lex_pos_ty *pp;
 351               char buffer[21];
 352               const char *cp;
 353               size_t len;
 354
 355               pp = &mp->filepos[j];
 356               cp = pp->file_name;
 357               while (cp[0] == '.' && cp[1] == '/')
 358                 cp += 2;
 359               /* Some xgettext input formats, like RST, lack line numbers.  */
 360               if (pp->line_number == (size_t)(-1))
 361                 buffer[0] = '\0';
 362               else
 363                 sprintf (buffer, ":%ld", (long) pp->line_number);
 364               len = strlen (cp) + strlen (buffer) + 1;
 365               if (column > 2 && column + len >= page_width)
 366                 {
 367                   ostream_write_str (stream, "\n#:");
 368                   column = 2;
 369                 }
 370               ostream_write_str (stream, " ");
 371               begin_css_class (stream, class_reference);
 372               ostream_write_str (stream, cp);
 373               ostream_write_str (stream, buffer);
 374               end_css_class (stream, class_reference);
 375               column += len;
 376             }
 377           ostream_write_str (stream, "\n");
 378         }
 379
 380       end_css_class (stream, class_reference_comment);
 381     }
 382 }
 383
 384
 385 /* Output mp->is_fuzzy, mp->is_format, mp->range, mp->do_wrap as a comment
 386    line.  */
 387
 388 void
 389 message_print_comment_flags (const message_ty *mp, ostream_t stream, bool debug)
 390 {
 391   if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
 392       || has_significant_format_p (mp->is_format)
 393       || has_range_p (mp->range)
 394       || mp->do_wrap == no)
 395     {
 396       bool first_flag = true;
 397       size_t i;
 398
 399       begin_css_class (stream, class_flag_comment);
 400
 401       ostream_write_str (stream, "#,");
 402
 403       /* We don't print the fuzzy flag if the msgstr is empty.  This
 404          might be introduced by the user but we want to normalize the
 405          output.  */
 406       if (mp->is_fuzzy && mp->msgstr[0] != '\0')
 407         {
 408           ostream_write_str (stream, " ");
 409           begin_css_class (stream, class_flag);
 410           begin_css_class (stream, class_fuzzy_flag);
 411           ostream_write_str (stream, "fuzzy");
 412           end_css_class (stream, class_fuzzy_flag);
 413           end_css_class (stream, class_flag);
 414           first_flag = false;
 415         }
 416
 417       for (i = 0; i < NFORMATS; i++)
 418         if (significant_format_p (mp->is_format[i]))
 419           {
 420             if (!first_flag)
 421               ostream_write_str (stream, ",");
 422
 423             ostream_write_str (stream, " ");
 424             begin_css_class (stream, class_flag);
 425             ostream_write_str (stream,
 426                                make_format_description_string (mp->is_format[i],
 427                                                                format_language[i],
 428                                                                debug));
 429             end_css_class (stream, class_flag);
 430             first_flag = false;
 431           }
 432
 433       if (has_range_p (mp->range))
 434         {
 435           char *string;
 436
 437           if (!first_flag)
 438             ostream_write_str (stream, ",");
 439
 440           ostream_write_str (stream, " ");
 441           begin_css_class (stream, class_flag);
 442           string = make_range_description_string (mp->range);
 443           ostream_write_str (stream, string);
 444           free (string);
 445           end_css_class (stream, class_flag);
 446           first_flag = false;
 447         }
 448
 449       if (mp->do_wrap == no)
 450         {
 451           if (!first_flag)
 452             ostream_write_str (stream, ",");
 453
 454           ostream_write_str (stream, " ");
 455           begin_css_class (stream, class_flag);
 456           ostream_write_str (stream,
 457                              make_c_width_description_string (mp->do_wrap));
 458           end_css_class (stream, class_flag);
 459           first_flag = false;
 460         }
 461
 462       ostream_write_str (stream, "\n");
 463
 464       end_css_class (stream, class_flag_comment);
 465     }
 466 }
 467
 468
 469 /* ========= Some parameters for use by 'msgdomain_list_print_po'. ========= */
 470
 471
 472 /* This variable controls the extent to which the page width applies.
 473    True means it applies to message strings and file reference lines.
 474    False means it applies to file reference lines only.  */
 475 static bool wrap_strings = true;
 476
 477 void
 478 message_page_width_ignore ()
 479 {
 480   wrap_strings = false;
 481 }
 482
 483
 484 /* These three variables control the output style of the message_print
 485    function.  Interface functions for them are to be used.  */
 486 static bool indent = false;
 487 static bool uniforum = false;
 488 static bool escape = false;
 489
 490 void
 491 message_print_style_indent ()
 492 {
 493   indent = true;
 494 }
 495
 496 void
 497 message_print_style_uniforum ()
 498 {
 499   uniforum = true;
 500 }
 501
 502 void
 503 message_print_style_escape (bool flag)
 504 {
 505   escape = flag;
 506 }
 507
 508
 509 /* =============== msgdomain_list_print_po() and subroutines. =============== */
 510
 511
 512 /* A version of memcpy optimized for the case n <= 1.  */
 513 static inline void
 514 memcpy_small (void *dst, const void *src, size_t n)
 515 {
 516   if (n > 0)
 517     {
 518       char *q = (char *) dst;
 519       const char *p = (const char *) src;
 520
 521       *q = *p;
 522       if (--n > 0)
 523         do *++q = *++p; while (--n > 0);
 524     }
 525 }
 526
 527
 528 /* A version of memset optimized for the case n <= 1.  */
 529 static inline void
 530 memset_small (void *dst, char c, size_t n)
 531 {
 532   if (n > 0)
 533     {
 534       char *p = (char *) dst;
 535
 536       *p = c;
 537       if (--n > 0)
 538         do *++p = c; while (--n > 0);
 539     }
 540 }
 541
 542
 543 static void
 544 wrap (const message_ty *mp, ostream_t stream,
 545       const char *line_prefix, int extra_indent, const char *css_class,
 546       const char *name, const char *value,
 547       enum is_wrap do_wrap, size_t page_width,
 548       const char *charset)
 549 {
 550   const char *canon_charset;
 551   char *fmtdir;
 552   char *fmtdirattr;
 553   const char *s;
 554   bool first_line;
 555 #if HAVE_ICONV
 556   const char *envval;
 557   iconv_t conv;
 558 #endif
 559   bool weird_cjk;
 560
 561   canon_charset = po_charset_canonicalize (charset);
 562
 563 #if HAVE_ICONV
 564   /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
 565      about multibyte encodings, and require a spurious backslash after
 566      every multibyte character whose last byte is 0x5C.  Some programs,
 567      like vim, distribute PO files in this broken format.  It is important
 568      for such programs that GNU msgmerge continues to support this old
 569      PO file format when the Makefile requests it.  */
 570   envval = getenv ("OLD_PO_FILE_OUTPUT");
 571   if (envval != NULL && *envval != '\0')
 572     /* Write a PO file in old format, with extraneous backslashes.  */
 573     conv = (iconv_t)(-1);
 574   else
 575     if (canon_charset == NULL)
 576       /* Invalid PO file encoding.  */
 577       conv = (iconv_t)(-1);
 578     else
 579       /* Avoid glibc-2.1 bug with EUC-KR.  */
 580 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
 581      && !defined _LIBICONV_VERSION
 582       if (strcmp (canon_charset, "EUC-KR") == 0)
 583         conv = (iconv_t)(-1);
 584       else
 585 # endif
 586       /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
 587          GB18030.  */
 588 # if defined __sun && !defined _LIBICONV_VERSION
 589       if (   strcmp (canon_charset, "GB2312") == 0
 590           || strcmp (canon_charset, "EUC-TW") == 0
 591           || strcmp (canon_charset, "BIG5") == 0
 592           || strcmp (canon_charset, "BIG5-HKSCS") == 0
 593           || strcmp (canon_charset, "GBK") == 0
 594           || strcmp (canon_charset, "GB18030") == 0)
 595         conv = (iconv_t)(-1);
 596       else
 597 # endif
 598       /* Use iconv() to parse multibyte characters.  */
 599       conv = iconv_open ("UTF-8", canon_charset);
 600
 601   if (conv != (iconv_t)(-1))
 602     weird_cjk = false;
 603   else
 604 #endif
 605     if (canon_charset == NULL)
 606       weird_cjk = false;
 607     else
 608       weird_cjk = po_is_charset_weird_cjk (canon_charset);
 609
 610   if (canon_charset == NULL)
 611     canon_charset = po_charset_ascii;
 612
 613   /* Determine the extent of format string directives.  */
 614   fmtdir = NULL;
 615   fmtdirattr = NULL;
 616   if (value[0] != '\0')
 617     {
 618       bool is_msgstr =
 619         (strlen (name) >= 6 && memcmp (name, "msgstr", 6) == 0);
 620         /* or equivalent: = (css_class == class_msgstr) */
 621       size_t i;
 622
 623       for (i = 0; i < NFORMATS; i++)
 624         if (possible_format_p (mp->is_format[i]))
 625           {
 626             size_t len = strlen (value);
 627             struct formatstring_parser *parser = formatstring_parsers[i];
 628             char *invalid_reason = NULL;
 629             void *descr;
 630             const char *fdp;
 631             const char *fd_end;
 632             char *fdap;
 633
 634             fmtdir = XCALLOC (len, char);
 635             descr = parser->parse (value, is_msgstr, fmtdir, &invalid_reason);
 636             if (descr != NULL)
 637               parser->free (descr);
 638
 639             /* Locate the FMTDIR_* bits and transform the array to an array
 640                of attributes.  */
 641             fmtdirattr = XCALLOC (len, char);
 642             fd_end = fmtdir + len;
 643             for (fdp = fmtdir, fdap = fmtdirattr; fdp < fd_end; fdp++, fdap++)
 644               if (*fdp & FMTDIR_START)
 645                 {
 646                   const char *fdq;
 647                   for (fdq = fdp; fdq < fd_end; fdq++)
 648                     if (*fdq & (FMTDIR_END | FMTDIR_ERROR))
 649                       break;
 650                   if (!(fdq < fd_end))
 651                     /* The ->parse method has determined the start of a
 652                        formatstring directive but not stored a bit indicating
 653                        its end. It is a bug in the ->parse method.  */
 654                     abort ();
 655                   if (*fdq & FMTDIR_ERROR)
 656                     memset (fdap, ATTR_INVALID_FORMAT_DIRECTIVE, fdq - fdp + 1);
 657                   else
 658                     memset (fdap, ATTR_FORMAT_DIRECTIVE, fdq - fdp + 1);
 659                   fdap += fdq - fdp;
 660                   fdp = fdq;
 661                 }
 662               else
 663                 *fdap = 0;
 664
 665             break;
 666           }
 667     }
 668
 669   /* Loop over the '\n' delimited portions of value.  */
 670   s = value;
 671   first_line = true;
 672   do
 673     {
 674       /* The usual escapes, as defined by the ANSI C Standard.  */
 675 #     define is_escape(c) \
 676         ((c) == '\a' || (c) == '\b' || (c) == '\f' || (c) == '\n' \
 677          || (c) == '\r' || (c) == '\t' || (c) == '\v')
 678
 679       const char *es;
 680       const char *ep;
 681       size_t portion_len;
 682       char *portion;
 683       char *overrides;
 684       char *attributes;
 685       char *linebreaks;
 686       char *pp;
 687       char *op;
 688       char *ap;
 689       int startcol, startcol_after_break, width;
 690       size_t i;
 691
 692       for (es = s; *es != '\0'; )
 693         if (*es++ == '\n')
 694           break;
 695
 696       /* Expand escape sequences in each portion.  */
 697       for (ep = s, portion_len = 0; ep < es; ep++)
 698         {
 699           char c = *ep;
 700           if (is_escape (c))
 701             portion_len += 2;
 702           else if (escape && !c_isprint ((unsigned char) c))
 703             portion_len += 4;
 704           else if (c == '\\' || c == '"')
 705             portion_len += 2;
 706           else
 707             {
 708 #if HAVE_ICONV
 709               if (conv != (iconv_t)(-1))
 710                 {
 711                   /* Skip over a complete multi-byte character.  Don't
 712                      interpret the second byte of a multi-byte character as
 713                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
 714                      GB18030, SHIFT_JIS, JOHAB encodings.  */
 715                   char scratchbuf[64];
 716                   const char *inptr = ep;
 717                   size_t insize;
 718                   char *outptr = &scratchbuf[0];
 719                   size_t outsize = sizeof (scratchbuf);
 720                   size_t res;
 721
 722                   res = (size_t)(-1);
 723                   for (insize = 1; inptr + insize <= es; insize++)
 724                     {
 725                       res = iconv (conv,
 726                                    (ICONV_CONST char **) &inptr, &insize,
 727                                    &outptr, &outsize);
 728                       if (!(res == (size_t)(-1) && errno == EINVAL))
 729                         break;
 730                       /* We expect that no input bytes have been consumed
 731                          so far.  */
 732                       if (inptr != ep)
 733                         abort ();
 734                     }
 735                   if (res == (size_t)(-1))
 736                     {
 737                       if (errno == EILSEQ)
 738                         {
 739                           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
 740                                      _("invalid multibyte sequence"));
 741                           continue;
 742                         }
 743                       else
 744                         abort ();
 745                     }
 746                   insize = inptr - ep;
 747                   portion_len += insize;
 748                   ep += insize - 1;
 749                 }
 750               else
 751 #endif
 752                 {
 753                   if (weird_cjk
 754                       /* Special handling of encodings with CJK structure.  */
 755                       && ep + 2 <= es
 756                       && (unsigned char) ep[0] >= 0x80
 757                       && (unsigned char) ep[1] >= 0x30)
 758                     {
 759                       portion_len += 2;
 760                       ep += 1;
 761                     }
 762                   else
 763                     portion_len += 1;
 764                 }
 765             }
 766         }
 767       portion = XNMALLOC (portion_len, char);
 768       overrides = XNMALLOC (portion_len, char);
 769       attributes = XNMALLOC (portion_len, char);
 770       for (ep = s, pp = portion, op = overrides, ap = attributes; ep < es; ep++)
 771         {
 772           char c = *ep;
 773           char attr = (fmtdirattr != NULL ? fmtdirattr[ep - value] : 0);
 774           char brk = UC_BREAK_UNDEFINED;
 775           /* Don't break inside format directives.  */
 776           if (attr == ATTR_FORMAT_DIRECTIVE
 777               && (fmtdir[ep - value] & FMTDIR_START) == 0)
 778             brk = UC_BREAK_PROHIBITED;
 779           if (is_escape (c))
 780             {
 781               switch (c)
 782                 {
 783                 case '\a': c = 'a'; break;
 784                 case '\b': c = 'b'; break;
 785                 case '\f': c = 'f'; break;
 786                 case '\n': c = 'n'; break;
 787                 case '\r': c = 'r'; break;
 788                 case '\t': c = 't'; break;
 789                 case '\v': c = 'v'; break;
 790                 default: abort ();
 791                 }
 792               *pp++ = '\\';
 793               *pp++ = c;
 794               *op++ = brk;
 795               *op++ = UC_BREAK_PROHIBITED;
 796               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 797               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 798               /* We warn about any use of escape sequences beside
 799                  '\n' and '\t'.  */
 800               if (c != 'n' && c != 't')
 801                 {
 802                   char *error_message =
 803                     xasprintf (_("\
 804 internationalized messages should not contain the '\\%c' escape sequence"),
 805                                c);
 806                   po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, false,
 807                              error_message);
 808                   free (error_message);
 809                 }
 810             }
 811           else if (escape && !c_isprint ((unsigned char) c))
 812             {
 813               *pp++ = '\\';
 814               *pp++ = '0' + (((unsigned char) c >> 6) & 7);
 815               *pp++ = '0' + (((unsigned char) c >> 3) & 7);
 816               *pp++ = '0' + ((unsigned char) c & 7);
 817               *op++ = brk;
 818               *op++ = UC_BREAK_PROHIBITED;
 819               *op++ = UC_BREAK_PROHIBITED;
 820               *op++ = UC_BREAK_PROHIBITED;
 821               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 822               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 823               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 824               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 825             }
 826           else if (c == '\\' || c == '"')
 827             {
 828               *pp++ = '\\';
 829               *pp++ = c;
 830               *op++ = brk;
 831               *op++ = UC_BREAK_PROHIBITED;
 832               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 833               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
 834             }
 835           else
 836             {
 837 #if HAVE_ICONV
 838               if (conv != (iconv_t)(-1))
 839                 {
 840                   /* Copy a complete multi-byte character.  Don't
 841                      interpret the second byte of a multi-byte character as
 842                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
 843                      GB18030, SHIFT_JIS, JOHAB encodings.  */
 844                   char scratchbuf[64];
 845                   const char *inptr = ep;
 846                   size_t insize;
 847                   char *outptr = &scratchbuf[0];
 848                   size_t outsize = sizeof (scratchbuf);
 849                   size_t res;
 850
 851                   res = (size_t)(-1);
 852                   for (insize = 1; inptr + insize <= es; insize++)
 853                     {
 854                       res = iconv (conv,
 855                                    (ICONV_CONST char **) &inptr, &insize,
 856                                    &outptr, &outsize);
 857                       if (!(res == (size_t)(-1) && errno == EINVAL))
 858                         break;
 859                       /* We expect that no input bytes have been consumed
 860                          so far.  */
 861                       if (inptr != ep)
 862                         abort ();
 863                     }
 864                   if (res == (size_t)(-1))
 865                     {
 866                       if (errno == EILSEQ)
 867                         {
 868                           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0,
 869                                      false, _("invalid multibyte sequence"));
 870                           continue;
 871                         }
 872                       else
 873                         abort ();
 874                     }
 875                   insize = inptr - ep;
 876                   memcpy_small (pp, ep, insize);
 877                   pp += insize;
 878                   *op = brk;
 879                   memset_small (op + 1, UC_BREAK_PROHIBITED, insize - 1);
 880                   op += insize;
 881                   memset_small (ap, attr, insize);
 882                   ap += insize;
 883                   ep += insize - 1;
 884                 }
 885               else
 886 #endif
 887                 {
 888                   if (weird_cjk
 889                       /* Special handling of encodings with CJK structure.  */
 890                       && ep + 2 <= es
 891                       && (unsigned char) c >= 0x80
 892                       && (unsigned char) ep[1] >= 0x30)
 893                     {
 894                       *pp++ = c;
 895                       ep += 1;
 896                       *pp++ = *ep;
 897                       *op++ = brk;
 898                       *op++ = UC_BREAK_PROHIBITED;
 899                       *ap++ = attr;
 900                       *ap++ = attr;
 901                     }
 902                   else
 903                     {
 904                       *pp++ = c;
 905                       *op++ = brk;
 906                       *ap++ = attr;
 907                     }
 908                 }
 909             }
 910         }
 911
 912       /* Don't break immediately before the "\n" at the end.  */
 913       if (es > s && es[-1] == '\n')
 914         overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
 915
 916       linebreaks = XNMALLOC (portion_len, char);
 917
 918       /* Subsequent lines after a break are all indented.
 919          See INDENT-S.  */
 920       startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
 921       if (indent)
 922         startcol_after_break = (startcol_after_break + extra_indent + 8) & ~7;
 923       startcol_after_break++;
 924
 925       /* The line width.  Allow room for the closing quote character.  */
 926       width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
 927       /* Adjust for indentation of subsequent lines.  */
 928       width -= startcol_after_break;
 929
 930     recompute:
 931       /* The line starts with different things depending on whether it
 932          is the first line, and if we are using the indented style.
 933          See INDENT-F.  */
 934       startcol = (line_prefix ? strlen (line_prefix) : 0);
 935       if (first_line)
 936         {
 937           startcol += strlen (name);
 938           if (indent)
 939             startcol = (startcol + extra_indent + 8) & ~7;
 940           else
 941             startcol++;
 942         }
 943       else
 944         {
 945           if (indent)
 946             startcol = (startcol + extra_indent + 8) & ~7;
 947         }
 948       /* Allow room for the opening quote character.  */
 949       startcol++;
 950       /* Adjust for indentation of subsequent lines.  */
 951       startcol -= startcol_after_break;
 952
 953       /* Do line breaking on the portion.  */
 954       ulc_width_linebreaks (portion, portion_len, width, startcol, 0,
 955                             overrides, canon_charset, linebreaks);
 956
 957       /* If this is the first line, and we are not using the indented
 958          style, and the line would wrap, then use an empty first line
 959          and restart.  */
 960       if (first_line && !indent
 961           && portion_len > 0
 962           && (*es != '\0'
 963               || startcol > width
 964               || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
 965         {
 966           if (line_prefix != NULL)
 967             ostream_write_str (stream, line_prefix);
 968           begin_css_class (stream, css_class);
 969           begin_css_class (stream, class_keyword);
 970           ostream_write_str (stream, name);
 971           end_css_class (stream, class_keyword);
 972           ostream_write_str (stream, " ");
 973           begin_css_class (stream, class_string);
 974           ostream_write_str (stream, "\"\"");
 975           end_css_class (stream, class_string);
 976           end_css_class (stream, css_class);
 977           ostream_write_str (stream, "\n");
 978           first_line = false;
 979           /* Recompute startcol and linebreaks.  */
 980           goto recompute;
 981         }
 982
 983       /* Print the beginning of the line.  This will depend on whether
 984          this is the first line, and if the indented style is being
 985          used.  INDENT-F.  */
 986       {
 987         int currcol = 0;
 988
 989         if (line_prefix != NULL)
 990           {
 991             ostream_write_str (stream, line_prefix);
 992             currcol = strlen (line_prefix);
 993           }
 994         begin_css_class (stream, css_class);
 995         if (first_line)
 996           {
 997             begin_css_class (stream, class_keyword);
 998             ostream_write_str (stream, name);
 999             currcol += strlen (name);
1000             end_css_class (stream, class_keyword);
1001             if (indent)
1002               {
1003                 if (extra_indent > 0)
1004                   ostream_write_mem (stream, "        ", extra_indent);
1005                 currcol += extra_indent;
1006                 ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1007                 currcol = (currcol + 8) & ~7;
1008               }
1009             else
1010               {
1011                 ostream_write_str (stream, " ");
1012                 currcol++;
1013               }
1014             first_line = false;
1015           }
1016         else
1017           {
1018             if (indent)
1019               {
1020                 if (extra_indent > 0)
1021                   ostream_write_mem (stream, "        ", extra_indent);
1022                 currcol += extra_indent;
1023                 ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1024                 currcol = (currcol + 8) & ~7;
1025               }
1026           }
1027       }
1028
1029       /* Print the portion itself, with linebreaks where necessary.  */
1030       {
1031         char currattr = 0;
1032
1033         begin_css_class (stream, class_string);
1034         ostream_write_str (stream, "\"");
1035         begin_css_class (stream, class_text);
1036
1037         for (i = 0; i < portion_len; i++)
1038           {
1039             if (linebreaks[i] == UC_BREAK_POSSIBLE)
1040               {
1041                 int currcol;
1042
1043                 /* Change currattr so that it becomes 0.  */
1044                 if (currattr & ATTR_ESCAPE_SEQUENCE)
1045                   {
1046                     end_css_class (stream, class_escape_sequence);
1047                     currattr &= ~ATTR_ESCAPE_SEQUENCE;
1048                   }
1049                 if (currattr & ATTR_FORMAT_DIRECTIVE)
1050                   {
1051                     end_css_class (stream, class_format_directive);
1052                     currattr &= ~ATTR_FORMAT_DIRECTIVE;
1053                   }
1054                 else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1055                   {
1056                     end_css_class (stream, class_invalid_format_directive);
1057                     currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1058                   }
1059                 if (!(currattr == 0))
1060                   abort ();
1061
1062                 end_css_class (stream, class_text);
1063                 ostream_write_str (stream, "\"");
1064                 end_css_class (stream, class_string);
1065                 end_css_class (stream, css_class);
1066                 ostream_write_str (stream, "\n");
1067                 currcol = 0;
1068                 /* INDENT-S.  */
1069                 if (line_prefix != NULL)
1070                   {
1071                     ostream_write_str (stream, line_prefix);
1072                     currcol = strlen (line_prefix);
1073                   }
1074                 begin_css_class (stream, css_class);
1075                 if (indent)
1076                   {
1077                     ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1078                     currcol = (currcol + 8) & ~7;
1079                   }
1080                 begin_css_class (stream, class_string);
1081                 ostream_write_str (stream, "\"");
1082                 begin_css_class (stream, class_text);
1083               }
1084             /* Change currattr so that it matches attributes[i].  */
1085             if (attributes[i] != currattr)
1086               {
1087                 /* class_escape_sequence occurs inside class_format_directive
1088                    and class_invalid_format_directive, so clear it first.  */
1089                 if (currattr & ATTR_ESCAPE_SEQUENCE)
1090                   {
1091                     end_css_class (stream, class_escape_sequence);
1092                     currattr &= ~ATTR_ESCAPE_SEQUENCE;
1093                   }
1094                 if (~attributes[i] & currattr & ATTR_FORMAT_DIRECTIVE)
1095                   {
1096                     end_css_class (stream, class_format_directive);
1097                     currattr &= ~ATTR_FORMAT_DIRECTIVE;
1098                   }
1099                 else if (~attributes[i] & currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1100                   {
1101                     end_css_class (stream, class_invalid_format_directive);
1102                     currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1103                   }
1104                 if (attributes[i] & ~currattr & ATTR_FORMAT_DIRECTIVE)
1105                   {
1106                     begin_css_class (stream, class_format_directive);
1107                     currattr |= ATTR_FORMAT_DIRECTIVE;
1108                   }
1109                 else if (attributes[i] & ~currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1110                   {
1111                     begin_css_class (stream, class_invalid_format_directive);
1112                     currattr |= ATTR_INVALID_FORMAT_DIRECTIVE;
1113                   }
1114                 /* class_escape_sequence occurs inside class_format_directive
1115                    and class_invalid_format_directive, so set it last.  */
1116                 if (attributes[i] & ~currattr & ATTR_ESCAPE_SEQUENCE)
1117                   {
1118                     begin_css_class (stream, class_escape_sequence);
1119                     currattr |= ATTR_ESCAPE_SEQUENCE;
1120                   }
1121               }
1122             ostream_write_mem (stream, &portion[i], 1);
1123           }
1124
1125         /* Change currattr so that it becomes 0.  */
1126         if (currattr & ATTR_ESCAPE_SEQUENCE)
1127           {
1128             end_css_class (stream, class_escape_sequence);
1129             currattr &= ~ATTR_ESCAPE_SEQUENCE;
1130           }
1131         if (currattr & ATTR_FORMAT_DIRECTIVE)
1132           {
1133             end_css_class (stream, class_format_directive);
1134             currattr &= ~ATTR_FORMAT_DIRECTIVE;
1135           }
1136         else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1137           {
1138             end_css_class (stream, class_invalid_format_directive);
1139             currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1140           }
1141         if (!(currattr == 0))
1142           abort ();
1143
1144         end_css_class (stream, class_text);
1145         ostream_write_str (stream, "\"");
1146         end_css_class (stream, class_string);
1147         end_css_class (stream, css_class);
1148         ostream_write_str (stream, "\n");
1149       }
1150
1151       free (linebreaks);
1152       free (attributes);
1153       free (overrides);
1154       free (portion);
1155
1156       s = es;
1157 #     undef is_escape
1158     }
1159   while (*s);
1160
1161   if (fmtdirattr != NULL)
1162     free (fmtdirattr);
1163   if (fmtdir != NULL)
1164     free (fmtdir);
1165
1166 #if HAVE_ICONV
1167   if (conv != (iconv_t)(-1))
1168     iconv_close (conv);
1169 #endif
1170 }
1171
1172
1173 static void
1174 print_blank_line (ostream_t stream)
1175 {
1176   if (uniforum)
1177     {
1178       begin_css_class (stream, class_comment);
1179       ostream_write_str (stream, "#\n");
1180       end_css_class (stream, class_comment);
1181     }
1182   else
1183     ostream_write_str (stream, "\n");
1184 }
1185
1186
1187 static void
1188 message_print (const message_ty *mp, ostream_t stream,
1189                const char *charset, size_t page_width, bool blank_line,
1190                bool debug)
1191 {
1192   int extra_indent;
1193
1194   /* Separate messages with a blank line.  Uniforum doesn't like blank
1195      lines, so use an empty comment (unless there already is one).  */
1196   if (blank_line && (!uniforum
1197                      || mp->comment == NULL
1198                      || mp->comment->nitems == 0
1199                      || mp->comment->item[0][0] != '\0'))
1200     print_blank_line (stream);
1201
1202   if (is_header (mp))
1203     begin_css_class (stream, class_header);
1204   else if (mp->msgstr[0] == '\0')
1205     begin_css_class (stream, class_untranslated);
1206   else if (mp->is_fuzzy)
1207     begin_css_class (stream, class_fuzzy);
1208   else
1209     begin_css_class (stream, class_translated);
1210
1211   begin_css_class (stream, class_comment);
1212
1213   /* Print translator comment if available.  */
1214   message_print_comment (mp, stream);
1215
1216   /* Print xgettext extracted comments.  */
1217   message_print_comment_dot (mp, stream);
1218
1219   /* Print the file position comments.  This will help a human who is
1220      trying to navigate the sources.  There is no problem of getting
1221      repeated positions, because duplicates are checked for.  */
1222   message_print_comment_filepos (mp, stream, uniforum, page_width);
1223
1224   /* Print flag information in special comment.  */
1225   message_print_comment_flags (mp, stream, debug);
1226
1227   /* Print the previous msgid.  This helps the translator when the msgid has
1228      only slightly changed.  */
1229   begin_css_class (stream, class_previous_comment);
1230   if (mp->prev_msgctxt != NULL)
1231     wrap (mp, stream, "#| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1232           mp->do_wrap, page_width, charset);
1233   if (mp->prev_msgid != NULL)
1234     wrap (mp, stream, "#| ", 0, class_previous, "msgid", mp->prev_msgid,
1235           mp->do_wrap, page_width, charset);
1236   if (mp->prev_msgid_plural != NULL)
1237     wrap (mp, stream, "#| ", 0, class_previous, "msgid_plural",
1238           mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1239   end_css_class (stream, class_previous_comment);
1240   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1241                   || mp->prev_msgid_plural != NULL
1242                   ? 3
1243                   : 0);
1244
1245   end_css_class (stream, class_comment);
1246
1247   /* Print each of the message components.  Wrap them nicely so they
1248      are as readable as possible.  If there is no recorded msgstr for
1249      this domain, emit an empty string.  */
1250   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1251       && po_charset_canonicalize (charset) != po_charset_utf8)
1252     {
1253       char *warning_message =
1254         xasprintf (_("\
1255 The following msgctxt contains non-ASCII characters.\n\
1256 This will cause problems to translators who use a character encoding\n\
1257 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1258 %s\n"), mp->msgctxt);
1259       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1260       free (warning_message);
1261     }
1262   if (!is_ascii_string (mp->msgid)
1263       && po_charset_canonicalize (charset) != po_charset_utf8)
1264     {
1265       char *warning_message =
1266         xasprintf (_("\
1267 The following msgid contains non-ASCII characters.\n\
1268 This will cause problems to translators who use a character encoding\n\
1269 different from yours. Consider using a pure ASCII msgid instead.\n\
1270 %s\n"), mp->msgid);
1271       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1272       free (warning_message);
1273     }
1274   if (mp->msgctxt != NULL)
1275     wrap (mp, stream, NULL, extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1276           mp->do_wrap, page_width, charset);
1277   wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid", mp->msgid,
1278         mp->do_wrap, page_width, charset);
1279   if (mp->msgid_plural != NULL)
1280     wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid_plural",
1281           mp->msgid_plural, mp->do_wrap, page_width, charset);
1282
1283   if (mp->msgid_plural == NULL)
1284     wrap (mp, stream, NULL, extra_indent, class_msgstr, "msgstr", mp->msgstr,
1285           mp->do_wrap, page_width, charset);
1286   else
1287     {
1288       char prefix_buf[20];
1289       unsigned int i;
1290       const char *p;
1291
1292       for (p = mp->msgstr, i = 0;
1293            p < mp->msgstr + mp->msgstr_len;
1294            p += strlen (p) + 1, i++)
1295         {
1296           sprintf (prefix_buf, "msgstr[%u]", i);
1297           wrap (mp, stream, NULL, extra_indent, class_msgstr, prefix_buf, p,
1298                 mp->do_wrap, page_width, charset);
1299         }
1300     }
1301
1302   if (is_header (mp))
1303     end_css_class (stream, class_header);
1304   else if (mp->msgstr[0] == '\0')
1305     end_css_class (stream, class_untranslated);
1306   else if (mp->is_fuzzy)
1307     end_css_class (stream, class_fuzzy);
1308   else
1309     end_css_class (stream, class_translated);
1310 }
1311
1312
1313 static void
1314 message_print_obsolete (const message_ty *mp, ostream_t stream,
1315                         const char *charset, size_t page_width, bool blank_line)
1316 {
1317   int extra_indent;
1318
1319   /* If msgstr is the empty string we print nothing.  */
1320   if (mp->msgstr[0] == '\0')
1321     return;
1322
1323   /* Separate messages with a blank line.  Uniforum doesn't like blank
1324      lines, so use an empty comment (unless there already is one).  */
1325   if (blank_line)
1326     print_blank_line (stream);
1327
1328   begin_css_class (stream, class_obsolete);
1329
1330   begin_css_class (stream, class_comment);
1331
1332   /* Print translator comment if available.  */
1333   message_print_comment (mp, stream);
1334
1335   /* Print xgettext extracted comments (normally empty).  */
1336   message_print_comment_dot (mp, stream);
1337
1338   /* Print the file position comments (normally empty).  */
1339   message_print_comment_filepos (mp, stream, uniforum, page_width);
1340
1341   /* Print flag information in special comment.  */
1342   if (mp->is_fuzzy)
1343     {
1344       bool first = true;
1345
1346       ostream_write_str (stream, "#,");
1347
1348       if (mp->is_fuzzy)
1349         {
1350           ostream_write_str (stream, " fuzzy");
1351           first = false;
1352         }
1353
1354       ostream_write_str (stream, "\n");
1355     }
1356
1357   /* Print the previous msgid.  This helps the translator when the msgid has
1358      only slightly changed.  */
1359   begin_css_class (stream, class_previous_comment);
1360   if (mp->prev_msgctxt != NULL)
1361     wrap (mp, stream, "#~| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1362           mp->do_wrap, page_width, charset);
1363   if (mp->prev_msgid != NULL)
1364     wrap (mp, stream, "#~| ", 0, class_previous, "msgid", mp->prev_msgid,
1365           mp->do_wrap, page_width, charset);
1366   if (mp->prev_msgid_plural != NULL)
1367     wrap (mp, stream, "#~| ", 0, class_previous, "msgid_plural",
1368           mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1369   end_css_class (stream, class_previous_comment);
1370   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1371                   || mp->prev_msgid_plural != NULL
1372                   ? 1
1373                   : 0);
1374
1375   end_css_class (stream, class_comment);
1376
1377   /* Print each of the message components.  Wrap them nicely so they
1378      are as readable as possible.  */
1379   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1380       && po_charset_canonicalize (charset) != po_charset_utf8)
1381     {
1382       char *warning_message =
1383         xasprintf (_("\
1384 The following msgctxt contains non-ASCII characters.\n\
1385 This will cause problems to translators who use a character encoding\n\
1386 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1387 %s\n"), mp->msgctxt);
1388       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1389       free (warning_message);
1390     }
1391   if (!is_ascii_string (mp->msgid)
1392       && po_charset_canonicalize (charset) != po_charset_utf8)
1393     {
1394       char *warning_message =
1395         xasprintf (_("\
1396 The following msgid contains non-ASCII characters.\n\
1397 This will cause problems to translators who use a character encoding\n\
1398 different from yours. Consider using a pure ASCII msgid instead.\n\
1399 %s\n"), mp->msgid);
1400       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1401       free (warning_message);
1402     }
1403   if (mp->msgctxt != NULL)
1404     wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1405           mp->do_wrap, page_width, charset);
1406   wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid", mp->msgid,
1407         mp->do_wrap, page_width, charset);
1408   if (mp->msgid_plural != NULL)
1409     wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid_plural",
1410           mp->msgid_plural, mp->do_wrap, page_width, charset);
1411
1412   if (mp->msgid_plural == NULL)
1413     wrap (mp, stream, "#~ ", extra_indent, class_msgstr, "msgstr", mp->msgstr,
1414           mp->do_wrap, page_width, charset);
1415   else
1416     {
1417       char prefix_buf[20];
1418       unsigned int i;
1419       const char *p;
1420
1421       for (p = mp->msgstr, i = 0;
1422            p < mp->msgstr + mp->msgstr_len;
1423            p += strlen (p) + 1, i++)
1424         {
1425           sprintf (prefix_buf, "msgstr[%u]", i);
1426           wrap (mp, stream, "#~ ", extra_indent, class_msgstr, prefix_buf, p,
1427                 mp->do_wrap, page_width, charset);
1428         }
1429     }
1430
1431   end_css_class (stream, class_obsolete);
1432 }
1433
1434
1435 static void
1436 msgdomain_list_print_po (msgdomain_list_ty *mdlp, ostream_t stream,
1437                          size_t page_width, bool debug)
1438 {
1439   size_t j, k;
1440   bool blank_line;
1441
1442   /* Write out the messages for each domain.  */
1443   blank_line = false;
1444   for (k = 0; k < mdlp->nitems; k++)
1445     {
1446       message_list_ty *mlp;
1447       const char *header;
1448       const char *charset;
1449       char *allocated_charset;
1450
1451       /* If the first domain is the default, don't bother emitting
1452          the domain name, because it is the default.  */
1453       if (!(k == 0
1454             && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
1455         {
1456           if (blank_line)
1457             print_blank_line (stream);
1458           begin_css_class (stream, class_keyword);
1459           ostream_write_str (stream, "domain");
1460           end_css_class (stream, class_keyword);
1461           ostream_write_str (stream, " ");
1462           begin_css_class (stream, class_string);
1463           ostream_write_str (stream, "\"");
1464           begin_css_class (stream, class_text);
1465           ostream_write_str (stream, mdlp->item[k]->domain);
1466           end_css_class (stream, class_text);
1467           ostream_write_str (stream, "\"");
1468           end_css_class (stream, class_string);
1469           ostream_write_str (stream, "\n");
1470           blank_line = true;
1471         }
1472
1473       mlp = mdlp->item[k]->messages;
1474
1475       /* Search the header entry.  */
1476       header = NULL;
1477       for (j = 0; j < mlp->nitems; ++j)
1478         if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1479           {
1480             header = mlp->item[j]->msgstr;
1481             break;
1482           }
1483
1484       /* Extract the charset name.  */
1485       charset = "ASCII";
1486       allocated_charset = NULL;
1487       if (header != NULL)
1488         {
1489           const char *charsetstr = c_strstr (header, "charset=");
1490
1491           if (charsetstr != NULL)
1492             {
1493               size_t len;
1494
1495               charsetstr += strlen ("charset=");
1496               len = strcspn (charsetstr, " \t\n");
1497               allocated_charset = (char *) xmalloca (len + 1);
1498               memcpy (allocated_charset, charsetstr, len);
1499               allocated_charset[len] = '\0';
1500               charset = allocated_charset;
1501
1502               /* Treat the dummy default value as if it were absent.  */
1503               if (strcmp (charset, "CHARSET") == 0)
1504                 charset = "ASCII";
1505             }
1506         }
1507
1508       /* Write out each of the messages for this domain.  */
1509       for (j = 0; j < mlp->nitems; ++j)
1510         if (!mlp->item[j]->obsolete)
1511           {
1512             message_print (mlp->item[j], stream, charset, page_width,
1513                            blank_line, debug);
1514             blank_line = true;
1515           }
1516
1517       /* Write out each of the obsolete messages for this domain.  */
1518       for (j = 0; j < mlp->nitems; ++j)
1519         if (mlp->item[j]->obsolete)
1520           {
1521             message_print_obsolete (mlp->item[j], stream, charset, page_width,
1522                                     blank_line);
1523             blank_line = true;
1524           }
1525
1526       if (allocated_charset != NULL)
1527         freea (allocated_charset);
1528     }
1529 }
1530
1531
1532 /* Describes a PO file in .po syntax.  */
1533 const struct catalog_output_format output_format_po =
1534 {
1535   msgdomain_list_print_po,              /* print */
1536   false,                                /* requires_utf8 */
1537   true,                                 /* supports_color */
1538   true,                                 /* supports_multiple_domains */
1539   true,                                 /* supports_contexts */
1540   true,                                 /* supports_plurals */
1541   true,                                 /* sorts_obsoletes_to_end */
1542   false,                                /* alternative_is_po */
1543   false                                 /* alternative_is_java_class */
1544 };