Imported Upstream version 0.19.7

[platform/upstream/gettext.git] / gettext-tools / src / x-c.c
diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c

index 8af7429..68e9f5d 100644 (file)
--- a/gettext-tools/src/x-c.c
+++ b/gettext-tools/src/x-c.c
@@ -1,5 +1,6 @@
  /* xgettext C/C++/ObjectiveC backend.
-   Copyright (C) 1995-1998, 2000-2009, 2012 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998, 2000-2009, 2012, 2015 Free Software
+   Foundation, Inc.
  
     This file was written by Peter Miller <millerp@canb.auug.org.au>
  
@@ -23,6 +24,7 @@
  /* Specification.  */
  #include "x-c.h"
  
+#include <assert.h>
  #include <errno.h>
  #include <stdbool.h>
  #include <stdio.h>
@@ -36,6 +38,7 @@
  #include "xalloc.h"
  #include "xvasprintf.h"
  #include "hash.h"
+#include "po-charset.h"
  #include "gettext.h"
  
  #define _(s) gettext(s)
@@ -141,6 +144,14 @@ x_objc_keyword (const char *name)
    add_keyword (name, &objc_keywords);
  }
  
+static bool additional_keywords_kde;
+
+void
+activate_additional_keywords_kde ()
+{
+  additional_keywords_kde = true;
+}
+
  /* Finish initializing the keywords hash tables.
     Called after argument processing, before each file is processed.  */
  static void
@@ -164,6 +175,50 @@ init_keywords ()
        x_c_keyword ("dnpgettext:2c,3,4");
        x_c_keyword ("dcnpgettext:2c,3,4");
  
+      if (additional_keywords_kde)
+        {
+          x_c_keyword ("i18n:1");
+          x_c_keyword ("i18nc:1c,2");
+          x_c_keyword ("i18np:1,2");
+          x_c_keyword ("i18ncp:1c,2,3");
+          x_c_keyword ("i18nd:2");
+          x_c_keyword ("i18ndc:2c,3");
+          x_c_keyword ("i18ndp:2,3");
+          x_c_keyword ("i18ndcp:2c,3,4");
+          x_c_keyword ("ki18n:1");
+          x_c_keyword ("ki18nc:1c,2");
+          x_c_keyword ("ki18np:1,2");
+          x_c_keyword ("ki18ncp:1c,2,3");
+          x_c_keyword ("ki18nd:2");
+          x_c_keyword ("ki18ndc:2c,3");
+          x_c_keyword ("ki18ndp:2,3");
+          x_c_keyword ("ki18ndcp:2c,3,4");
+          x_c_keyword ("I18N_NOOP:1");
+          x_c_keyword ("I18NC_NOOP:1c,2");
+          x_c_keyword ("I18N_NOOP2:1c,2");
+          x_c_keyword ("I18N_NOOP2_NOSTRIP:1c,2");
+          x_c_keyword ("xi18n:1");
+          x_c_keyword ("xi18nc:1c,2");
+          x_c_keyword ("xi18np:1,2");
+          x_c_keyword ("xi18ncp:1c,2,3");
+          x_c_keyword ("xi18nd:2");
+          x_c_keyword ("xi18ndc:2c,3");
+          x_c_keyword ("xi18ndp:2,3");
+          x_c_keyword ("xi18ndcp:2c,3,4");
+          x_c_keyword ("kxi18n:1");
+          x_c_keyword ("kxi18nc:1c,2");
+          x_c_keyword ("kxi18np:1,2");
+          x_c_keyword ("kxi18ncp:1c,2,3");
+          x_c_keyword ("kxi18nd:2");
+          x_c_keyword ("kxi18ndc:2c,3");
+          x_c_keyword ("kxi18ndp:2,3");
+          x_c_keyword ("kxi18ndcp:2c,3,4");
+          x_c_keyword ("XI18N_NOOP:1");
+          x_c_keyword ("XI18NC_NOOP:1c,2");
+          x_c_keyword ("XI18N_NOOP2:1c,2");
+          x_c_keyword ("XI18N_NOOP2_NOSTRIP:1c,2");
+        }
+
        x_objc_keyword ("gettext");
        x_objc_keyword ("dgettext:2");
        x_objc_keyword ("dcgettext:2");
@@ -447,6 +502,50 @@ init_flag_table_gcc_internal ()
  #endif
  }
  
+void
+init_flag_table_kde ()
+{
+  xgettext_record_flag ("i18n:1:kde-format");
+  xgettext_record_flag ("i18nc:2:kde-format");
+  xgettext_record_flag ("i18np:1:kde-format");
+  xgettext_record_flag ("i18ncp:2:kde-format");
+  xgettext_record_flag ("i18nd:2:kde-format");
+  xgettext_record_flag ("i18ndc:3:kde-format");
+  xgettext_record_flag ("i18ndp:2:kde-format");
+  xgettext_record_flag ("i18ndcp:3:kde-format");
+  xgettext_record_flag ("ki18n:1:kde-format");
+  xgettext_record_flag ("ki18nc:2:kde-format");
+  xgettext_record_flag ("ki18np:1:kde-format");
+  xgettext_record_flag ("ki18ncp:2:kde-format");
+  xgettext_record_flag ("ki18nd:2:kde-format");
+  xgettext_record_flag ("ki18ndc:3:kde-format");
+  xgettext_record_flag ("ki18ndp:2:kde-format");
+  xgettext_record_flag ("ki18ndcp:3:kde-format");
+  xgettext_record_flag ("I18N_NOOP:1:kde-format");
+  xgettext_record_flag ("I18NC_NOOP:2:kde-format");
+  xgettext_record_flag ("I18N_NOOP2:2:kde-format");
+  xgettext_record_flag ("I18N_NOOP2_NOSTRIP:2:kde-format");
+  xgettext_record_flag ("xi18n:1:kde-kuit-format");
+  xgettext_record_flag ("xi18nc:2:kde-kuit-format");
+  xgettext_record_flag ("xi18np:1:kde-kuit-format");
+  xgettext_record_flag ("xi18ncp:2:kde-kuit-format");
+  xgettext_record_flag ("xi18nd:2:kde-kuit-format");
+  xgettext_record_flag ("xi18ndc:3:kde-kuit-format");
+  xgettext_record_flag ("xi18ndp:2:kde-kuit-format");
+  xgettext_record_flag ("xi18ndcp:3:kde-kuit-format");
+  xgettext_record_flag ("kxi18n:1:kde-kuit-format");
+  xgettext_record_flag ("kxi18nc:2:kde-kuit-format");
+  xgettext_record_flag ("kxi18np:1:kde-kuit-format");
+  xgettext_record_flag ("kxi18ncp:2:kde-kuit-format");
+  xgettext_record_flag ("kxi18nd:2:kde-kuit-format");
+  xgettext_record_flag ("kxi18ndc:3:kde-kuit-format");
+  xgettext_record_flag ("kxi18ndp:2:kde-kuit-format");
+  xgettext_record_flag ("kxi18ndcp:3:kde-kuit-format");
+  xgettext_record_flag ("XI18N_NOOP:1:kde-kuit-format");
+  xgettext_record_flag ("XI18NC_NOOP:2:kde-kuit-format");
+  xgettext_record_flag ("XI18N_NOOP2:2:kde-kuit-format");
+  xgettext_record_flag ("XI18N_NOOP2_NOSTRIP:2:kde-kuit-format");
+}
  
  /* ======================== Reading of characters.  ======================== */
  
@@ -827,6 +926,9 @@ phase4_ungetc (int c)
  /* True if ObjectiveC extensions are recognized.  */
  static bool objc_extensions;
  
+/* True if C++ extensions are recognized.  */
+static bool cxx_extensions;
+
  enum token_type_ty
  {
    token_type_character_constant,        /* 'x' */
@@ -853,173 +955,244 @@ struct token_ty
    char *string;         /* for token_type_name, token_type_string_literal */
    refcounted_string_list_ty *comment;   /* for token_type_string_literal,
                                             token_type_objc_special */
+  enum literalstring_escape_type escape; /* for token_type_string_literal */
    long number;
    int line_number;
  };
  
  
-/* 7. Replace escape sequences within character strings with their
-   single character equivalents.  This is called from phase 5, because
-   we don't have to worry about the #include argument.  There are
-   pathological cases which could bite us (like the DOS directory
-   separator), but just pretend it can't happen.  */
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_name || tp->type == token_type_string_literal)
+    free (tp->string);
+  if (tp->type == token_type_string_literal
+      || tp->type == token_type_objc_special)
+    drop_reference (tp->comment);
+}
  
-#define P7_QUOTES (1000 + '"')
-#define P7_QUOTE (1000 + '\'')
-#define P7_NEWLINE (1000 + '\n')
  
-static int
-phase7_getc ()
+static char *
+literalstring_parse (const char *string, lex_pos_ty *pos,
+                     enum literalstring_escape_type type)
  {
-  int c, n, j;
+  struct mixed_string_buffer *bp;
+  const char *p;
  
-  /* Use phase 3, because phase 4 elides comments.  */
-  c = phase3_getc ();
+  /* Start accumulating the string.  */
+  bp = mixed_string_buffer_alloc (lc_string,
+                                  logical_file_name,
+                                  line_number);
  
-  /* Return a magic newline indicator, so that we can distinguish
-     between the user requesting a newline in the string (e.g. using
-     "\n" or "\012") from the user failing to terminate the string or
-     character constant.  The ANSI C standard says: 3.1.3.4 Character
-     Constants contain "any character except single quote, backslash or
-     newline; or an escape sequence" and 3.1.4 String Literals contain
-     "any character except double quote, backslash or newline; or an
-     escape sequence".
-
-     Most compilers give a fatal error in this case, however gcc is
-     stupidly silent, even though this is a very common typo.  OK, so
-     "gcc --pedantic" will tell me, but that gripes about too much other
-     stuff.  Could I have a "gcc -Wnewline-in-string" option, or
-     better yet a "gcc -fno-newline-in-string" option, please?  Gcc is
-     also inconsistent between string literals and character constants:
-     you may not embed newlines in character constants; try it, you get
-     a useful diagnostic.  --PMiller  */
-  if (c == '\n')
-    return P7_NEWLINE;
-
-  if (c == '"')
-    return P7_QUOTES;
-  if (c == '\'')
-    return P7_QUOTE;
-  if (c != '\\')
-    return c;
-  c = phase3_getc ();
-  switch (c)
+  for (p = string; ; )
      {
-    default:
-      /* Unknown escape sequences really should be an error, but just
-         ignore them, and let the real compiler complain.  */
-      phase3_ungetc (c);
-      return '\\';
-
-    case '"':
-    case '\'':
-    case '?':
-    case '\\':
-      return c;
+      int c = *p++;
  
-    case 'a':
-      return '\a';
-    case 'b':
-      return '\b';
+      if (c == '\0')
+        break;
  
-      /* The \e escape is preculiar to gcc, and assumes an ASCII
-         character set (or superset).  We don't provide support for it
-         here.  */
-
-    case 'f':
-      return '\f';
-    case 'n':
-      return '\n';
-    case 'r':
-      return '\r';
-    case 't':
-      return '\t';
-    case 'v':
-      return '\v';
-
-    case 'x':
-      c = phase3_getc ();
-      switch (c)
+      if (c != '\\')
          {
-        default:
-          phase3_ungetc (c);
-          phase3_ungetc ('x');
-          return '\\';
-
-        case '0': case '1': case '2': case '3': case '4':
-        case '5': case '6': case '7': case '8': case '9':
-        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-          break;
+          mixed_string_buffer_append_char (bp, c);
+          continue;
          }
-      n = 0;
-      for (;;)
+
+      if (!(type & LET_ANSI_C) && !(type & LET_UNICODE))
          {
-          switch (c)
-            {
-            default:
-              phase3_ungetc (c);
-              return n;
+          mixed_string_buffer_append_char (bp, '\\');
+          continue;
+        }
  
-            case '0': case '1': case '2': case '3': case '4':
-            case '5': case '6': case '7': case '8': case '9':
-              n = n * 16 + c - '0';
+      c = *p++;
+      if (c == '\0')
+        break;
+
+      if (type & LET_ANSI_C)
+        switch (c)
+          {
+          case '"':
+          case '\'':
+          case '?':
+          case '\\':
+            mixed_string_buffer_append_char (bp, c);
+            continue;
+
+          case 'a':
+            mixed_string_buffer_append_char (bp, '\a');
+            continue;
+          case 'b':
+            mixed_string_buffer_append_char (bp, '\b');
+            continue;
+
+            /* The \e escape is preculiar to gcc, and assumes an ASCII
+               character set (or superset).  We don't provide support for it
+               here.  */
+
+          case 'f':
+            mixed_string_buffer_append_char (bp, '\f');
+            continue;
+          case 'n':
+            mixed_string_buffer_append_char (bp, '\n');
+            continue;
+          case 'r':
+            mixed_string_buffer_append_char (bp, '\r');
+            continue;
+          case 't':
+            mixed_string_buffer_append_char (bp, '\t');
+            continue;
+          case 'v':
+            mixed_string_buffer_append_char (bp, '\v');
+            continue;
+
+          case 'x':
+            c = *p++;
+            if (c == '\0')
                break;
+            switch (c)
+              {
+              default:
+                mixed_string_buffer_append_char (bp, '\\');
+                mixed_string_buffer_append_char (bp, 'x');
+                mixed_string_buffer_append_char (bp, c);
+                break;
+
+              case '0': case '1': case '2': case '3': case '4':
+              case '5': case '6': case '7': case '8': case '9':
+              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                {
+                  int n;
+
+                  for (n = 0; ; c = *p++)
+                    {
+                      switch (c)
+                        {
+                        default:
+                          break;
+
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                          n = n * 16 + c - '0';
+                          continue;
+
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                          n = n * 16 + 10 + c - 'A';
+                          continue;
+
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                          n = n * 16 + 10 + c - 'a';
+                          continue;
+                        }
+                      break;
+                    }
+
+                  mixed_string_buffer_append_char (bp, n);
+                  --p;
+                }
+                break;
+              }
+            continue;
  
-            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-              n = n * 16 + 10 + c - 'A';
-              break;
+          case '0': case '1': case '2': case '3':
+          case '4': case '5': case '6': case '7':
+            {
+              int n, j;
  
-            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-              n = n * 16 + 10 + c - 'a';
-              break;
+              for (n = 0, j = 0; j < 3; ++j)
+                {
+                  n = n * 8 + c - '0';
+                  c = *p++;
+                  switch (c)
+                    {
+                    default:
+                      break;
+
+                    case '0': case '1': case '2': case '3':
+                    case '4': case '5': case '6': case '7':
+                      continue;
+                    }
+                  break;
+                }
+
+              mixed_string_buffer_append_char (bp, n);
+              --p;
              }
-          c = phase3_getc ();
-        }
-      return n;
+            continue;
+          }
  
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      n = 0;
-      for (j = 0; j < 3; ++j)
-        {
-          n = n * 8 + c - '0';
-          c = phase3_getc ();
-          switch (c)
+      if (type & LET_UNICODE)
+        switch (c)
+          {
+          case 'U': case 'u':
              {
-            default:
-              break;
+              unsigned char buf[8];
+              int prefix = c;
+              int length = prefix == 'u' ? 4 : 8;
+              int n, j;
  
-            case '0': case '1': case '2': case '3':
-            case '4': case '5': case '6': case '7':
-              continue;
-            }
-          break;
-        }
-      phase3_ungetc (c);
-      return n;
-    }
-}
+              for (n = 0, j = 0; j < length; j++)
+                {
+                  c = *p++;
+
+                  if (c >= '0' && c <= '9')
+                    n = (n << 4) + (c - '0');
+                  else if (c >= 'A' && c <= 'F')
+                    n = (n << 4) + (c - 'A' + 10);
+                  else if (c >= 'a' && c <= 'f')
+                    n = (n << 4) + (c - 'a' + 10);
+                  else
+                    break;
+
+                  buf[j] = c;
+                }
  
+              if (j == length)
+                {
+                  if (n < 0x110000)
+                    mixed_string_buffer_append_unicode (bp, n);
+                  else
+                    {
+                      error_with_progname = false;
+                      error_at_line (0, 0,
+                                     pos->file_name, pos->line_number,
+                                     _("\
+warning: invalid Unicode character"));
+                      error_with_progname = true;
+                    }
+                }
+              else
+                {
+                  int i;
  
-static void
-phase7_ungetc (int c)
-{
-  phase3_ungetc (c);
-}
+                  mixed_string_buffer_append_char (bp, '\\');
+                  mixed_string_buffer_append_char (bp, prefix);
  
+                  for (i = 0; i < j; i++)
+                    mixed_string_buffer_append_char (bp, buf[i]);
  
-/* Free the memory pointed to by a 'struct token_ty'.  */
-static inline void
-free_token (token_ty *tp)
-{
-  if (tp->type == token_type_name || tp->type == token_type_string_literal)
-    free (tp->string);
-  if (tp->type == token_type_string_literal
-      || tp->type == token_type_objc_special)
-    drop_reference (tp->comment);
+                  --p;
+                }
+            }
+            continue;
+          }
+
+      if (c == '\0')
+        break;
+
+      mixed_string_buffer_append_char (bp, c);
+    }
+
+  return mixed_string_buffer_done (bp);
  }
  
+struct literalstring_parser literalstring_c =
+  {
+    literalstring_parse
+  };
+
  
  /* 5. Parse each resulting logical line as preprocessing tokens and
     white space.  Preprocessing tokens and C tokens don't always match.  */
@@ -1035,6 +1208,11 @@ phase5_get (token_ty *tp)
    static int bufmax;
    int bufpos;
    int c;
+  int last_was_backslash;
+  bool raw_expected = false;
+  int delimiter_left_end;
+  int delimiter_right_start;
+  int last_rparen;
  
    if (phase5_pushback_length)
      {
@@ -1114,6 +1292,68 @@ phase5_get (token_ty *tp)
                continue;
  
              default:
+              /* Recognize string literals prefixed by R, u8, u8R, u,
+                 uR, U, UR, L, or LR.  It is defined in the C standard
+                 ISO/IEC 9899:201x and the C++ standard ISO/IEC
+                 14882:2011.  The raw string literals prefixed by R,
+                 u8R, uR, UR, or LR are only valid in C++.
+
+                 Since gettext's argument is a byte sequence, we are
+                 only interested in u8, R, and u8R.  */
+              if (c == '"')
+                {
+                  bool is_prefix = false;
+
+                  switch (buffer[0])
+                    {
+                    case 'R':
+                      if (cxx_extensions && bufpos == 1)
+                        {
+                          is_prefix = true;
+                          raw_expected = true;
+                        }
+                      break;
+                    case 'u':
+                      if (bufpos == 1)
+                        is_prefix = true;
+                      else
+                        switch (buffer[1])
+                          {
+                          case 'R':
+                            if (cxx_extensions && bufpos == 2)
+                              {
+                                is_prefix = true;
+                                raw_expected = true;
+                              }
+                            break;
+                          case '8':
+                            if (bufpos == 2)
+                              is_prefix = true;
+                            else if (cxx_extensions
+                                     && bufpos == 3 && buffer[2] == 'R')
+                              {
+                                is_prefix = true;
+                                raw_expected = true;
+                              }
+                            break;
+                          }
+                      break;
+                    case 'U':
+                    case 'L':
+                      if (bufpos == 1)
+                        is_prefix = true;
+                      else if (cxx_extensions
+                               && bufpos == 2 && buffer[1] == 'R')
+                        {
+                          is_prefix = true;
+                          raw_expected = true;
+                        }
+                      break;
+                    }
+
+                  if (is_prefix)
+                    goto string;
+                }
                phase4_ungetc (c);
                break;
              }
@@ -1214,63 +1454,151 @@ phase5_get (token_ty *tp)
           but ignoring it has no effect unless one of the keywords is
           "L".  Just pretend it won't happen.  Also, we don't need to
           remember the character constant.  */
+      last_was_backslash = false;
        for (;;)
          {
-          c = phase7_getc ();
-          if (c == P7_NEWLINE)
+          c = phase3_getc ();
+          if (last_was_backslash)
              {
+              last_was_backslash = false;
+              continue;
+            }
+          switch (c)
+            {
+            case '\\':
+              last_was_backslash = true;
+              /* FALLTHROUGH */
+            default:
+              continue;
+            case '\n':
                error_with_progname = false;
                error (0, 0, _("%s:%d: warning: unterminated character constant"),
                       logical_file_name, line_number - 1);
                error_with_progname = true;
-              phase7_ungetc ('\n');
+              phase3_ungetc ('\n');
+              break;
+            case EOF: case '\'':
                break;
              }
-          if (c == EOF || c == P7_QUOTE)
-            break;
+          break;
          }
        tp->type = token_type_character_constant;
        return;
  
      case '"':
-      /* We could worry about the 'L' before wide string constants,
-         but since gettext's argument is not a wide character string,
-         let the compiler complain about the argument not matching the
-         prototype.  Just pretend it won't happen.  */
-      bufpos = 0;
-      for (;;)
-        {
-          c = phase7_getc ();
-          if (c == P7_NEWLINE)
-            {
-              error_with_progname = false;
-              error (0, 0, _("%s:%d: warning: unterminated string literal"),
-                     logical_file_name, line_number - 1);
-              error_with_progname = true;
-              phase7_ungetc ('\n');
-              break;
-            }
-          if (c == EOF || c == P7_QUOTES)
+      {
+      string:
+        /* We could worry about the 'L' before wide string constants,
+           but since gettext's argument is not a wide character string,
+           let the compiler complain about the argument not matching the
+           prototype.  Just pretend it won't happen.  */
+        last_was_backslash = false;
+        delimiter_left_end = -1;
+        delimiter_right_start = -1;
+        last_rparen = -1;
+        bufpos = 0;
+        for (;;)
+          {
+            c = phase3_getc ();
+            if (last_was_backslash && !raw_expected)
+              {
+                last_was_backslash = false;
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+                continue;
+              }
+            switch (c)
+              {
+              case '\\':
+                last_was_backslash = true;
+                /* FALLTHROUGH */
+              default:
+                if (raw_expected)
+                  {
+                    if (c == '(' && delimiter_left_end < 0)
+                      delimiter_left_end = bufpos;
+                    else if (c == ')' && delimiter_left_end >= 0)
+                      last_rparen = bufpos;
+                  }
+                else if (c == '\n')
+                  {
+                    error_with_progname = false;
+                    error (0, 0,
+                           _("%s:%d: warning: unterminated string literal"),
+                           logical_file_name, line_number - 1);
+                    error_with_progname = true;
+                    phase3_ungetc ('\n');
+                    break;
+                  }
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+                continue;
+
+              case '"':
+                if (raw_expected && delimiter_left_end >= 0)
+                  {
+                    if (last_rparen < 0
+                        || delimiter_left_end != bufpos - (last_rparen + 1)
+                        || strncmp (buffer, buffer + last_rparen + 1,
+                                    delimiter_left_end) != 0)
+                      {
+                        if (bufpos >= bufmax)
+                          {
+                            bufmax = 2 * bufmax + 10;
+                            buffer = xrealloc (buffer, bufmax);
+                          }
+                        buffer[bufpos++] = c;
+                        continue;
+                      }
+                    delimiter_right_start = last_rparen;
+                  }
+                break;
+
+              case EOF:
+                break;
+              }
              break;
-          if (c == P7_QUOTE)
-            c = '\'';
-          if (bufpos >= bufmax)
-            {
-              bufmax = 2 * bufmax + 10;
-              buffer = xrealloc (buffer, bufmax);
-            }
-          buffer[bufpos++] = c;
-        }
-      if (bufpos >= bufmax)
-        {
-          bufmax = 2 * bufmax + 10;
-          buffer = xrealloc (buffer, bufmax);
-        }
-      buffer[bufpos] = 0;
-      tp->type = token_type_string_literal;
-      tp->string = xstrdup (buffer);
-      tp->comment = add_reference (savable_comment);
-      return;
+          }
+        if (bufpos >= bufmax)
+          {
+            bufmax = 2 * bufmax + 10;
+            buffer = xrealloc (buffer, bufmax);
+          }
+        buffer[bufpos] = 0;
+
+        if (raw_expected)
+          {
+            if (delimiter_left_end < 0 || delimiter_right_start < 0)
+              {
+                error_with_progname = false;
+                error (0, 0, _("%s:%d: warning: unterminated string literal"),
+                       logical_file_name, line_number - 1);
+                error_with_progname = true;
+              }
+            else
+              {
+                buffer[delimiter_right_start] = '\0';
+                tp->type = token_type_string_literal;
+                tp->string = xstrdup (&buffer[delimiter_left_end + 1]);
+                tp->escape = LET_NONE;
+                tp->comment = add_reference (savable_comment);
+                return;
+              }
+          }
+        tp->type = token_type_string_literal;
+        tp->string = xstrdup (buffer);
+        tp->escape = LET_ANSI_C | LET_UNICODE;
+        tp->comment = add_reference (savable_comment);
+        return;
+      }
  
      case '(':
        tp->type = token_type_lparen;
@@ -1303,8 +1631,8 @@ phase5_get (token_ty *tp)
  
      default:
        /* We could carefully recognize each of the 2 and 3 character
-        operators, but it is not necessary, as we only need to recognize
-        gettext invocations.  Don't bother.  */
+         operators, but it is not necessary, as we only need to recognize
+         gettext invocations.  Don't bother.  */
        tp->type = token_type_symbol;
        return;
      }
@@ -1521,6 +1849,7 @@ phase8a_get (token_ty *tp)
        tp->string = new_string;
        tp->comment = add_reference (savable_comment);
        tp->type = token_type_string_literal;
+      tp->escape = LET_ANSI_C | LET_UNICODE;
      }
  }
  
@@ -1601,7 +1930,10 @@ phase8c_unget (token_ty *tp)
  
  /* 8. Concatenate adjacent string literals to form single string
     literals (because we don't expand macros, there are a few things we
-   will miss).  */
+   will miss).
+
+   FIXME: handle the case when the string literals have different
+   tp->escape setting.  */
  
  static void
  phase8_get (token_ty *tp)
@@ -1658,6 +1990,9 @@ struct xgettext_token_ty
    char *string;
  
    /* This field is used only for xgettext_token_type_string_literal.  */
+  enum literalstring_escape_type escape;
+
+  /* This field is used only for xgettext_token_type_string_literal.  */
    refcounted_string_list_ty *comment;
  
    /* These fields are only for
@@ -1732,6 +2067,7 @@ x_c_lex (xgettext_token_ty *tp)
  
            tp->type = xgettext_token_type_string_literal;
            tp->string = token.string;
+          tp->escape = token.escape;
            tp->comment = token.comment;
            tp->pos.file_name = logical_file_name;
            tp->pos.line_number = token.line_number;
@@ -1887,13 +2223,40 @@ extract_parenthesized (message_list_ty *mlp,
  
          case xgettext_token_type_string_literal:
            if (extract_all)
-            remember_a_message (mlp, NULL, token.string, inner_context,
-                                &token.pos, NULL, token.comment);
+            {
+              char *string;
+              refcounted_string_list_ty *comment;
+              const char *encoding;
+
+              string = literalstring_parse (token.string, &token.pos,
+                                            token.escape);
+              free (token.string);
+              token.string = string;
+
+              if (token.comment != NULL)
+                {
+                  comment = savable_comment_convert_encoding (token.comment,
+                                                              &token.pos);
+                  drop_reference (token.comment);
+                  token.comment = comment;
+                }
+
+              /* token.string and token.comment are already converted
+                 to UTF-8.  Prevent further conversion in
+                 remember_a_message.  */
+              encoding = xgettext_current_source_encoding;
+              xgettext_current_source_encoding = po_charset_utf8;
+              remember_a_message (mlp, NULL, token.string, inner_context,
+                                  &token.pos, NULL, token.comment);
+              xgettext_current_source_encoding = encoding;
+            }
            else
-            arglist_parser_remember (argparser, arg, token.string,
-                                     inner_context,
-                                     token.pos.file_name, token.pos.line_number,
-                                     token.comment);
+            arglist_parser_remember_literal (argparser, arg, token.string,
+                                             inner_context,
+                                             token.pos.file_name,
+                                             token.pos.line_number,
+                                             token.comment,
+                                             token.escape);
            drop_reference (token.comment);
            next_context_iter = null_context_list_iterator;
            selectorcall_context_iter = null_context_list_iterator;
@@ -1959,6 +2322,18 @@ extract_c (FILE *f,
             msgdomain_list_ty *mdlp)
  {
    objc_extensions = false;
+  cxx_extensions = false;
+  extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
+}
+
+void
+extract_cxx (FILE *f,
+             const char *real_filename, const char *logical_filename,
+             flag_context_list_table_ty *flag_table,
+             msgdomain_list_ty *mdlp)
+{
+  objc_extensions = false;
+  cxx_extensions = true;
    extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
  }
  
@@ -1969,5 +2344,6 @@ extract_objc (FILE *f,
                msgdomain_list_ty *mdlp)
  {
    objc_extensions = true;
+  cxx_extensions = false;
    extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
  }