1 /* xgettext Perl backend.
2 Copyright (C) 2002-2010, 2015 Free Software Foundation, Inc.
4 This file was written by Guido Flohr <guido@imperia.net>, 2002-2010.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 #include "error-progname.h"
37 #include "po-charset.h"
42 #define _(s) gettext(s)
44 /* The Perl syntax is defined in perlsyn.pod. Try the command
45 "man perlsyn" or "perldoc perlsyn".
46 Also, the syntax after the 'sub' keyword is specified in perlsub.pod.
47 Try the command "man perlsub" or "perldoc perlsub".
48 Perl 5.10 has new operators '//' and '//=', see
49 <http://perldoc.perl.org/perldelta.html#Defined-or-operator>. */
54 /* ====================== Keyword set customization. ====================== */
56 /* If true extract all strings. */
57 static bool extract_all = false;
59 static hash_table keywords;
60 static bool default_keywords = true;
71 x_perl_keyword (const char *name)
74 default_keywords = false;
78 struct callshape shape;
81 if (keywords.table == NULL)
82 hash_init (&keywords, 100);
84 split_keywordspec (name, &end, &shape);
86 /* The characters between name and end should form a valid C identifier.
87 A colon means an invalid parse in split_keywordspec(). */
88 colon = strchr (name, ':');
89 if (colon == NULL || colon >= end)
90 insert_keyword_callshape (&keywords, name, end - name, &shape);
94 /* Finish initializing the keywords hash table.
95 Called after argument processing, before each file is processed. */
101 /* When adding new keywords here, also update the documentation in
103 x_perl_keyword ("gettext");
104 x_perl_keyword ("%gettext");
105 x_perl_keyword ("$gettext");
106 x_perl_keyword ("dgettext:2");
107 x_perl_keyword ("dcgettext:2");
108 x_perl_keyword ("ngettext:1,2");
109 x_perl_keyword ("dngettext:2,3");
110 x_perl_keyword ("dcngettext:2,3");
111 x_perl_keyword ("gettext_noop");
113 x_perl_keyword ("__");
114 x_perl_keyword ("$__");
115 x_perl_keyword ("%__");
116 x_perl_keyword ("__x");
117 x_perl_keyword ("__n:1,2");
118 x_perl_keyword ("__nx:1,2");
119 x_perl_keyword ("__xn:1,2");
120 x_perl_keyword ("N__");
122 default_keywords = false;
127 init_flag_table_perl ()
129 xgettext_record_flag ("gettext:1:pass-perl-format");
130 xgettext_record_flag ("gettext:1:pass-perl-brace-format");
131 xgettext_record_flag ("%gettext:1:pass-perl-format");
132 xgettext_record_flag ("%gettext:1:pass-perl-brace-format");
133 xgettext_record_flag ("$gettext:1:pass-perl-format");
134 xgettext_record_flag ("$gettext:1:pass-perl-brace-format");
135 xgettext_record_flag ("dgettext:2:pass-perl-format");
136 xgettext_record_flag ("dgettext:2:pass-perl-brace-format");
137 xgettext_record_flag ("dcgettext:2:pass-perl-format");
138 xgettext_record_flag ("dcgettext:2:pass-perl-brace-format");
139 xgettext_record_flag ("ngettext:1:pass-perl-format");
140 xgettext_record_flag ("ngettext:2:pass-perl-format");
141 xgettext_record_flag ("ngettext:1:pass-perl-brace-format");
142 xgettext_record_flag ("ngettext:2:pass-perl-brace-format");
143 xgettext_record_flag ("dngettext:2:pass-perl-format");
144 xgettext_record_flag ("dngettext:3:pass-perl-format");
145 xgettext_record_flag ("dngettext:2:pass-perl-brace-format");
146 xgettext_record_flag ("dngettext:3:pass-perl-brace-format");
147 xgettext_record_flag ("dcngettext:2:pass-perl-format");
148 xgettext_record_flag ("dcngettext:3:pass-perl-format");
149 xgettext_record_flag ("dcngettext:2:pass-perl-brace-format");
150 xgettext_record_flag ("dcngettext:3:pass-perl-brace-format");
151 xgettext_record_flag ("gettext_noop:1:pass-perl-format");
152 xgettext_record_flag ("gettext_noop:1:pass-perl-brace-format");
153 xgettext_record_flag ("printf:1:perl-format"); /* argument 1 or 2 ?? */
154 xgettext_record_flag ("sprintf:1:perl-format");
156 xgettext_record_flag ("__:1:pass-perl-format");
157 xgettext_record_flag ("__:1:pass-perl-brace-format");
158 xgettext_record_flag ("%__:1:pass-perl-format");
159 xgettext_record_flag ("%__:1:pass-perl-brace-format");
160 xgettext_record_flag ("$__:1:pass-perl-format");
161 xgettext_record_flag ("$__:1:pass-perl-brace-format");
162 xgettext_record_flag ("__x:1:perl-brace-format");
163 xgettext_record_flag ("__n:1:pass-perl-format");
164 xgettext_record_flag ("__n:2:pass-perl-format");
165 xgettext_record_flag ("__n:1:pass-perl-brace-format");
166 xgettext_record_flag ("__n:2:pass-perl-brace-format");
167 xgettext_record_flag ("__nx:1:perl-brace-format");
168 xgettext_record_flag ("__nx:2:perl-brace-format");
169 xgettext_record_flag ("__xn:1:perl-brace-format");
170 xgettext_record_flag ("__xn:2:perl-brace-format");
171 xgettext_record_flag ("N__:1:pass-perl-format");
172 xgettext_record_flag ("N__:1:pass-perl-brace-format");
177 /* ======================== Reading of characters. ======================== */
179 /* Real filename, used in error messages about the input file. */
180 static const char *real_file_name;
182 /* Logical filename and line number, used to label the extracted messages. */
183 static char *logical_file_name;
184 static int line_number;
186 /* The input file stream. */
189 /* The current line buffer. */
190 static char *linebuf;
192 /* The size of the current line. */
195 /* The position in the current line. */
198 /* The size of the input buffer. */
199 static size_t linebuf_size;
201 /* Number of lines eaten for here documents. */
202 static int eaten_here;
204 /* Paranoia: EOF marker for __END__ or __DATA__. */
205 static bool end_of_file;
208 /* 1. line_number handling. */
210 /* Returns the next character from the input stream or EOF. */
214 line_number += eaten_here;
220 if (linepos >= linesize)
222 linesize = getline (&linebuf, &linebuf_size, fp);
227 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
236 /* Undosify. This is important for catching the end of <<EOF and
237 <<'EOF'. We could rely on stdio doing this for us but
238 it is not uncommon to to come across Perl scripts with CRLF
239 newline conventions on systems that do not follow this
241 if (linesize >= 2 && linebuf[linesize - 1] == '\n'
242 && linebuf[linesize - 2] == '\r')
244 linebuf[linesize - 2] = '\n';
245 linebuf[linesize - 1] = '\0';
250 return linebuf[linepos++];
253 /* Supports only one pushback character. */
255 phase1_ungetc (int c)
260 /* Attempt to ungetc across line boundary. Shouldn't happen.
261 No two phase1_ungetc calls are permitted in a row. */
268 /* Read a here document and return its contents.
269 The delimiter is an UTF-8 encoded string; the resulting string is UTF-8
273 get_here_document (const char *delimiter)
275 /* Accumulator for the entire here document, including a NUL byte
278 static size_t bufmax = 0;
280 /* Current line being appended. */
281 static char *my_linebuf = NULL;
282 static size_t my_linebuf_size = 0;
284 /* Allocate the initial buffer. Later on, bufmax > 0. */
287 buffer = XNMALLOC (1, char);
294 int read_bytes = getline (&my_linebuf, &my_linebuf_size, fp);
302 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
307 error_with_progname = false;
308 error (EXIT_SUCCESS, 0, _("\
309 %s:%d: can't find string terminator \"%s\" anywhere before EOF"),
310 real_file_name, line_number, delimiter);
311 error_with_progname = true;
319 /* Convert to UTF-8. */
321 from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
322 line_number + eaten_here);
323 if (my_line_utf8 != my_linebuf)
325 if (strlen (my_line_utf8) >= my_linebuf_size)
327 my_linebuf_size = strlen (my_line_utf8) + 1;
328 my_linebuf = xrealloc (my_linebuf, my_linebuf_size);
330 strcpy (my_linebuf, my_line_utf8);
334 /* Undosify. This is important for catching the end of <<EOF and
335 <<'EOF'. We could rely on stdio doing this for us but you
336 it is not uncommon to to come across Perl scripts with CRLF
337 newline conventions on systems that do not follow this
339 if (read_bytes >= 2 && my_linebuf[read_bytes - 1] == '\n'
340 && my_linebuf[read_bytes - 2] == '\r')
342 my_linebuf[read_bytes - 2] = '\n';
343 my_linebuf[read_bytes - 1] = '\0';
347 /* Temporarily remove the trailing newline from my_linebuf. */
349 if (read_bytes >= 1 && my_linebuf[read_bytes - 1] == '\n')
352 my_linebuf[read_bytes - 1] = '\0';
355 /* See whether this line terminates the here document. */
356 if (strcmp (my_linebuf, delimiter) == 0)
359 /* Add back the trailing newline to my_linebuf. */
361 my_linebuf[read_bytes - 1] = '\n';
363 /* Ensure room for read_bytes + 1 bytes. */
364 if (bufpos + read_bytes >= bufmax)
367 bufmax = 2 * bufmax + 10;
368 while (bufpos + read_bytes >= bufmax);
369 buffer = xrealloc (buffer, bufmax);
371 /* Append this line to the accumulator. */
372 strcpy (buffer + bufpos, my_linebuf);
373 bufpos += read_bytes;
376 /* Done accumulating the here document. */
377 return xstrdup (buffer);
380 /* Skips pod sections. */
384 line_number += eaten_here;
390 linesize = getline (&linebuf, &linebuf_size, fp);
395 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
402 if (strncmp ("=cut", linebuf, 4) == 0)
404 /* Force reading of a new line on next call to phase1_getc(). */
412 /* These are for tracking whether comments count as immediately before
414 static int last_comment_line;
415 static int last_non_comment_line;
418 /* 2. Replace each comment that is not inside a string literal or regular
419 expression with a newline character. We need to remember the comment
420 for later, because it may be attached to a keyword string. */
426 static size_t bufmax;
436 lineno = line_number;
437 /* Skip leading whitespace. */
443 if (c != ' ' && c != '\t' && c != '\r' && c != '\f')
449 /* Accumulate the comment. */
453 if (c == '\n' || c == EOF)
455 if (buflen >= bufmax)
457 bufmax = 2 * bufmax + 10;
458 buffer = xrealloc (buffer, bufmax);
460 buffer[buflen++] = c;
462 if (buflen >= bufmax)
464 bufmax = 2 * bufmax + 10;
465 buffer = xrealloc (buffer, bufmax);
467 buffer[buflen] = '\0';
468 /* Convert it to UTF-8. */
470 from_current_source_encoding (buffer, lc_comment, logical_file_name,
472 /* Save it until we encounter the corresponding string. */
473 savable_comment_add (utf8_string);
474 last_comment_line = lineno;
479 /* Supports only one pushback character. */
481 phase2_ungetc (int c)
487 /* Whitespace recognition. */
489 #define case_whitespace \
490 case ' ': case '\t': case '\r': case '\n': case '\f'
493 is_whitespace (int c)
495 return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
499 /* ========================== Reading of tokens. ========================== */
505 token_type_lparen, /* ( */
506 token_type_rparen, /* ) */
507 token_type_comma, /* , */
508 token_type_fat_comma, /* => */
509 token_type_dereference, /* -> */
510 token_type_semicolon, /* ; */
511 token_type_lbrace, /* { */
512 token_type_rbrace, /* } */
513 token_type_lbracket, /* [ */
514 token_type_rbracket, /* ] */
515 token_type_string, /* quote-like */
516 token_type_number, /* starting with a digit o dot */
517 token_type_named_op, /* if, unless, while, ... */
518 token_type_variable, /* $... */
519 token_type_object, /* A dereferenced variable, maybe a blessed
521 token_type_symbol, /* symbol, number */
522 token_type_regex_op, /* s, tr, y, m. */
523 token_type_dot, /* . */
524 token_type_other, /* regexp, misc. operator */
525 /* The following are not really token types, but variants used by
527 token_type_keyword_symbol /* keyword symbol */
529 typedef enum token_type_ty token_type_ty;
531 /* Subtypes for strings, important for interpolation. */
534 string_type_verbatim, /* "<<'EOF'", "m'...'", "s'...''...'",
535 "tr/.../.../", "y/.../.../". */
536 string_type_q, /* "'..'", "q/.../". */
537 string_type_qq, /* '"..."', "`...`", "qq/.../", "qx/.../",
539 string_type_qr /* Not supported. */
542 /* Subtypes for symbols, important for dollar interpretation. */
545 symbol_type_none, /* Nothing special. */
546 symbol_type_sub, /* 'sub'. */
547 symbol_type_function /* Function name after 'sub'. */
550 typedef struct token_ty token_ty;
554 token_type_ty last_type;
555 int sub_type; /* for token_type_string, token_type_symbol */
556 char *string; /* for: in encoding:
557 token_type_named_op ASCII
558 token_type_string UTF-8
559 token_type_symbol ASCII
560 token_type_variable global_source_encoding
561 token_type_object global_source_encoding
563 refcounted_string_list_ty *comment; /* for token_type_string */
569 token2string (const token_ty *token)
574 return "token_type_eof";
575 case token_type_lparen:
576 return "token_type_lparen";
577 case token_type_rparen:
578 return "token_type_rparen";
579 case token_type_comma:
580 return "token_type_comma";
581 case token_type_fat_comma:
582 return "token_type_fat_comma";
583 case token_type_dereference:
584 return "token_type_dereference";
585 case token_type_semicolon:
586 return "token_type_semicolon";
587 case token_type_lbrace:
588 return "token_type_lbrace";
589 case token_type_rbrace:
590 return "token_type_rbrace";
591 case token_type_lbracket:
592 return "token_type_lbracket";
593 case token_type_rbracket:
594 return "token_type_rbracket";
595 case token_type_string:
596 return "token_type_string";
597 case token_type_number:
598 return "token type number";
599 case token_type_named_op:
600 return "token_type_named_op";
601 case token_type_variable:
602 return "token_type_variable";
603 case token_type_object:
604 return "token_type_object";
605 case token_type_symbol:
606 return "token_type_symbol";
607 case token_type_regex_op:
608 return "token_type_regex_op";
610 return "token_type_dot";
611 case token_type_other:
612 return "token_type_other";
619 /* Free the memory pointed to by a 'struct token_ty'. */
621 free_token (token_ty *tp)
625 case token_type_named_op:
626 case token_type_string:
627 case token_type_symbol:
628 case token_type_variable:
629 case token_type_object:
635 if (tp->type == token_type_string)
636 drop_reference (tp->comment);
640 /* Pass 1 of extracting quotes: Find the end of the string, regardless
641 of the semantics of the construct. Return the complete string,
642 including the starting and the trailing delimiter, with backslashes
643 removed where appropriate. */
645 extract_quotelike_pass1 (int delim)
647 /* This function is called recursively. No way to allocate stuff
648 statically. Also alloca() is inappropriate due to limited stack
649 size on some platforms. So we use malloc(). */
651 char *buffer = XNMALLOC (bufmax, char);
656 buffer[bufpos++] = delim;
658 /* Find the closing delimiter. */
673 default: /* "..." or '...' or |...| etc. */
675 counter_delim = delim;
681 int c = phase1_getc ();
683 /* This round can produce 1 or 2 bytes. Ensure room for 2 bytes. */
684 if (bufpos + 2 > bufmax)
686 bufmax = 2 * bufmax + 10;
687 buffer = xrealloc (buffer, bufmax);
690 if (c == counter_delim || c == EOF)
692 buffer[bufpos++] = counter_delim; /* will be stripped off later */
693 buffer[bufpos++] = '\0';
695 fprintf (stderr, "PASS1: %s\n", buffer);
700 if (nested && c == delim)
702 char *inner = extract_quotelike_pass1 (delim);
703 size_t len = strlen (inner);
705 /* Ensure room for len + 1 bytes. */
706 if (bufpos + len >= bufmax)
709 bufmax = 2 * bufmax + 10;
710 while (bufpos + len >= bufmax);
711 buffer = xrealloc (buffer, bufmax);
713 strcpy (buffer + bufpos, inner);
722 buffer[bufpos++] = '\\';
723 buffer[bufpos++] = '\\';
725 else if (c == delim || c == counter_delim)
727 /* This is pass2 in Perl. */
728 buffer[bufpos++] = c;
732 buffer[bufpos++] = '\\';
738 buffer[bufpos++] = c;
743 /* Like extract_quotelike_pass1, but return the complete string in UTF-8
746 extract_quotelike_pass1_utf8 (int delim)
748 char *string = extract_quotelike_pass1 (delim);
750 from_current_source_encoding (string, lc_string, logical_file_name,
752 if (utf8_string != string)
758 /* ========= Reading of tokens and commands. Extracting strings. ========= */
761 /* Context lookup table. */
762 static flag_context_list_table_ty *flag_context_list_table;
765 /* Forward declaration of local functions. */
766 static void interpolate_keywords (message_list_ty *mlp, const char *string,
768 static token_ty *x_perl_lex (message_list_ty *mlp);
769 static void x_perl_unlex (token_ty *tp);
770 static bool extract_balanced (message_list_ty *mlp,
771 token_type_ty delim, bool eat_delim,
773 flag_context_ty outer_context,
774 flag_context_list_iterator_ty context_iter,
775 int arg, struct arglist_parser *argparser);
778 /* Extract an unsigned hexadecimal number from STRING, considering at
779 most LEN bytes and place the result in *RESULT. Returns a pointer
780 to the first character past the hexadecimal number. */
782 extract_hex (const char *string, size_t len, unsigned int *result)
788 for (i = 0; i < len; i++)
793 if (c >= 'A' && c <= 'F')
794 number = c - 'A' + 10;
795 else if (c >= 'a' && c <= 'f')
796 number = c - 'a' + 10;
797 else if (c >= '0' && c <= '9')
809 /* Extract an unsigned octal number from STRING, considering at
810 most LEN bytes and place the result in *RESULT. Returns a pointer
811 to the first character past the octal number. */
813 extract_oct (const char *string, size_t len, unsigned int *result)
819 for (i = 0; i < len; i++)
824 if (c >= '0' && c <= '7')
836 /* Extract the various quotelike constructs except for <<EOF. See the
837 section "Gory details of parsing quoted constructs" in perlop.pod.
838 Return the resulting token in *tp; tp->type == token_type_string. */
840 extract_quotelike (token_ty *tp, int delim)
842 char *string = extract_quotelike_pass1_utf8 (delim);
843 size_t len = strlen (string);
845 tp->type = token_type_string;
846 /* Take the string without the delimiters at the start and at the end. */
849 string[len - 1] = '\0';
850 tp->string = xstrdup (string + 1);
852 tp->comment = add_reference (savable_comment);
855 /* Extract the quotelike constructs with double delimiters, like
856 s/[SEARCH]/[REPLACE]/. This function does not eat up trailing
857 modifiers (left to the caller).
858 Return the resulting token in *tp; tp->type == token_type_regex_op. */
860 extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
865 tp->type = token_type_regex_op;
867 string = extract_quotelike_pass1_utf8 (delim);
869 interpolate_keywords (mlp, string, line_number);
872 if (delim == '(' || delim == '<' || delim == '{' || delim == '[')
874 /* The delimiter for the second string can be different, e.g.
875 s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/. See "man perlrequick". */
876 delim = phase1_getc ();
877 while (is_whitespace (delim))
879 /* The hash-sign is not a valid delimiter after whitespace, ergo
880 use phase2_getc() and not phase1_getc() now. */
881 delim = phase2_getc ();
884 string = extract_quotelike_pass1_utf8 (delim);
886 interpolate_keywords (mlp, string, line_number);
890 /* Perform pass 3 of quotelike extraction (interpolation).
891 *tp is a token of type token_type_string.
892 This function replaces tp->string.
893 This function does not access tp->comment. */
894 /* FIXME: Currently may writes null-bytes into the string. */
896 extract_quotelike_pass3 (token_ty *tp, int error_level)
899 static int bufmax = 0;
907 switch (tp->sub_type)
909 case string_type_verbatim:
910 fprintf (stderr, "Interpolating string_type_verbatim:\n");
913 fprintf (stderr, "Interpolating string_type_q:\n");
916 fprintf (stderr, "Interpolating string_type_qq:\n");
919 fprintf (stderr, "Interpolating string_type_qr:\n");
922 fprintf (stderr, "%s\n", tp->string);
923 if (tp->sub_type == string_type_verbatim)
924 fprintf (stderr, "---> %s\n", tp->string);
927 if (tp->sub_type == string_type_verbatim)
930 /* Loop over tp->string, accumulating the expansion in buffer. */
939 /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
940 if \Q modifier is present. */
941 if (bufpos + 7 > bufmax)
943 bufmax = 2 * bufmax + 10;
944 buffer = xrealloc (buffer, bufmax);
947 if (tp->sub_type == string_type_q)
955 buffer[bufpos++] = '\\';
960 buffer[bufpos++] = *crs++;
966 /* We only get here for double-quoted strings or regular expressions.
967 Unescape escape sequences. */
974 buffer[bufpos++] = '\t';
978 buffer[bufpos++] = '\n';
982 buffer[bufpos++] = '\r';
986 buffer[bufpos++] = '\f';
990 buffer[bufpos++] = '\b';
994 buffer[bufpos++] = '\a';
998 buffer[bufpos++] = 0x1b;
1000 case '0': case '1': case '2': case '3':
1001 case '4': case '5': case '6': case '7':
1003 unsigned int oct_number;
1006 crs = extract_oct (crs + 1, 3, &oct_number);
1008 /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
1009 true, the character should be converted to its uppercase
1010 resp. lowercase equivalent. I don't know if the necessary
1011 facilities are already included in gettext. For US-Ascii
1012 the conversion can be already be done, however. */
1013 if (uppercase && oct_number >= 'a' && oct_number <= 'z')
1015 oct_number = oct_number - 'a' + 'A';
1017 else if (lowercase && oct_number >= 'A' && oct_number <= 'Z')
1019 oct_number = oct_number - 'A' + 'a';
1023 /* Yes, octal escape sequences in the range 0x100..0x1ff are
1025 length = u8_uctomb ((unsigned char *) (buffer + bufpos),
1033 unsigned int hex_number = 0;
1039 const char *end = strchr (crs, '}');
1042 error_with_progname = false;
1043 error (error_level, 0, _("\
1044 %s:%d: missing right brace on \\x{HEXNUMBER}"), real_file_name, line_number);
1045 error_with_progname = true;
1052 (void) extract_hex (crs, end - crs, &hex_number);
1058 crs = extract_hex (crs, 2, &hex_number);
1061 /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
1062 true, the character should be converted to its uppercase
1063 resp. lowercase equivalent. I don't know if the necessary
1064 facilities are already included in gettext. For US-Ascii
1065 the conversion can be already be done, however. */
1066 if (uppercase && hex_number >= 'a' && hex_number <= 'z')
1068 hex_number = hex_number - 'a' + 'A';
1070 else if (lowercase && hex_number >= 'A' && hex_number <= 'Z')
1072 hex_number = hex_number - 'A' + 'a';
1075 length = u8_uctomb ((unsigned char *) (buffer + bufpos),
1083 /* Perl's notion of control characters. */
1087 int the_char = (unsigned char) *crs;
1088 if (the_char >= 'a' && the_char <= 'z')
1089 the_char = the_char - 'a' + 'A';
1090 buffer[bufpos++] = the_char ^ 0x40;
1097 const char *end = strchr (crs + 1, '}');
1101 unsigned int unicode;
1103 name = XNMALLOC (end - (crs + 1) + 1, char);
1104 memcpy (name, crs + 1, end - (crs + 1));
1105 name[end - (crs + 1)] = '\0';
1107 unicode = unicode_name_character (name);
1108 if (unicode != UNINAME_INVALID)
1110 /* FIXME: Convert to upper/lowercase if the
1111 corresponding flag is set to true. */
1113 u8_uctomb ((unsigned char *) (buffer + bufpos),
1128 /* No escape sequence, go on. */
1156 if (*crs >= 'A' && *crs <= 'Z')
1158 buffer[bufpos++] = *crs - 'A' + 'a';
1160 else if ((unsigned char) *crs >= 0x80)
1162 error_with_progname = false;
1163 error (error_level, 0, _("\
1164 %s:%d: invalid interpolation (\"\\l\") of 8bit character \"%c\""),
1165 real_file_name, line_number, *crs);
1166 error_with_progname = true;
1170 buffer[bufpos++] = *crs;
1176 if (*crs >= 'a' && *crs <= 'z')
1178 buffer[bufpos++] = *crs - 'a' + 'A';
1180 else if ((unsigned char) *crs >= 0x80)
1182 error_with_progname = false;
1183 error (error_level, 0, _("\
1184 %s:%d: invalid interpolation (\"\\u\") of 8bit character \"%c\""),
1185 real_file_name, line_number, *crs);
1186 error_with_progname = true;
1190 buffer[bufpos++] = *crs;
1195 buffer[bufpos++] = *crs;
1204 backslashed = false;
1207 && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z')
1208 || (*crs >= '0' && *crs <= '9') || *crs == '_'))
1210 buffer[bufpos++] = '\\';
1214 if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
1216 error_with_progname = false;
1217 error (error_level, 0, _("\
1218 %s:%d: invalid variable interpolation at \"%c\""),
1219 real_file_name, line_number, *crs);
1220 error_with_progname = true;
1225 if (*crs >= 'A' && *crs <= 'Z')
1226 buffer[bufpos++] = *crs - 'A' + 'a';
1227 else if ((unsigned char) *crs >= 0x80)
1229 error_with_progname = false;
1230 error (error_level, 0, _("\
1231 %s:%d: invalid interpolation (\"\\L\") of 8bit character \"%c\""),
1232 real_file_name, line_number, *crs);
1233 error_with_progname = true;
1234 buffer[bufpos++] = *crs;
1237 buffer[bufpos++] = *crs;
1242 if (*crs >= 'a' && *crs <= 'z')
1243 buffer[bufpos++] = *crs - 'a' + 'A';
1244 else if ((unsigned char) *crs >= 0x80)
1246 error_with_progname = false;
1247 error (error_level, 0, _("\
1248 %s:%d: invalid interpolation (\"\\U\") of 8bit character \"%c\""),
1249 real_file_name, line_number, *crs);
1250 error_with_progname = true;
1251 buffer[bufpos++] = *crs;
1254 buffer[bufpos++] = *crs;
1259 buffer[bufpos++] = *crs++;
1263 /* Ensure room for 1 more byte. */
1264 if (bufpos >= bufmax)
1266 bufmax = 2 * bufmax + 10;
1267 buffer = xrealloc (buffer, bufmax);
1270 buffer[bufpos++] = '\0';
1273 fprintf (stderr, "---> %s\n", buffer);
1276 /* Replace tp->string. */
1278 tp->string = xstrdup (buffer);
1281 /* Parse a variable. This is done in several steps:
1282 1) Consume all leading occurencies of '$', '@', '%', and '*'.
1283 2) Determine the name of the variable from the following input.
1284 3) Parse possible following hash keys or array indexes.
1287 extract_variable (message_list_ty *mlp, token_ty *tp, int first)
1289 static char *buffer;
1290 static int bufmax = 0;
1293 size_t varbody_length = 0;
1294 bool maybe_hash_deref = false;
1295 bool maybe_hash_value = false;
1297 tp->type = token_type_variable;
1300 fprintf (stderr, "%s:%d: extracting variable type '%c'\n",
1301 real_file_name, line_number, first);
1305 * 1) Consume dollars and so on (not euros ...). Unconditionally
1306 * accepting the hash sign (#) will maybe lead to inaccurate
1309 while (c == '$' || c == '*' || c == '#' || c == '@' || c == '%')
1311 if (bufpos >= bufmax)
1313 bufmax = 2 * bufmax + 10;
1314 buffer = xrealloc (buffer, bufmax);
1316 buffer[bufpos++] = c;
1322 tp->type = token_type_eof;
1326 /* Hash references are treated in a special way, when looking for
1328 if (buffer[0] == '$')
1331 maybe_hash_value = true;
1332 else if (bufpos == 2 && buffer[1] == '$')
1335 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
1336 || (c >= '0' && c <= '9')
1337 || c == '_' || c == ':' || c == '\'' || c >= 0x80))
1339 /* Special variable $$ for pid. */
1340 if (bufpos >= bufmax)
1342 bufmax = 2 * bufmax + 10;
1343 buffer = xrealloc (buffer, bufmax);
1345 buffer[bufpos++] = '\0';
1346 tp->string = xstrdup (buffer);
1348 fprintf (stderr, "%s:%d: is PID ($$)\n",
1349 real_file_name, line_number);
1356 maybe_hash_deref = true;
1362 * 2) Get the name of the variable. The first character is practically
1363 * arbitrary. Punctuation and numbers automagically put a variable
1364 * in the global namespace but that subtle difference is not interesting
1367 if (bufpos >= bufmax)
1369 bufmax = 2 * bufmax + 10;
1370 buffer = xrealloc (buffer, bufmax);
1374 /* Yuck, we cannot accept ${gettext} as a keyword... Except for
1375 * debugging purposes it is also harmless, that we suppress the
1376 * real name of the variable.
1379 fprintf (stderr, "%s:%d: braced {variable_name}\n",
1380 real_file_name, line_number);
1383 if (extract_balanced (mlp, token_type_rbrace, true, false,
1384 null_context, null_context_list_iterator,
1385 1, arglist_parser_alloc (mlp, NULL)))
1387 tp->type = token_type_eof;
1390 buffer[bufpos++] = c;
1394 while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
1395 || (c >= '0' && c <= '9')
1396 || c == '_' || c == ':' || c == '\'' || c >= 0x80)
1399 if (bufpos >= bufmax)
1401 bufmax = 2 * bufmax + 10;
1402 buffer = xrealloc (buffer, bufmax);
1404 buffer[bufpos++] = c;
1410 /* Probably some strange Perl variable like $`. */
1411 if (varbody_length == 0)
1414 if (c == EOF || is_whitespace (c))
1415 phase1_ungetc (c); /* Loser. */
1418 if (bufpos >= bufmax)
1420 bufmax = 2 * bufmax + 10;
1421 buffer = xrealloc (buffer, bufmax);
1423 buffer[bufpos++] = c;
1427 if (bufpos >= bufmax)
1429 bufmax = 2 * bufmax + 10;
1430 buffer = xrealloc (buffer, bufmax);
1432 buffer[bufpos++] = '\0';
1434 tp->string = xstrdup (buffer);
1437 fprintf (stderr, "%s:%d: complete variable name: %s\n",
1438 real_file_name, line_number, tp->string);
1442 * 3) If the following looks strange to you, this is valid Perl syntax:
1444 * $var = $$hashref # We can place a
1445 * # comment here and then ...
1446 * {key_into_hashref};
1448 * POD sections are not allowed but we leave complaints about
1449 * that to the compiler/interpreter.
1451 /* We only extract strings from the first hash key (if present). */
1453 if (maybe_hash_deref || maybe_hash_value)
1455 bool is_dereference = false;
1460 while (is_whitespace (c));
1464 int c2 = phase1_getc ();
1468 is_dereference = true;
1472 while (is_whitespace (c));
1474 else if (c2 != '\n')
1476 /* Discarding the newline is harmless here. The only
1477 special character recognized after a minus is greater-than
1478 for dereference. However, the sequence "-\n>" that we
1479 treat incorrectly here, is a syntax error. */
1484 if (maybe_hash_value && is_dereference)
1486 tp->type = token_type_object;
1488 fprintf (stderr, "%s:%d: first keys preceded by \"->\"\n",
1489 real_file_name, line_number);
1492 else if (maybe_hash_value)
1494 /* Fake it into a hash. */
1495 tp->string[0] = '%';
1498 /* Do NOT change that into else if (see above). */
1499 if ((maybe_hash_value || maybe_hash_deref) && c == '{')
1501 void *keyword_value;
1504 fprintf (stderr, "%s:%d: first keys preceded by '{'\n",
1505 real_file_name, line_number);
1508 if (hash_find_entry (&keywords, tp->string, strlen (tp->string),
1509 &keyword_value) == 0)
1511 /* TODO: Shouldn't we use the shapes of the keyword, instead
1512 of hardwiring argnum1 = 1 ?
1513 const struct callshapes *shapes =
1514 (const struct callshapes *) keyword_value;
1516 struct callshapes shapes;
1517 shapes.keyword = tp->string; /* XXX storage duration? */
1518 shapes.keyword_len = strlen (tp->string);
1520 shapes.shapes[0].argnum1 = 1;
1521 shapes.shapes[0].argnum2 = 0;
1522 shapes.shapes[0].argnumc = 0;
1523 shapes.shapes[0].argnum1_glib_context = false;
1524 shapes.shapes[0].argnum2_glib_context = false;
1525 shapes.shapes[0].argtotal = 0;
1526 string_list_init (&shapes.shapes[0].xcomments);
1529 /* Extract a possible string from the key. Before proceeding
1530 we check whether the open curly is followed by a symbol and
1531 then by a right curly. */
1532 flag_context_list_iterator_ty context_iter =
1533 flag_context_list_iterator (
1534 flag_context_list_table_lookup (
1535 flag_context_list_table,
1536 tp->string, strlen (tp->string)));
1537 token_ty *t1 = x_perl_lex (mlp);
1540 fprintf (stderr, "%s:%d: extracting string key\n",
1541 real_file_name, line_number);
1544 if (t1->type == token_type_symbol
1545 || t1->type == token_type_named_op)
1547 token_ty *t2 = x_perl_lex (mlp);
1548 if (t2->type == token_type_rbrace)
1550 flag_context_ty context;
1554 inherited_context (null_context,
1555 flag_context_list_iterator_advance (
1558 pos.line_number = line_number;
1559 pos.file_name = logical_file_name;
1561 xgettext_current_source_encoding = po_charset_utf8;
1562 remember_a_message (mlp, NULL, xstrdup (t1->string),
1563 context, &pos, NULL, savable_comment);
1564 xgettext_current_source_encoding = xgettext_global_source_encoding;
1576 if (extract_balanced (mlp, token_type_rbrace, true, false,
1577 null_context, context_iter,
1578 1, arglist_parser_alloc (mlp, &shapes)))
1594 /* Now consume "->", "[...]", and "{...}". */
1597 int c = phase2_getc ();
1604 fprintf (stderr, "%s:%d: extracting balanced '{' after varname\n",
1605 real_file_name, line_number);
1607 extract_balanced (mlp, token_type_rbrace, true, false,
1608 null_context, null_context_list_iterator,
1609 1, arglist_parser_alloc (mlp, NULL));
1614 fprintf (stderr, "%s:%d: extracting balanced '[' after varname\n",
1615 real_file_name, line_number);
1617 extract_balanced (mlp, token_type_rbracket, true, false,
1618 null_context, null_context_list_iterator,
1619 1, arglist_parser_alloc (mlp, NULL));
1623 c2 = phase1_getc ();
1627 fprintf (stderr, "%s:%d: another \"->\" after varname\n",
1628 real_file_name, line_number);
1632 else if (c2 != '\n')
1634 /* Discarding the newline is harmless here. The only
1635 special character recognized after a minus is greater-than
1636 for dereference. However, the sequence "-\n>" that we
1637 treat incorrectly here, is a syntax error. */
1644 fprintf (stderr, "%s:%d: variable finished\n",
1645 real_file_name, line_number);
1653 /* Actually a simplified version of extract_variable(). It searches for
1654 variables inside a double-quoted string that may interpolate to
1655 some keyword hash (reference). The string is UTF-8 encoded. */
1657 interpolate_keywords (message_list_ty *mlp, const char *string, int lineno)
1659 static char *buffer;
1660 static int bufmax = 0;
1662 flag_context_ty context;
1664 bool maybe_hash_deref = false;
1686 * one_dollar: dollar sign seen in state INITIAL
1687 * two_dollars: another dollar-sign has been seen in state ONE_DOLLAR
1688 * identifier: a valid identifier character has been seen in state
1689 * ONE_DOLLAR or TWO_DOLLARS
1690 * minus: a minus-sign has been seen in state IDENTIFIER
1691 * wait_lbrace: a greater-than has been seen in state MINUS
1692 * wait_quote: a left brace has been seen in state IDENTIFIER or in
1694 * dquote: a double-quote has been seen in state WAIT_QUOTE
1695 * squote: a single-quote has been seen in state WAIT_QUOTE
1696 * barekey: an bareword character has been seen in state WAIT_QUOTE
1697 * wait_rbrace: closing quote has been seen in state DQUOTE or SQUOTE
1699 * In the states initial...identifier the context is null_context; in the
1700 * states minus...wait_rbrace the context is the one suitable for the first
1701 * argument of the last seen identifier.
1704 context = null_context;
1706 token.type = token_type_string;
1707 token.sub_type = string_type_qq;
1708 token.line_number = line_number;
1709 /* No need for token.comment = add_reference (savable_comment); here.
1710 We can let token.comment uninitialized here, and use savable_comment
1711 directly, because this function only parses the given string and does
1712 not call phase2_getc. */
1713 pos.file_name = logical_file_name;
1714 pos.line_number = lineno;
1716 while ((c = (unsigned char) *string++) != '\0')
1718 void *keyword_value;
1720 if (state == initial)
1726 if (bufpos + 1 >= bufmax)
1728 bufmax = 2 * bufmax + 10;
1729 buffer = xrealloc (buffer, bufmax);
1738 c = (unsigned char) *string++;
1743 buffer[bufpos++] = '$';
1744 maybe_hash_deref = false;
1756 * This is enough to make us believe later that we dereference
1759 maybe_hash_deref = true;
1760 state = two_dollars;
1763 if (c == '_' || c == ':' || c == '\'' || c >= 0x80
1764 || (c >= 'A' && c <= 'Z')
1765 || (c >= 'a' && c <= 'z')
1766 || (c >= '0' && c <= '9'))
1768 buffer[bufpos++] = c;
1777 if (c == '_' || c == ':' || c == '\'' || c >= 0x80
1778 || (c >= 'A' && c <= 'Z')
1779 || (c >= 'a' && c <= 'z')
1780 || (c >= '0' && c <= '9'))
1782 buffer[bufpos++] = c;
1792 if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value)
1795 flag_context_list_iterator_ty context_iter =
1796 flag_context_list_iterator (
1797 flag_context_list_table_lookup (
1798 flag_context_list_table,
1801 inherited_context (null_context,
1802 flag_context_list_iterator_advance (
1810 if (!maybe_hash_deref)
1812 if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value)
1815 flag_context_list_iterator_ty context_iter =
1816 flag_context_list_iterator (
1817 flag_context_list_table_lookup (
1818 flag_context_list_table,
1821 inherited_context (null_context,
1822 flag_context_list_iterator_advance (
1830 if (c == '_' || c == ':' || c == '\'' || c >= 0x80
1831 || (c >= 'A' && c <= 'Z')
1832 || (c >= 'a' && c <= 'z')
1833 || (c >= '0' && c <= '9'))
1835 buffer[bufpos++] = c;
1846 state = wait_lbrace;
1849 context = null_context;
1861 context = null_context;
1872 pos.line_number = lineno;
1877 pos.line_number = lineno;
1882 if (c == '_' || (c >= '0' && c <= '9') || c >= 0x80
1883 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
1885 pos.line_number = lineno;
1887 buffer[bufpos++] = c;
1892 context = null_context;
1902 /* The resulting string has to be interpolated twice. */
1903 buffer[bufpos] = '\0';
1904 token.string = xstrdup (buffer);
1905 extract_quotelike_pass3 (&token, EXIT_FAILURE);
1906 /* The string can only shrink with interpolation (because
1908 if (!(strlen (token.string) <= bufpos))
1910 strcpy (buffer, token.string);
1911 free (token.string);
1912 state = wait_rbrace;
1915 if (string[0] == '\"')
1917 buffer[bufpos++] = string++[0];
1921 buffer[bufpos++] = '\\';
1922 buffer[bufpos++] = string++[0];
1926 context = null_context;
1931 buffer[bufpos++] = c;
1939 state = wait_rbrace;
1942 if (string[0] == '\'')
1944 buffer[bufpos++] = string++[0];
1948 buffer[bufpos++] = '\\';
1949 buffer[bufpos++] = string++[0];
1953 context = null_context;
1958 buffer[bufpos++] = c;
1963 if (c == '_' || (c >= '0' && c <= '9') || c >= 0x80
1964 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
1966 buffer[bufpos++] = c;
1969 else if (is_whitespace (c))
1971 state = wait_rbrace;
1976 context = null_context;
1980 /* Must be right brace. */
1988 buffer[bufpos] = '\0';
1989 token.string = xstrdup (buffer);
1990 extract_quotelike_pass3 (&token, EXIT_FAILURE);
1991 xgettext_current_source_encoding = po_charset_utf8;
1992 remember_a_message (mlp, NULL, token.string, context, &pos,
1993 NULL, savable_comment);
1994 xgettext_current_source_encoding = xgettext_global_source_encoding;
1997 context = null_context;
2006 /* There is an ambiguity about '/' and '?': They can start an operator
2007 (division operator '/' or '/=' or the conditional operator '?'), or they can
2008 start a regular expression. The distinction is important because inside
2009 regular expressions, '#' loses its special meaning. This function helps
2010 making the decision (a heuristic). See the documentation for details. */
2012 prefer_regexp_over_division (token_type_ty type)
2018 case token_type_eof:
2021 case token_type_lparen:
2024 case token_type_rparen:
2027 case token_type_comma:
2030 case token_type_fat_comma:
2033 case token_type_dereference:
2036 case token_type_semicolon:
2039 case token_type_lbrace:
2042 case token_type_rbrace:
2045 case token_type_lbracket:
2048 case token_type_rbracket:
2051 case token_type_string:
2054 case token_type_number:
2057 case token_type_named_op:
2060 case token_type_variable:
2063 case token_type_object:
2066 case token_type_symbol:
2067 case token_type_keyword_symbol:
2070 case token_type_regex_op:
2073 case token_type_dot:
2076 case token_type_other:
2084 fprintf (stderr, "Prefer regexp over division after %s: %s\n",
2085 token2string (&ty), retval ? "true" : "false");
2091 /* Last token type seen in the stream. Important for the interpretation
2092 of slash and question mark. */
2093 static token_type_ty last_token_type;
2095 /* Combine characters into tokens. Discard whitespace. */
2098 x_perl_prelex (message_list_ty *mlp, token_ty *tp)
2100 static char *buffer;
2108 tp->line_number = line_number;
2109 tp->last_type = last_token_type;
2114 tp->type = token_type_eof;
2118 if (last_non_comment_line > last_comment_line)
2119 savable_comment_reset ();
2123 /* Ignore whitespace. */
2132 extract_variable (mlp, tp, c);
2138 last_non_comment_line = tp->line_number;
2144 int c2 = phase1_getc ();
2148 tp->type = token_type_other;
2151 else if (!(c2 >= '0' && c2 <= '9'))
2153 tp->type = token_type_dot;
2158 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2159 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2160 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2161 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2164 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2165 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2166 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2167 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2169 case '0': case '1': case '2': case '3': case '4':
2170 case '5': case '6': case '7': case '8': case '9':
2171 /* Symbol, or part of a number. */
2175 if (bufpos >= bufmax)
2177 bufmax = 2 * bufmax + 10;
2178 buffer = xrealloc (buffer, bufmax);
2180 buffer[bufpos++] = c;
2184 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2185 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2186 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2187 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2190 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2191 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2192 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2193 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2195 case '0': case '1': case '2': case '3': case '4':
2196 case '5': case '6': case '7': case '8': case '9':
2205 if (bufpos >= bufmax)
2207 bufmax = 2 * bufmax + 10;
2208 buffer = xrealloc (buffer, bufmax);
2210 buffer[bufpos] = '\0';
2212 if (strcmp (buffer, "__END__") == 0
2213 || strcmp (buffer, "__DATA__") == 0)
2216 tp->type = token_type_eof;
2219 else if (strcmp (buffer, "and") == 0
2220 || strcmp (buffer, "cmp") == 0
2221 || strcmp (buffer, "eq") == 0
2222 || strcmp (buffer, "if") == 0
2223 || strcmp (buffer, "ge") == 0
2224 || strcmp (buffer, "gt") == 0
2225 || strcmp (buffer, "le") == 0
2226 || strcmp (buffer, "lt") == 0
2227 || strcmp (buffer, "ne") == 0
2228 || strcmp (buffer, "not") == 0
2229 || strcmp (buffer, "or") == 0
2230 || strcmp (buffer, "unless") == 0
2231 || strcmp (buffer, "while") == 0
2232 || strcmp (buffer, "xor") == 0)
2234 tp->type = token_type_named_op;
2235 tp->string = xstrdup (buffer);
2238 else if (strcmp (buffer, "s") == 0
2239 || strcmp (buffer, "y") == 0
2240 || strcmp (buffer, "tr") == 0)
2242 int delim = phase1_getc ();
2244 while (is_whitespace (delim))
2245 delim = phase2_getc ();
2249 tp->type = token_type_eof;
2252 if ((delim >= '0' && delim <= '9')
2253 || (delim >= 'A' && delim <= 'Z')
2254 || (delim >= 'a' && delim <= 'z'))
2256 /* False positive. */
2257 phase2_ungetc (delim);
2258 tp->type = token_type_symbol;
2259 tp->sub_type = symbol_type_none;
2260 tp->string = xstrdup (buffer);
2263 extract_triple_quotelike (mlp, tp, delim,
2264 buffer[0] == 's' && delim != '\'');
2266 /* Eat the following modifiers. */
2269 while (c >= 'a' && c <= 'z');
2273 else if (strcmp (buffer, "m") == 0)
2275 int delim = phase1_getc ();
2277 while (is_whitespace (delim))
2278 delim = phase2_getc ();
2282 tp->type = token_type_eof;
2285 if ((delim >= '0' && delim <= '9')
2286 || (delim >= 'A' && delim <= 'Z')
2287 || (delim >= 'a' && delim <= 'z'))
2289 /* False positive. */
2290 phase2_ungetc (delim);
2291 tp->type = token_type_symbol;
2292 tp->sub_type = symbol_type_none;
2293 tp->string = xstrdup (buffer);
2296 extract_quotelike (tp, delim);
2298 interpolate_keywords (mlp, tp->string, line_number);
2300 drop_reference (tp->comment);
2301 tp->type = token_type_regex_op;
2303 /* Eat the following modifiers. */
2306 while (c >= 'a' && c <= 'z');
2310 else if (strcmp (buffer, "qq") == 0
2311 || strcmp (buffer, "q") == 0
2312 || strcmp (buffer, "qx") == 0
2313 || strcmp (buffer, "qw") == 0
2314 || strcmp (buffer, "qr") == 0)
2316 /* The qw (...) construct is not really a string but we
2317 can treat in the same manner and then pretend it is
2318 a symbol. Rationale: Saying "qw (foo bar)" is the
2319 same as "my @list = ('foo', 'bar'); @list;". */
2321 int delim = phase1_getc ();
2323 while (is_whitespace (delim))
2324 delim = phase2_getc ();
2328 tp->type = token_type_eof;
2332 if ((delim >= '0' && delim <= '9')
2333 || (delim >= 'A' && delim <= 'Z')
2334 || (delim >= 'a' && delim <= 'z'))
2336 /* False positive. */
2337 phase2_ungetc (delim);
2338 tp->type = token_type_symbol;
2339 tp->sub_type = symbol_type_none;
2340 tp->string = xstrdup (buffer);
2344 extract_quotelike (tp, delim);
2350 tp->type = token_type_string;
2351 tp->sub_type = string_type_qq;
2352 interpolate_keywords (mlp, tp->string, line_number);
2355 drop_reference (tp->comment);
2356 tp->type = token_type_regex_op;
2359 drop_reference (tp->comment);
2360 tp->type = token_type_symbol;
2361 tp->sub_type = symbol_type_none;
2364 tp->type = token_type_string;
2365 tp->sub_type = string_type_q;
2372 else if ((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.')
2374 tp->type = token_type_number;
2377 tp->type = token_type_symbol;
2378 tp->sub_type = (strcmp (buffer, "sub") == 0
2380 : symbol_type_none);
2381 tp->string = xstrdup (buffer);
2385 extract_quotelike (tp, c);
2386 tp->sub_type = string_type_qq;
2387 interpolate_keywords (mlp, tp->string, line_number);
2391 extract_quotelike (tp, c);
2392 tp->sub_type = string_type_qq;
2393 interpolate_keywords (mlp, tp->string, line_number);
2397 extract_quotelike (tp, c);
2398 tp->sub_type = string_type_q;
2404 /* Ignore empty list. */
2408 tp->type = token_type_lparen;
2412 tp->type = token_type_rparen;
2416 tp->type = token_type_lbrace;
2420 tp->type = token_type_rbrace;
2424 tp->type = token_type_lbracket;
2428 tp->type = token_type_rbracket;
2432 tp->type = token_type_semicolon;
2436 tp->type = token_type_comma;
2440 /* Check for fat comma. */
2444 tp->type = token_type_fat_comma;
2447 else if (linepos == 2
2448 && (last_token_type == token_type_semicolon
2449 || last_token_type == token_type_rbrace)
2450 && ((c >= 'A' && c <='Z')
2451 || (c >= 'a' && c <= 'z')))
2454 fprintf (stderr, "%s:%d: start pod section\n",
2455 real_file_name, line_number);
2459 fprintf (stderr, "%s:%d: end pod section\n",
2460 real_file_name, line_number);
2465 tp->type = token_type_other;
2469 /* Check for <<EOF and friends. */
2477 extract_quotelike (tp, c);
2478 string = get_here_document (tp->string);
2480 tp->string = string;
2481 tp->type = token_type_string;
2482 tp->sub_type = string_type_verbatim;
2483 tp->line_number = line_number + 1;
2489 extract_quotelike (tp, c);
2490 string = get_here_document (tp->string);
2492 tp->string = string;
2493 tp->type = token_type_string;
2494 tp->sub_type = string_type_qq;
2495 tp->line_number = line_number + 1;
2496 interpolate_keywords (mlp, tp->string, tp->line_number);
2499 else if ((c >= 'A' && c <= 'Z')
2500 || (c >= 'a' && c <= 'z')
2504 while ((c >= 'A' && c <= 'Z')
2505 || (c >= 'a' && c <= 'z')
2506 || (c >= '0' && c <= '9')
2507 || c == '_' || c >= 0x80)
2509 if (bufpos >= bufmax)
2511 bufmax = 2 * bufmax + 10;
2512 buffer = xrealloc (buffer, bufmax);
2514 buffer[bufpos++] = c;
2519 tp->type = token_type_eof;
2526 if (bufpos >= bufmax)
2528 bufmax = 2 * bufmax + 10;
2529 buffer = xrealloc (buffer, bufmax);
2531 buffer[bufpos++] = '\0';
2532 string = get_here_document (buffer);
2533 tp->string = string;
2534 tp->type = token_type_string;
2535 tp->sub_type = string_type_qq;
2536 tp->comment = add_reference (savable_comment);
2537 tp->line_number = line_number + 1;
2538 interpolate_keywords (mlp, tp->string, tp->line_number);
2544 tp->type = token_type_other;
2551 tp->type = token_type_other;
2553 return; /* End of case '>'. */
2556 /* Check for dereferencing operator. */
2560 tp->type = token_type_dereference;
2563 else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2565 /* One of the -X (filetest) functions. We play safe
2566 and accept all alphabetical characters here. */
2567 tp->type = token_type_other;
2571 tp->type = token_type_other;
2576 if (prefer_regexp_over_division (tp->last_type))
2578 extract_quotelike (tp, c);
2579 interpolate_keywords (mlp, tp->string, line_number);
2581 drop_reference (tp->comment);
2582 tp->type = token_type_regex_op;
2583 /* Eat the following modifiers. */
2586 while (c >= 'a' && c <= 'z');
2590 /* Recognize operator '//'. */
2600 /* We could carefully recognize each of the 2 and 3 character
2601 operators, but it is not necessary, except for the '//' operator,
2602 as we only need to recognize gettext invocations. Don't
2604 tp->type = token_type_other;
2611 /* A token stack used as a lookahead buffer. */
2613 typedef struct token_stack_ty token_stack_ty;
2614 struct token_stack_ty
2621 static struct token_stack_ty token_stack;
2624 /* Dumps all resources allocated by stack STACK. */
2626 token_stack_dump (token_stack_ty *stack)
2630 fprintf (stderr, "BEGIN STACK DUMP\n");
2631 for (i = 0; i < stack->nitems; i++)
2633 token_ty *token = stack->items[i];
2634 fprintf (stderr, " [%s]\n", token2string (token));
2635 switch (token->type)
2637 case token_type_named_op:
2638 case token_type_string:
2639 case token_type_symbol:
2640 case token_type_variable:
2641 fprintf (stderr, " string: %s\n", token->string);
2643 case token_type_object:
2644 fprintf (stderr, " string: %s->\n", token->string);
2649 fprintf (stderr, "END STACK DUMP\n");
2654 /* Pushes the token TOKEN onto the stack STACK. */
2656 token_stack_push (token_stack_ty *stack, token_ty *token)
2658 if (stack->nitems >= stack->nitems_max)
2662 stack->nitems_max = 2 * stack->nitems_max + 4;
2663 nbytes = stack->nitems_max * sizeof (token_ty *);
2664 stack->items = xrealloc (stack->items, nbytes);
2666 stack->items[stack->nitems++] = token;
2669 /* Pops the most recently pushed token from the stack STACK and returns it.
2670 Returns NULL if the stack is empty. */
2671 static inline token_ty *
2672 token_stack_pop (token_stack_ty *stack)
2674 if (stack->nitems > 0)
2675 return stack->items[--(stack->nitems)];
2680 /* Return the top of the stack without removing it from the stack, or
2681 NULL if the stack is empty. */
2682 static inline token_ty *
2683 token_stack_peek (const token_stack_ty *stack)
2685 if (stack->nitems > 0)
2686 return stack->items[stack->nitems - 1];
2691 /* Frees all resources allocated by stack STACK. */
2693 token_stack_free (token_stack_ty *stack)
2697 for (i = 0; i < stack->nitems; i++)
2698 free_token (stack->items[i]);
2699 free (stack->items);
2704 x_perl_lex (message_list_ty *mlp)
2707 int dummy = token_stack_dump (&token_stack);
2709 token_ty *tp = token_stack_pop (&token_stack);
2713 tp = XMALLOC (token_ty);
2714 x_perl_prelex (mlp, tp);
2715 tp->last_type = last_token_type;
2716 last_token_type = tp->type;
2719 fprintf (stderr, "%s:%d: x_perl_prelex returned %s\n",
2720 real_file_name, line_number, token2string (tp));
2723 /* The interpretation of a slash or question mark after a function call
2724 depends on the prototype of that function. If the function expects
2725 at least one argument, a regular expression is preferred, otherwise
2726 an operator. With our limited means, we can only guess here. If
2727 the function is a builtin that takes no arguments, we prefer an
2728 operator by silently turning the last symbol into a variable instead
2731 Method calls without parentheses are not ambiguous. After them, an
2732 operator must follow. Due to some ideosyncrasies in this parser
2733 they are treated in two different manners. If the call is
2734 chained ($foo->bar->baz) the token left of the symbol is a
2735 dereference operator. If it is not chained ($foo->bar) the
2736 dereference operator is consumed with the extracted variable. The
2737 latter case is handled below. */
2738 if (tp->type == token_type_symbol)
2740 if (tp->last_type == token_type_dereference)
2742 /* Class method call or chained method call (with at least
2743 two arrow operators). */
2744 last_token_type = token_type_variable;
2746 else if (tp->last_type == token_type_object)
2748 /* Instance method, not chained. */
2749 last_token_type = token_type_variable;
2751 else if (strcmp (tp->string, "wantarray") == 0
2752 || strcmp (tp->string, "fork") == 0
2753 || strcmp (tp->string, "getlogin") == 0
2754 || strcmp (tp->string, "getppid") == 0
2755 || strcmp (tp->string, "getpwent") == 0
2756 || strcmp (tp->string, "getgrent") == 0
2757 || strcmp (tp->string, "gethostent") == 0
2758 || strcmp (tp->string, "getnetent") == 0
2759 || strcmp (tp->string, "getprotoent") == 0
2760 || strcmp (tp->string, "getservent") == 0
2761 || strcmp (tp->string, "setpwent") == 0
2762 || strcmp (tp->string, "setgrent") == 0
2763 || strcmp (tp->string, "endpwent") == 0
2764 || strcmp (tp->string, "endgrent") == 0
2765 || strcmp (tp->string, "endhostent") == 0
2766 || strcmp (tp->string, "endnetent") == 0
2767 || strcmp (tp->string, "endprotoent") == 0
2768 || strcmp (tp->string, "endservent") == 0
2769 || strcmp (tp->string, "time") == 0
2770 || strcmp (tp->string, "times") == 0
2771 || strcmp (tp->string, "wait") == 0
2772 || strcmp (tp->string, "wantarray") == 0)
2774 /* A Perl built-in function that does not accept arguments. */
2775 last_token_type = token_type_variable;
2782 fprintf (stderr, "%s:%d: %s recycled from stack\n",
2783 real_file_name, line_number, token2string (tp));
2787 /* A symbol followed by a fat comma is really a single-quoted string.
2788 Function definitions or forward declarations also need a special
2789 handling because the dollars and at signs inside the parentheses
2790 must not be interpreted as the beginning of a variable ')'. */
2791 if (tp->type == token_type_symbol || tp->type == token_type_named_op)
2793 token_ty *next = token_stack_peek (&token_stack);
2798 fprintf (stderr, "%s:%d: pre-fetching next token\n",
2799 real_file_name, line_number);
2801 next = x_perl_lex (mlp);
2802 x_perl_unlex (next);
2804 fprintf (stderr, "%s:%d: unshifted next token\n",
2805 real_file_name, line_number);
2810 fprintf (stderr, "%s:%d: next token is %s\n",
2811 real_file_name, line_number, token2string (next));
2814 if (next->type == token_type_fat_comma)
2816 tp->type = token_type_string;
2817 tp->sub_type = string_type_q;
2818 tp->comment = add_reference (savable_comment);
2821 "%s:%d: token %s mutated to token_type_string\n",
2822 real_file_name, line_number, token2string (tp));
2825 else if (tp->type == token_type_symbol && tp->sub_type == symbol_type_sub
2826 && next->type == token_type_symbol)
2828 /* Start of a function declaration or definition. Mark this
2829 symbol as a function name, so that we can later eat up
2830 possible prototype information. */
2832 fprintf (stderr, "%s:%d: subroutine declaration/definition '%s'\n",
2833 real_file_name, line_number, next->string);
2835 next->sub_type = symbol_type_function;
2837 else if (tp->type == token_type_symbol
2838 && (tp->sub_type == symbol_type_sub
2839 || tp->sub_type == symbol_type_function)
2840 && next->type == token_type_lparen)
2842 /* For simplicity we simply consume everything up to the
2843 closing parenthesis. Actually only a limited set of
2844 characters is allowed inside parentheses but we leave
2845 complaints to the interpreter and are prepared for
2846 future extensions to the Perl syntax. */
2850 fprintf (stderr, "%s:%d: consuming prototype information\n",
2851 real_file_name, line_number);
2858 fprintf (stderr, " consuming character '%c'\n", c);
2861 while (c != EOF && c != ')');
2870 x_perl_unlex (token_ty *tp)
2872 token_stack_push (&token_stack, tp);
2876 /* ========================= Extracting strings. ========================== */
2878 /* Assuming TP is a string token, this function accumulates all subsequent
2879 . string2 . string3 ... to the string. (String concatenation.) */
2882 collect_message (message_list_ty *mlp, token_ty *tp, int error_level)
2887 extract_quotelike_pass3 (tp, error_level);
2888 string = xstrdup (tp->string);
2889 len = strlen (tp->string) + 1;
2897 while (is_whitespace (c));
2907 while (is_whitespace (c));
2911 if (c == '"' || c == '\'' || c == '`'
2912 || ((c == '/' || c == '?')
2913 && prefer_regexp_over_division (tp->last_type))
2916 token_ty *qstring = x_perl_lex (mlp);
2917 if (qstring->type != token_type_string)
2919 /* assert (qstring->type == token_type_symbol) */
2920 x_perl_unlex (qstring);
2924 extract_quotelike_pass3 (qstring, error_level);
2925 len += strlen (qstring->string);
2926 string = xrealloc (string, len);
2927 strcat (string, qstring->string);
2928 free_token (qstring);
2933 /* The file is broken into tokens. Scan the token stream, looking for
2934 a keyword, followed by a left paren, followed by a string. When we
2935 see this sequence, we have something to remember. We assume we are
2936 looking at a valid Perl program, and leave the complaints about
2937 the grammar to the compiler.
2939 Normal handling: Look for
2940 keyword ( ... msgid ... )
2941 Plural handling: Look for
2942 keyword ( ... msgid ... msgid_plural ... )
2944 We use recursion because the arguments before msgid or between msgid
2945 and msgid_plural can contain subexpressions of the same form.
2947 In Perl, parentheses around function arguments can be omitted.
2949 The general rules are:
2950 1) Functions declared with a prototype take exactly the specified number
2952 sub one_arg ($) { ... }
2953 sub two_args ($$) { ... }
2954 2) When a function name is immediately followed by an opening parenthesis,
2955 the argument list ends at the corresponding closing parenthesis.
2957 If rule 1 and rule 2 are contradictory, i.e. when the program calls a
2958 function with an explicit argument list and the wrong number of arguments,
2959 the program is invalid:
2960 sub two_args ($$) { ... }
2961 foo two_args (x), y - invalid due to rules 1 and 2
2963 Ambiguities are resolved as follows:
2964 3) Some built-ins, such as 'abs', 'sqrt', 'sin', 'cos', ..., and functions
2965 declared with a prototype of exactly one argument take exactly one
2967 foo sin x, y ==> foo (sin (x), y)
2968 sub one_arg ($) { ... }
2969 foo one_arg x, y, z ==> foo (one_arg (x), y, z)
2970 4) Other identifiers, if not immediately followed by an opening
2971 parenthesis, consume the entire remaining argument list:
2972 foo bar x, y ==> foo (bar (x, y))
2973 sub two_args ($$) { ... }
2974 foo two_args x, y ==> foo (two_args (x, y))
2976 Other series of comma separated expressions without a function name at
2977 the beginning are comma expressions:
2978 sub two_args ($$) { ... }
2979 foo two_args x, (y, z) ==> foo (two_args (x, (y, z)))
2980 Note that the evaluation of comma expressions returns a list of values
2981 when in list context (e.g. inside the argument list of a function without
2982 prototype) but only one value when inside the argument list of a function
2984 sub print3 ($$$) { print @_ }
2985 print3 5, (6, 7), 8 ==> 578
2986 print 5, (6, 7), 8 ==> 5678
2988 Where rule 3 or 4 contradict rule 1 or 2, the program is invalid:
2989 sin (x, y) - invalid due to rules 2 and 3
2990 sub one_arg ($) { ... }
2991 one_arg (x, y) - invalid due to rules 2 and 3
2992 sub two_args ($$) { ... }
2993 foo two_args x, y, z - invalid due to rules 1 and 4
2996 /* Extract messages until the next balanced closing parenthesis.
2997 Extracted messages are added to MLP.
2999 DELIM can be either token_type_rbrace, token_type_rbracket,
3000 token_type_rparen. Additionally, if COMMA_DELIM is true, parsing
3001 stops at the next comma outside parentheses.
3003 ARG is the current argument list position, starts with 1.
3004 ARGPARSER is the corresponding argument list parser.
3006 Returns true for EOF, false otherwise. */
3009 extract_balanced (message_list_ty *mlp,
3010 token_type_ty delim, bool eat_delim, bool comma_delim,
3011 flag_context_ty outer_context,
3012 flag_context_list_iterator_ty context_iter,
3013 int arg, struct arglist_parser *argparser)
3015 /* Whether to implicitly assume the next tokens are arguments even without
3017 bool next_is_argument = false;
3018 /* Parameters of the keyword just seen. Defined only when next_is_argument
3020 const struct callshapes *next_shapes = NULL;
3021 struct arglist_parser *next_argparser = NULL;
3023 /* Whether to not consider strings until the next comma. */
3024 bool skip_until_comma = false;
3026 /* Context iterator that will be used if the next token is a '('. */
3027 flag_context_list_iterator_ty next_context_iter =
3028 passthrough_context_list_iterator;
3029 /* Current context. */
3030 flag_context_ty inner_context =
3031 inherited_context (outer_context,
3032 flag_context_list_iterator_advance (&context_iter));
3035 static int nesting_level = 0;
3042 /* The current token. */
3045 tp = x_perl_lex (mlp);
3047 if (delim == tp->type)
3049 xgettext_current_source_encoding = po_charset_utf8;
3050 arglist_parser_done (argparser, arg);
3051 xgettext_current_source_encoding = xgettext_global_source_encoding;
3052 if (next_argparser != NULL)
3053 free (next_argparser);
3055 fprintf (stderr, "%s:%d: extract_balanced finished (%d)\n",
3056 logical_file_name, tp->line_number, --nesting_level);
3061 /* Preserve the delimiter for the caller. */
3066 if (comma_delim && tp->type == token_type_comma)
3068 xgettext_current_source_encoding = po_charset_utf8;
3069 arglist_parser_done (argparser, arg);
3070 xgettext_current_source_encoding = xgettext_global_source_encoding;
3071 if (next_argparser != NULL)
3072 free (next_argparser);
3074 fprintf (stderr, "%s:%d: extract_balanced finished at comma (%d)\n",
3075 logical_file_name, tp->line_number, --nesting_level);
3081 if (next_is_argument && tp->type != token_type_lparen)
3083 /* An argument list starts, even though there is no '('. */
3084 bool next_comma_delim;
3088 if (next_shapes != NULL)
3089 /* We know something about the function being called. Assume
3090 that it consumes only one argument if no argument number or
3091 total > 1 is specified. */
3095 next_comma_delim = true;
3096 for (i = 0; i < next_shapes->nshapes; i++)
3098 const struct callshape *shape = &next_shapes->shapes[i];
3100 if (shape->argnum1 > 1
3101 || shape->argnum2 > 1
3102 || shape->argnumc > 1
3103 || shape->argtotal > 1)
3104 next_comma_delim = false;
3108 /* We know nothing about the function being called. It could be
3109 a function prototyped to take only one argument, or on the other
3110 hand it could be prototyped to take more than one argument or an
3111 arbitrary argument list or it could be unprototyped. Due to
3112 the way the parser works, assuming the first case gives the
3114 next_comma_delim = true;
3116 if (extract_balanced (mlp, delim, false, next_comma_delim,
3117 inner_context, next_context_iter,
3120 xgettext_current_source_encoding = po_charset_utf8;
3121 arglist_parser_done (argparser, arg);
3122 xgettext_current_source_encoding = xgettext_global_source_encoding;
3126 next_is_argument = false;
3127 next_argparser = NULL;
3128 next_context_iter = null_context_list_iterator;
3134 case token_type_symbol:
3135 case token_type_keyword_symbol:
3137 fprintf (stderr, "%s:%d: type symbol (%d) \"%s\"\n",
3138 logical_file_name, tp->line_number, nesting_level,
3143 void *keyword_value;
3145 if (hash_find_entry (&keywords, tp->string, strlen (tp->string),
3146 &keyword_value) == 0)
3148 const struct callshapes *shapes =
3149 (const struct callshapes *) keyword_value;
3151 next_shapes = shapes;
3152 next_argparser = arglist_parser_alloc (mlp, shapes);
3157 next_argparser = arglist_parser_alloc (mlp, NULL);
3160 next_is_argument = true;
3162 flag_context_list_iterator (
3163 flag_context_list_table_lookup (
3164 flag_context_list_table,
3165 tp->string, strlen (tp->string)));
3168 case token_type_variable:
3170 fprintf (stderr, "%s:%d: type variable (%d) \"%s\"\n",
3171 logical_file_name, tp->line_number, nesting_level,
3174 next_is_argument = false;
3175 if (next_argparser != NULL)
3176 free (next_argparser);
3177 next_argparser = NULL;
3178 next_context_iter = null_context_list_iterator;
3181 case token_type_object:
3183 fprintf (stderr, "%s:%d: type object (%d) \"%s->\"\n",
3184 logical_file_name, tp->line_number, nesting_level,
3187 next_is_argument = false;
3188 if (next_argparser != NULL)
3189 free (next_argparser);
3190 next_argparser = NULL;
3191 next_context_iter = null_context_list_iterator;
3194 case token_type_lparen:
3196 fprintf (stderr, "%s:%d: type left parenthesis (%d)\n",
3197 logical_file_name, tp->line_number, nesting_level);
3199 if (next_is_argument)
3201 /* Parse the argument list of a function call. */
3202 if (extract_balanced (mlp, token_type_rparen, true, false,
3203 inner_context, next_context_iter,
3206 xgettext_current_source_encoding = po_charset_utf8;
3207 arglist_parser_done (argparser, arg);
3208 xgettext_current_source_encoding = xgettext_global_source_encoding;
3211 next_is_argument = false;
3212 next_argparser = NULL;
3216 /* Parse a parenthesized expression or comma expression. */
3217 if (extract_balanced (mlp, token_type_rparen, true, false,
3218 inner_context, next_context_iter,
3219 arg, arglist_parser_clone (argparser)))
3221 xgettext_current_source_encoding = po_charset_utf8;
3222 arglist_parser_done (argparser, arg);
3223 xgettext_current_source_encoding = xgettext_global_source_encoding;
3224 if (next_argparser != NULL)
3225 free (next_argparser);
3229 next_is_argument = false;
3230 if (next_argparser != NULL)
3231 free (next_argparser);
3232 next_argparser = NULL;
3234 skip_until_comma = true;
3235 next_context_iter = null_context_list_iterator;
3238 case token_type_rparen:
3240 fprintf (stderr, "%s:%d: type right parenthesis (%d)\n",
3241 logical_file_name, tp->line_number, nesting_level);
3243 next_is_argument = false;
3244 if (next_argparser != NULL)
3245 free (next_argparser);
3246 next_argparser = NULL;
3247 skip_until_comma = true;
3248 next_context_iter = null_context_list_iterator;
3251 case token_type_comma:
3252 case token_type_fat_comma:
3254 fprintf (stderr, "%s:%d: type comma (%d)\n",
3255 logical_file_name, tp->line_number, nesting_level);
3257 if (arglist_parser_decidedp (argparser, arg))
3259 /* We have missed the argument. */
3260 xgettext_current_source_encoding = po_charset_utf8;
3261 arglist_parser_done (argparser, arg);
3262 xgettext_current_source_encoding = xgettext_global_source_encoding;
3263 argparser = arglist_parser_alloc (mlp, NULL);
3268 fprintf (stderr, "%s:%d: arg: %d\n",
3269 real_file_name, tp->line_number, arg);
3272 inherited_context (outer_context,
3273 flag_context_list_iterator_advance (
3275 next_is_argument = false;
3276 if (next_argparser != NULL)
3277 free (next_argparser);
3278 next_argparser = NULL;
3279 skip_until_comma = false;
3280 next_context_iter = passthrough_context_list_iterator;
3283 case token_type_string:
3285 fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
3286 logical_file_name, tp->line_number, nesting_level,
3292 char *string = collect_message (mlp, tp, EXIT_SUCCESS);
3295 pos.file_name = logical_file_name;
3296 pos.line_number = tp->line_number;
3297 xgettext_current_source_encoding = po_charset_utf8;
3298 remember_a_message (mlp, NULL, string, inner_context, &pos,
3300 xgettext_current_source_encoding = xgettext_global_source_encoding;
3302 else if (!skip_until_comma)
3304 /* Need to collect the complete string, with error checking,
3305 only if the argument ARG is used in ARGPARSER. */
3306 bool must_collect = false;
3308 size_t nalternatives = argparser->nalternatives;
3311 for (i = 0; i < nalternatives; i++)
3313 struct partial_call *cp = &argparser->alternative[i];
3315 if (arg == cp->argnumc
3316 || arg == cp->argnum1 || arg == cp->argnum2)
3317 must_collect = true;
3323 char *string = collect_message (mlp, tp, EXIT_FAILURE);
3325 xgettext_current_source_encoding = po_charset_utf8;
3326 arglist_parser_remember (argparser, arg,
3327 string, inner_context,
3328 logical_file_name, tp->line_number,
3330 xgettext_current_source_encoding = xgettext_global_source_encoding;
3334 if (arglist_parser_decidedp (argparser, arg))
3336 xgettext_current_source_encoding = po_charset_utf8;
3337 arglist_parser_done (argparser, arg);
3338 xgettext_current_source_encoding = xgettext_global_source_encoding;
3339 argparser = arglist_parser_alloc (mlp, NULL);
3342 next_is_argument = false;
3343 if (next_argparser != NULL)
3344 free (next_argparser);
3345 next_argparser = NULL;
3346 next_context_iter = null_context_list_iterator;
3349 case token_type_number:
3351 fprintf (stderr, "%s:%d: type number (%d)\n",
3352 logical_file_name, tp->line_number, nesting_level);
3354 next_is_argument = false;
3355 if (next_argparser != NULL)
3356 free (next_argparser);
3357 next_argparser = NULL;
3358 next_context_iter = null_context_list_iterator;
3361 case token_type_eof:
3363 fprintf (stderr, "%s:%d: type EOF (%d)\n",
3364 logical_file_name, tp->line_number, nesting_level);
3366 xgettext_current_source_encoding = po_charset_utf8;
3367 arglist_parser_done (argparser, arg);
3368 xgettext_current_source_encoding = xgettext_global_source_encoding;
3369 if (next_argparser != NULL)
3370 free (next_argparser);
3371 next_argparser = NULL;
3375 case token_type_lbrace:
3377 fprintf (stderr, "%s:%d: type lbrace (%d)\n",
3378 logical_file_name, tp->line_number, nesting_level);
3380 if (extract_balanced (mlp, token_type_rbrace, true, false,
3381 null_context, null_context_list_iterator,
3382 1, arglist_parser_alloc (mlp, NULL)))
3384 xgettext_current_source_encoding = po_charset_utf8;
3385 arglist_parser_done (argparser, arg);
3386 xgettext_current_source_encoding = xgettext_global_source_encoding;
3387 if (next_argparser != NULL)
3388 free (next_argparser);
3392 next_is_argument = false;
3393 if (next_argparser != NULL)
3394 free (next_argparser);
3395 next_argparser = NULL;
3396 next_context_iter = null_context_list_iterator;
3399 case token_type_rbrace:
3401 fprintf (stderr, "%s:%d: type rbrace (%d)\n",
3402 logical_file_name, tp->line_number, nesting_level);
3404 next_is_argument = false;
3405 if (next_argparser != NULL)
3406 free (next_argparser);
3407 next_argparser = NULL;
3408 next_context_iter = null_context_list_iterator;
3411 case token_type_lbracket:
3413 fprintf (stderr, "%s:%d: type lbracket (%d)\n",
3414 logical_file_name, tp->line_number, nesting_level);
3416 if (extract_balanced (mlp, token_type_rbracket, true, false,
3417 null_context, null_context_list_iterator,
3418 1, arglist_parser_alloc (mlp, NULL)))
3420 xgettext_current_source_encoding = po_charset_utf8;
3421 arglist_parser_done (argparser, arg);
3422 xgettext_current_source_encoding = xgettext_global_source_encoding;
3423 if (next_argparser != NULL)
3424 free (next_argparser);
3428 next_is_argument = false;
3429 if (next_argparser != NULL)
3430 free (next_argparser);
3431 next_argparser = NULL;
3432 next_context_iter = null_context_list_iterator;
3435 case token_type_rbracket:
3437 fprintf (stderr, "%s:%d: type rbracket (%d)\n",
3438 logical_file_name, tp->line_number, nesting_level);
3440 next_is_argument = false;
3441 if (next_argparser != NULL)
3442 free (next_argparser);
3443 next_argparser = NULL;
3444 next_context_iter = null_context_list_iterator;
3447 case token_type_semicolon:
3449 fprintf (stderr, "%s:%d: type semicolon (%d)\n",
3450 logical_file_name, tp->line_number, nesting_level);
3453 /* The ultimate sign. */
3454 xgettext_current_source_encoding = po_charset_utf8;
3455 arglist_parser_done (argparser, arg);
3456 xgettext_current_source_encoding = xgettext_global_source_encoding;
3457 argparser = arglist_parser_alloc (mlp, NULL);
3459 /* FIXME: Instead of resetting outer_context here, it may be better
3460 to recurse in the next_is_argument handling above, waiting for
3461 the next semicolon or other statement terminator. */
3462 outer_context = null_context;
3463 context_iter = null_context_list_iterator;
3464 next_is_argument = false;
3465 if (next_argparser != NULL)
3466 free (next_argparser);
3467 next_argparser = NULL;
3468 next_context_iter = passthrough_context_list_iterator;
3470 inherited_context (outer_context,
3471 flag_context_list_iterator_advance (
3475 case token_type_dereference:
3477 fprintf (stderr, "%s:%d: type dereference (%d)\n",
3478 logical_file_name, tp->line_number, nesting_level);
3480 next_is_argument = false;
3481 if (next_argparser != NULL)
3482 free (next_argparser);
3483 next_argparser = NULL;
3484 next_context_iter = null_context_list_iterator;
3487 case token_type_dot:
3489 fprintf (stderr, "%s:%d: type dot (%d)\n",
3490 logical_file_name, tp->line_number, nesting_level);
3492 next_is_argument = false;
3493 if (next_argparser != NULL)
3494 free (next_argparser);
3495 next_argparser = NULL;
3496 next_context_iter = null_context_list_iterator;
3499 case token_type_named_op:
3501 fprintf (stderr, "%s:%d: type named operator (%d): %s\n",
3502 logical_file_name, tp->line_number, nesting_level,
3505 next_is_argument = false;
3506 if (next_argparser != NULL)
3507 free (next_argparser);
3508 next_argparser = NULL;
3509 next_context_iter = null_context_list_iterator;
3512 case token_type_regex_op:
3514 fprintf (stderr, "%s:%d: type regex operator (%d)\n",
3515 logical_file_name, tp->line_number, nesting_level);
3517 next_is_argument = false;
3518 if (next_argparser != NULL)
3519 free (next_argparser);
3520 next_argparser = NULL;
3521 next_context_iter = null_context_list_iterator;
3524 case token_type_other:
3526 fprintf (stderr, "%s:%d: type other (%d)\n",
3527 logical_file_name, tp->line_number, nesting_level);
3529 next_is_argument = false;
3530 if (next_argparser != NULL)
3531 free (next_argparser);
3532 next_argparser = NULL;
3533 next_context_iter = null_context_list_iterator;
3537 fprintf (stderr, "%s:%d: unknown token type %d\n",
3538 real_file_name, tp->line_number, tp->type);
3547 extract_perl (FILE *f, const char *real_filename, const char *logical_filename,
3548 flag_context_list_table_ty *flag_table,
3549 msgdomain_list_ty *mdlp)
3551 message_list_ty *mlp = mdlp->item[0]->messages;
3554 real_file_name = real_filename;
3555 logical_file_name = xstrdup (logical_filename);
3558 last_comment_line = -1;
3559 last_non_comment_line = -1;
3561 flag_context_list_table = flag_table;
3565 token_stack.items = NULL;
3566 token_stack.nitems = 0;
3567 token_stack.nitems_max = 0;
3571 end_of_file = false;
3573 /* Safe assumption. */
3574 last_token_type = token_type_semicolon;
3576 /* Eat tokens until eof is seen. When extract_balanced returns
3577 due to an unbalanced closing brace, just restart it. */
3578 while (!extract_balanced (mlp, token_type_rbrace, true, false,
3579 null_context, null_context_list_iterator,
3580 1, arglist_parser_alloc (mlp, NULL)))
3584 real_file_name = NULL;
3585 free (logical_file_name);
3586 logical_file_name = NULL;
3588 last_token_type = token_type_semicolon;
3589 token_stack_free (&token_stack);