1 /* xgettext YCP backend.
2 Copyright (C) 2001-2003, 2005-2009, 2011, 2015 Free Software
5 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
39 #define _(s) gettext(s)
41 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
44 /* The YCP syntax is defined in libycp/doc/syntax.html.
45 See also libycp/src/scanner.ll.
46 Both are part of the yast2-core package in SuSE Linux distributions. */
50 init_flag_table_ycp ()
52 xgettext_record_flag ("sformat:1:ycp-format");
53 xgettext_record_flag ("y2debug:1:ycp-format");
54 xgettext_record_flag ("y2milestone:1:ycp-format");
55 xgettext_record_flag ("y2warning:1:ycp-format");
56 xgettext_record_flag ("y2error:1:ycp-format");
57 xgettext_record_flag ("y2security:1:ycp-format");
58 xgettext_record_flag ("y2internal:1:ycp-format");
62 /* ======================== Reading of characters. ======================== */
65 /* Real filename, used in error messages about the input file. */
66 static const char *real_file_name;
68 /* Logical filename and line number, used to label the extracted messages. */
69 static char *logical_file_name;
70 static int line_number;
71 static int char_in_line;
73 /* The input file stream. */
76 /* These are for tracking whether comments count as immediately before a keyword. */
78 static int last_comment_line;
79 static int last_non_comment_line;
82 /* 1. line_number handling. */
92 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
108 /* Supports only one pushback character. */
110 phase1_ungetc (int c)
117 char_in_line = INT_MAX;
127 /* 2. Replace each comment that is not inside a character constant or
128 string literal with a space character. We need to remember the
129 comment for later, because it may be attached to a keyword string.
130 YCP comments can be in C comment syntax, C++ comment syntax or sh comment syntax. */
133 static unsigned char phase2_pushback[1];
134 static int phase2_pushback_length;
140 static size_t bufmax;
146 if (phase2_pushback_length)
147 return phase2_pushback[--phase2_pushback_length];
149 if (char_in_line == 0)
151 /* Eat whitespace, to recognize ^[\t ]*# pattern. */
154 while (c == '\t' || c == ' ');
160 lineno = line_number;
164 if (c == '\n' || c == EOF)
166 /* We skip all leading white space, but not EOLs. */
167 if (!(buflen == 0 && (c == ' ' || c == '\t')))
169 if (buflen >= bufmax)
171 bufmax = 2 * bufmax + 10;
172 buffer = xrealloc (buffer, bufmax);
174 buffer[buflen++] = c;
177 if (buflen >= bufmax)
179 bufmax = 2 * bufmax + 10;
180 buffer = xrealloc (buffer, bufmax);
182 buffer[buflen] = '\0';
183 savable_comment_add (buffer);
184 last_comment_line = lineno;
204 lineno = line_number;
205 last_was_star = false;
211 /* We skip all leading white space, but not EOLs. */
212 if (buflen == 0 && (c == ' ' || c == '\t'))
214 if (buflen >= bufmax)
216 bufmax = 2 * bufmax + 10;
217 buffer = xrealloc (buffer, bufmax);
219 buffer[buflen++] = c;
225 && (buffer[buflen - 1] == ' '
226 || buffer[buflen - 1] == '\t'))
228 buffer[buflen] = '\0';
229 savable_comment_add (buffer);
231 lineno = line_number;
232 last_was_star = false;
236 last_was_star = true;
244 && (buffer[buflen - 1] == ' '
245 || buffer[buflen - 1] == '\t'))
247 buffer[buflen] = '\0';
248 savable_comment_add (buffer);
254 last_was_star = false;
259 last_comment_line = lineno;
265 lineno = line_number;
269 if (c == '\n' || c == EOF)
271 /* We skip all leading white space, but not EOLs. */
272 if (!(buflen == 0 && (c == ' ' || c == '\t')))
274 if (buflen >= bufmax)
276 bufmax = 2 * bufmax + 10;
277 buffer = xrealloc (buffer, bufmax);
279 buffer[buflen++] = c;
282 if (buflen >= bufmax)
284 bufmax = 2 * bufmax + 10;
285 buffer = xrealloc (buffer, bufmax);
287 buffer[buflen] = '\0';
288 savable_comment_add (buffer);
289 last_comment_line = lineno;
297 /* Supports only one pushback character. */
299 phase2_ungetc (int c)
303 if (phase2_pushback_length == SIZEOF (phase2_pushback))
305 phase2_pushback[phase2_pushback_length++] = c;
310 /* ========================== Reading of tokens. ========================== */
316 token_type_lparen, /* ( */
317 token_type_rparen, /* ) */
318 token_type_comma, /* , */
319 token_type_i18n, /* _( */
320 token_type_string_literal, /* "abc" */
321 token_type_symbol, /* symbol, number */
322 token_type_other /* misc. operator */
324 typedef enum token_type_ty token_type_ty;
326 typedef struct token_ty token_ty;
330 char *string; /* for token_type_string_literal, token_type_symbol */
331 refcounted_string_list_ty *comment; /* for token_type_string_literal */
336 /* 7. Replace escape sequences within character strings with their
337 single character equivalents. */
339 #define P7_QUOTES (1000 + '"')
348 /* Use phase 1, because phase 2 elides comments. */
370 /* FIXME: What is the octal escape syntax?
371 syntax.html says: [0] [0-7]+
372 scanner.ll says: [0-7] [0-7] [0-7] */
375 case '0': case '1': case '2': case '3':
376 case '4': case '5': case '6': case '7':
381 for (j = 0; j < 3; ++j)
390 case '0': case '1': case '2': case '3':
391 case '4': case '5': case '6': case '7':
408 /* Free the memory pointed to by a 'struct token_ty'. */
410 free_token (token_ty *tp)
412 if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
414 if (tp->type == token_type_string_literal)
415 drop_reference (tp->comment);
419 /* Combine characters into tokens. Discard whitespace. */
421 static token_ty phase5_pushback[1];
422 static int phase5_pushback_length;
425 phase5_get (token_ty *tp)
432 if (phase5_pushback_length)
434 *tp = phase5_pushback[--phase5_pushback_length];
439 tp->line_number = line_number;
445 tp->type = token_type_eof;
449 if (last_non_comment_line > last_comment_line)
450 savable_comment_reset ();
455 /* Ignore whitespace and comments. */
459 last_non_comment_line = tp->line_number;
463 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
464 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
465 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
466 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
469 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
470 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
471 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
472 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
474 case '0': case '1': case '2': case '3': case '4':
475 case '5': case '6': case '7': case '8': case '9':
476 /* Symbol, or part of a number. */
480 if (bufpos >= bufmax)
482 bufmax = 2 * bufmax + 10;
483 buffer = xrealloc (buffer, bufmax);
485 buffer[bufpos++] = c;
489 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
490 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
491 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
492 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
495 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
496 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
497 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
498 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
500 case '0': case '1': case '2': case '3': case '4':
501 case '5': case '6': case '7': case '8': case '9':
504 if (bufpos == 1 && buffer[0] == '_' && c == '(')
506 tp->type = token_type_i18n;
514 if (bufpos >= bufmax)
516 bufmax = 2 * bufmax + 10;
517 buffer = xrealloc (buffer, bufmax);
519 buffer[bufpos] = '\0';
520 tp->string = xstrdup (buffer);
521 tp->type = token_type_symbol;
529 if (c == EOF || c == P7_QUOTES)
531 if (bufpos >= bufmax)
533 bufmax = 2 * bufmax + 10;
534 buffer = xrealloc (buffer, bufmax);
536 buffer[bufpos++] = c;
538 if (bufpos >= bufmax)
540 bufmax = 2 * bufmax + 10;
541 buffer = xrealloc (buffer, bufmax);
543 buffer[bufpos] = '\0';
544 tp->string = xstrdup (buffer);
545 tp->type = token_type_string_literal;
546 tp->comment = add_reference (savable_comment);
550 tp->type = token_type_lparen;
554 tp->type = token_type_rparen;
558 tp->type = token_type_comma;
562 /* We could carefully recognize each of the 2 and 3 character
563 operators, but it is not necessary, as we only need to recognize
564 gettext invocations. Don't bother. */
565 tp->type = token_type_other;
571 /* Supports only one pushback token. */
573 phase5_unget (token_ty *tp)
575 if (tp->type != token_type_eof)
577 if (phase5_pushback_length == SIZEOF (phase5_pushback))
579 phase5_pushback[phase5_pushback_length++] = *tp;
584 /* Concatenate adjacent string literals to form single string literals.
585 (See libycp/src/parser.yy, rule 'string' vs. terminal 'STRING'.) */
588 phase8_get (token_ty *tp)
591 if (tp->type != token_type_string_literal)
599 if (tmp.type != token_type_string_literal)
604 len = strlen (tp->string);
605 tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
606 strcpy (tp->string + len, tmp.string);
612 /* ========================= Extracting strings. ========================== */
615 /* Context lookup table. */
616 static flag_context_list_table_ty *flag_context_list_table;
619 /* The file is broken into tokens.
621 Normal handling: Look for
622 [A] _( [B] msgid ... )
623 Plural handling: Look for
624 [A] _( [B] msgid [C] , [D] msgid_plural ... )
625 At point [A]: state == 0.
626 At point [B]: state == 1, plural_mp == NULL.
627 At point [C]: state == 2, plural_mp != NULL.
628 At point [D]: state == 1, plural_mp != NULL.
630 We use recursion because we have to set the context according to the given flags. */
634 /* Extract messages until the next balanced closing parenthesis.
635 Extracted messages are added to MLP.
636 Return true upon eof, false upon closing parenthesis. */
638 extract_parenthesized (message_list_ty *mlp,
639 flag_context_ty outer_context,
640 flag_context_list_iterator_ty context_iter,
643 int state; /* 1 or 2 inside _( ... ), otherwise 0 */
644 int plural_state = 0; /* defined only when in states 1 and 2 */
645 message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */
646 /* Context iterator that will be used if the next token is a '('. */
647 flag_context_list_iterator_ty next_context_iter =
648 passthrough_context_list_iterator;
649 /* Current context. */
650 flag_context_ty inner_context =
651 inherited_context (outer_context,
652 flag_context_list_iterator_advance (&context_iter));
654 /* Start state is 0 or 1. */
655 state = (in_i18n ? 1 : 0);
668 case token_type_i18n:
669 if (extract_parenthesized (mlp, inner_context, next_context_iter,
672 next_context_iter = null_context_list_iterator;
676 case token_type_string_literal:
680 pos.file_name = logical_file_name;
681 pos.line_number = token.line_number;
683 if (plural_state == 0)
686 plural_mp = remember_a_message (mlp, NULL, token.string,
688 NULL, token.comment);
694 /* Seen an msgid_plural. */
695 if (plural_mp != NULL)
696 remember_a_message_plural (plural_mp, token.string,
701 drop_reference (token.comment);
708 next_context_iter = null_context_list_iterator;
711 case token_type_symbol:
713 flag_context_list_iterator (
714 flag_context_list_table_lookup (
715 flag_context_list_table,
716 token.string, strlen (token.string)));
721 case token_type_lparen:
722 if (extract_parenthesized (mlp, inner_context, next_context_iter,
725 next_context_iter = null_context_list_iterator;
729 case token_type_rparen:
732 case token_type_comma:
738 inherited_context (outer_context,
739 flag_context_list_iterator_advance (
741 next_context_iter = passthrough_context_list_iterator;
744 case token_type_other:
745 next_context_iter = null_context_list_iterator;
760 extract_ycp (FILE *f,
761 const char *real_filename, const char *logical_filename,
762 flag_context_list_table_ty *flag_table,
763 msgdomain_list_ty *mdlp)
765 message_list_ty *mlp = mdlp->item[0]->messages;
768 real_file_name = real_filename;
769 logical_file_name = xstrdup (logical_filename);
773 last_comment_line = -1;
774 last_non_comment_line = -1;
776 flag_context_list_table = flag_table;
778 /* Eat tokens until eof is seen. When extract_parenthesized returns
779 due to an unbalanced closing parenthesis, just restart it. */
780 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
785 real_file_name = NULL;
786 logical_file_name = NULL;