1 /* xgettext librep backend.
2 Copyright (C) 2001-2003, 2005-2009, 2015 Free Software Foundation,
5 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
41 #define _(s) gettext(s)
44 /* Summary of librep syntax:
45 - ';' starts a comment until end of line.
46 - Block comments start with '#|' and end with '|#'.
47 - Numbers are constituted of an optional prefix (#b, #B for binary,
48 #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
49 #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
51 - Characters are written as '?' followed by the character, possibly
52 with an escape sequence, for example '?a', '?\n', '?\177'.
53 - Strings are delimited by double quotes. Backslash introduces an escape
54 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
55 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
56 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
57 if preceded by backslash or enclosed in |...|.
58 - Keywords: written as #:SYMBOL.
61 The reader is implemented in librep-0.14/src/lisp.c. */
64 /* ====================== Keyword set customization. ====================== */
66 /* If true, extract all strings. */
67 static bool extract_all = false;
69 static hash_table keywords;
70 static bool default_keywords = true;
74 x_librep_extract_all ()
81 x_librep_keyword (const char *name)
84 default_keywords = false;
88 struct callshape shape;
91 if (keywords.table == NULL)
92 hash_init (&keywords, 100);
94 split_keywordspec (name, &end, &shape);
96 /* The characters between name and end should form a valid Lisp
98 colon = strchr (name, ':');
99 if (colon == NULL || colon >= end)
100 insert_keyword_callshape (&keywords, name, end - name, &shape);
104 /* Finish initializing the keywords hash table.
105 Called after argument processing, before each file is processed. */
109 if (default_keywords)
111 /* When adding new keywords here, also update the documentation in
113 x_librep_keyword ("_");
114 default_keywords = false;
119 init_flag_table_librep ()
121 xgettext_record_flag ("_:1:pass-librep-format");
122 xgettext_record_flag ("format:2:librep-format");
126 /* ======================== Reading of characters. ======================== */
128 /* Real filename, used in error messages about the input file. */
129 static const char *real_file_name;
131 /* Logical filename and line number, used to label the extracted messages. */
132 static char *logical_file_name;
133 static int line_number;
135 /* The input file stream. */
139 /* Fetch the next character from the input file. */
148 error (EXIT_FAILURE, errno, _("\
149 error while reading \"%s\""), real_file_name);
157 /* Put back the last fetched character, not EOF. */
167 /* ========================== Reading of tokens. ========================== */
170 /* A token consists of a sequence of characters. */
173 int allocated; /* number of allocated 'token_char's */
174 int charcount; /* number of used 'token_char's */
175 char *chars; /* the token's constituents */
178 /* Initialize a 'struct token'. */
180 init_token (struct token *tp)
183 tp->chars = XNMALLOC (tp->allocated, char);
187 /* Free the memory pointed to by a 'struct token'. */
189 free_token (struct token *tp)
194 /* Ensure there is enough room in the token for one more character. */
196 grow_token (struct token *tp)
198 if (tp->charcount == tp->allocated)
201 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
205 /* Read the next token. If 'first' is given, it points to the first
206 character, which has already been read. Returns true for a symbol,
207 false for a number. */
209 read_token (struct token *tp, const int *first)
212 /* Variables for speculative number parsing: */
216 bool rational = false;
217 bool exponent = false;
218 bool had_sign = false;
219 bool expecting_prefix = false;
228 for (;; c = do_getc ())
235 case ' ': case '\t': case '\n': case '\f': case '\r':
236 case '(': case ')': case '[': case ']':
237 case '\'': case '"': case ';': case ',': case '`':
244 /* Invalid, but be tolerant. */
247 tp->chars[tp->charcount++] = c;
255 if (c == EOF || c == '|')
258 tp->chars[tp->charcount++] = c;
265 if (expecting_prefix)
288 expecting_prefix = false;
289 nfirst = tp->charcount + 1;
291 else if (tp->charcount == nfirst
292 && (c == '+' || c == '-' || c == '#'))
299 expecting_prefix = true;
303 nfirst = tp->charcount + 1;
315 else if (!(c >= '0' && c <= '9'))
328 nfirst = tp->charcount + 1;
330 case '0': case '1': case '2': case '3': case '4':
331 case '5': case '6': case '7':
333 nfirst = tp->charcount;
335 case '.': case 'E': case 'e':
353 if (exact && radix == 10 && !rational)
359 if (exact && !rational)
367 if (!rational && !exponent)
378 if (exponent && (c == '+' || c == '-'))
381 && !(c >= '0' && c <= '0' + radix - 1))
382 || (radix == 16 && !c_isxdigit (c)))
396 tp->chars[tp->charcount++] = c;
402 if (radix > 0 && nfirst < tp->charcount)
403 return false; /* number */
405 return true; /* symbol */
409 /* ========================= Accumulating comments ========================= */
413 static size_t bufmax;
414 static size_t buflen;
425 if (buflen >= bufmax)
427 bufmax = 2 * bufmax + 10;
428 buffer = xrealloc (buffer, bufmax);
430 buffer[buflen++] = c;
434 comment_line_end (size_t chars_to_remove)
436 buflen -= chars_to_remove;
438 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
440 if (chars_to_remove == 0 && buflen >= bufmax)
442 bufmax = 2 * bufmax + 10;
443 buffer = xrealloc (buffer, bufmax);
445 buffer[buflen] = '\0';
446 savable_comment_add (buffer);
450 /* These are for tracking whether comments count as immediately before
452 static int last_comment_line;
453 static int last_non_comment_line;
456 /* ========================= Accumulating messages ========================= */
459 static message_list_ty *mlp;
462 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
465 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
466 Other objects need not be represented precisely. */
469 t_symbol, /* symbol */
470 t_string, /* string */
471 t_other, /* other kind of real object */
472 t_dot, /* '.' pseudo object */
473 t_close, /* ')' or ']' pseudo object */
474 t_eof /* EOF marker */
479 enum object_type type;
480 struct token *token; /* for t_symbol and t_string */
481 int line_number_at_start; /* for t_string */
484 /* Free the memory pointed to by a 'struct object'. */
486 free_object (struct object *op)
488 if (op->type == t_symbol || op->type == t_string)
490 free_token (op->token);
495 /* Convert a t_symbol/t_string token to a char*. */
497 string_of_object (const struct object *op)
502 if (!(op->type == t_symbol || op->type == t_string))
504 n = op->token->charcount;
505 str = XNMALLOC (n + 1, char);
506 memcpy (str, op->token->chars, n);
511 /* Context lookup table. */
512 static flag_context_list_table_ty *flag_context_list_table;
514 /* Returns the character represented by an escape sequence. */
516 do_getc_escaped (int c)
537 case '0': case '1': case '2': case '3': case '4':
538 case '5': case '6': case '7':
545 if (c >= '0' && c <= '7')
547 n = (n << 3) + (c - '0');
551 if (c >= '0' && c <= '7')
552 n = (n << 3) + (c - '0');
560 return (unsigned char) n;
571 else if (c >= '0' && c <= '9')
572 n = (n << 4) + (c - '0');
573 else if (c >= 'A' && c <= 'F')
574 n = (n << 4) + (c - 'A' + 10);
575 else if (c >= 'a' && c <= 'f')
576 n = (n << 4) + (c - 'a' + 10);
583 return (unsigned char) n;
590 /* Read the next object. */
592 read_object (struct object *op, flag_context_ty outer_context)
607 /* Comments assumed to be grouped with a message must immediately
608 precede it, with no non-whitespace token on a line between
610 if (last_non_comment_line > last_comment_line)
611 savable_comment_reset ();
614 case ' ': case '\t': case '\f': case '\r':
619 int arg = 0; /* Current argument number. */
620 flag_context_list_iterator_ty context_iter;
621 const struct callshapes *shapes = NULL;
622 struct arglist_parser *argparser = NULL;
627 flag_context_ty inner_context;
630 inner_context = null_context;
633 inherited_context (outer_context,
634 flag_context_list_iterator_advance (
637 read_object (&inner, inner_context);
639 /* Recognize end of list. */
640 if (inner.type == t_close)
643 /* Don't bother converting "()" to "NIL". */
644 last_non_comment_line = line_number;
645 if (argparser != NULL)
646 arglist_parser_done (argparser, arg);
650 /* Dots are not allowed in every position.
653 /* EOF inside list is illegal. But be tolerant. */
654 if (inner.type == t_eof)
659 /* This is the function position. */
660 if (inner.type == t_symbol)
662 char *symbol_name = string_of_object (&inner);
665 if (hash_find_entry (&keywords,
666 symbol_name, strlen (symbol_name),
669 shapes = (const struct callshapes *) keyword_value;
671 argparser = arglist_parser_alloc (mlp, shapes);
674 flag_context_list_iterator (
675 flag_context_list_table_lookup (
676 flag_context_list_table,
677 symbol_name, strlen (symbol_name)));
682 context_iter = null_context_list_iterator;
686 /* These are the argument positions. */
687 if (argparser != NULL && inner.type == t_string)
688 arglist_parser_remember (argparser, arg,
689 string_of_object (&inner),
692 inner.line_number_at_start,
696 free_object (&inner);
699 if (argparser != NULL)
700 arglist_parser_done (argparser, arg);
703 last_non_comment_line = line_number;
712 read_object (&inner, null_context);
714 /* Recognize end of vector. */
715 if (inner.type == t_close)
718 last_non_comment_line = line_number;
722 /* Dots are not allowed. But be tolerant. */
724 /* EOF inside vector is illegal. But be tolerant. */
725 if (inner.type == t_eof)
728 free_object (&inner);
732 last_non_comment_line = line_number;
736 /* Tell the caller about the end of list or vector.
737 Unmatched closing parenthesis is illegal. But be tolerant. */
739 last_non_comment_line = line_number;
745 /* The ,@ handling inside lists is wrong anyway, because
746 ,@form expands to an unknown number of elements. */
747 if (c != EOF && c != '@')
756 read_object (&inner, null_context);
758 /* Dots and EOF are not allowed here. But be tolerant. */
760 free_object (&inner);
763 last_non_comment_line = line_number;
769 bool all_semicolons = true;
771 last_comment_line = line_number;
776 if (c == EOF || c == '\n' || c == '\f' || c == '\r')
779 all_semicolons = false;
782 /* We skip all leading white space, but not EOLs. */
783 if (!(buflen == 0 && (c == ' ' || c == '\t')))
787 comment_line_end (0);
793 op->token = XMALLOC (struct token);
794 init_token (op->token);
795 op->line_number_at_start = line_number;
800 /* Invalid input. Be tolerant, no error message. */
808 /* Invalid input. Be tolerant, no error message. */
811 /* Ignore escaped newline. */
815 c = do_getc_escaped (c);
817 /* Invalid input. Be tolerant, no error message. */
819 grow_token (op->token);
820 op->token->chars[op->token->charcount++] = c;
825 grow_token (op->token);
826 op->token->chars[op->token->charcount++] = c;
835 pos.file_name = logical_file_name;
836 pos.line_number = op->line_number_at_start;
837 remember_a_message (mlp, NULL, string_of_object (op),
838 null_context, &pos, NULL, savable_comment);
840 last_non_comment_line = line_number;
847 /* Invalid input. Be tolerant, no error message. */
853 /* Invalid input. Be tolerant, no error message. */
857 c = do_getc_escaped (c);
859 /* Invalid input. Be tolerant, no error message. */
864 last_non_comment_line = line_number;
868 /* Dispatch macro handling. */
871 /* Invalid input. Be tolerant, no error message. */
881 /* Skip comment until !# */
891 if (c == EOF || c == '#')
899 /* EOF not allowed here. But be tolerant. */
910 read_object (&inner, null_context);
911 /* Dots and EOF are not allowed here.
913 free_object (&inner);
915 last_non_comment_line = line_number;
924 read_object (&inner, null_context);
925 /* Dots and EOF are not allowed here.
927 free_object (&inner);
929 last_non_comment_line = line_number;
952 comment_line_end (0);
978 /* We skip all leading white space. */
979 if (!(buflen == 0 && (c == ' ' || c == '\t')))
983 comment_line_end (1);
991 /* EOF not allowed here. But be tolerant. */
995 last_comment_line = line_number;
1003 read_token (&token, &first);
1004 free_token (&token);
1006 last_non_comment_line = line_number;
1013 last_non_comment_line = line_number;
1026 read_token (&token, &c);
1027 free_token (&token);
1029 last_non_comment_line = line_number;
1034 /* Invalid input. Be tolerant, no error message. */
1036 last_non_comment_line = line_number;
1048 op->token = XMALLOC (struct token);
1049 symbol = read_token (op->token, &c);
1050 if (op->token->charcount == 1 && op->token->chars[0] == '.')
1052 free_token (op->token);
1055 last_non_comment_line = line_number;
1060 free_token (op->token);
1063 last_non_comment_line = line_number;
1066 /* Distinguish between "foo" and "foo#bar". */
1070 struct token second_token;
1072 free_token (op->token);
1074 read_token (&second_token, NULL);
1075 free_token (&second_token);
1077 last_non_comment_line = line_number;
1084 op->type = t_symbol;
1085 last_non_comment_line = line_number;
1095 extract_librep (FILE *f,
1096 const char *real_filename, const char *logical_filename,
1097 flag_context_list_table_ty *flag_table,
1098 msgdomain_list_ty *mdlp)
1100 mlp = mdlp->item[0]->messages;
1103 real_file_name = real_filename;
1104 logical_file_name = xstrdup (logical_filename);
1107 last_comment_line = -1;
1108 last_non_comment_line = -1;
1110 flag_context_list_table = flag_table;
1114 /* Eat tokens until eof is seen. When read_object returns
1115 due to an unbalanced closing parenthesis, just restart it. */
1118 struct object toplevel_object;
1120 read_object (&toplevel_object, null_context);
1122 if (toplevel_object.type == t_eof)
1125 free_object (&toplevel_object);
1129 /* Close scanner. */
1131 real_file_name = NULL;
1132 logical_file_name = NULL;