1 /* xgettext Emacs Lisp backend.
2 Copyright (C) 2001-2003, 2005-2009, 2015 Free Software Foundation,
5 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
41 #define _(s) gettext(s)
44 /* Summary of Emacs Lisp syntax:
45 - ';' starts a comment until end of line.
46 - '#@nn' starts a comment of nn bytes.
47 - Integers are constituted of an optional prefix (#b, #B for binary,
48 #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
49 an optional sign (+ or -), the digits, and an optional trailing dot.
50 - Characters are written as '?' followed by the character, possibly
51 with an escape sequence, for examples '?a', '?\n', '?\177'.
52 - Strings are delimited by double quotes. Backslash introduces an escape
53 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
54 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
55 - Symbols: can contain meta-characters if preceded by backslash.
56 - Uninterned symbols: written as #:SYMBOL.
59 The reader is implemented in emacs-21.1/src/lread.c. */
62 /* ====================== Keyword set customization. ====================== */
64 /* If true extract all strings. */
65 static bool extract_all = false;
67 static hash_table keywords;
68 static bool default_keywords = true;
72 x_elisp_extract_all ()
79 x_elisp_keyword (const char *name)
82 default_keywords = false;
86 struct callshape shape;
89 if (keywords.table == NULL)
90 hash_init (&keywords, 100);
92 split_keywordspec (name, &end, &shape);
94 /* The characters between name and end should form a valid Lisp
96 colon = strchr (name, ':');
97 if (colon == NULL || colon >= end)
98 insert_keyword_callshape (&keywords, name, end - name, &shape);
102 /* Finish initializing the keywords hash table.
103 Called after argument processing, before each file is processed. */
107 if (default_keywords)
109 /* When adding new keywords here, also update the documentation in
111 x_elisp_keyword ("_");
112 default_keywords = false;
117 init_flag_table_elisp ()
119 xgettext_record_flag ("_:1:pass-elisp-format");
120 xgettext_record_flag ("format:1:elisp-format");
124 /* ======================== Reading of characters. ======================== */
126 /* Real filename, used in error messages about the input file. */
127 static const char *real_file_name;
129 /* Logical filename and line number, used to label the extracted messages. */
130 static char *logical_file_name;
131 static int line_number;
133 /* The input file stream. */
137 /* Fetch the next character from the input file. */
146 error (EXIT_FAILURE, errno, _("\
147 error while reading \"%s\""), real_file_name);
155 /* Put back the last fetched character, not EOF. */
165 /* ========================== Reading of tokens. ========================== */
168 /* A token consists of a sequence of characters. */
171 int allocated; /* number of allocated 'token_char's */
172 int charcount; /* number of used 'token_char's */
173 char *chars; /* the token's constituents */
176 /* Initialize a 'struct token'. */
178 init_token (struct token *tp)
181 tp->chars = XNMALLOC (tp->allocated, char);
185 /* Free the memory pointed to by a 'struct token'. */
187 free_token (struct token *tp)
192 /* Ensure there is enough room in the token for one more character. */
194 grow_token (struct token *tp)
196 if (tp->charcount == tp->allocated)
199 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
203 /* Test whether a token has integer syntax. */
205 is_integer (const char *p)
207 /* NB: Yes, '+.' and '-.' both designate the integer 0. */
208 const char *p_start = p;
210 if (*p == '+' || *p == '-')
214 while (*p >= '0' && *p <= '9')
216 if (p > p_start && *p == '.')
221 /* Test whether a token has float syntax. */
223 is_float (const char *p)
225 enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
229 if (*p == '+' || *p == '-')
231 if (*p >= '0' && *p <= '9')
236 while (*p >= '0' && *p <= '9');
243 if (*p >= '0' && *p <= '9')
248 while (*p >= '0' && *p <= '9');
250 if (*p == 'e' || *p == 'E')
254 if (*p == '+' || *p == '-')
256 if (*p >= '0' && *p <= '9')
261 while (*p >= '0' && *p <= '9');
263 else if (p[-1] == '+'
264 && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
265 || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
272 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
273 || state == (DOT_CHAR | TRAIL_INT)
274 || state == (LEAD_INT | E_CHAR | EXP_INT)
275 || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
276 || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
279 /* Read the next token. 'first' is the first character, which has already
280 been read. Returns true for a symbol, false for a number. */
282 read_token (struct token *tp, int first)
291 for (;; c = do_getc ())
295 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
297 if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
298 || c == '[' || c == ']' || c == '#')
305 /* Invalid, but be tolerant. */
309 tp->chars[tp->charcount++] = c;
315 return true; /* symbol */
317 /* Add a NUL byte at the end, for is_integer and is_float. */
319 tp->chars[tp->charcount] = '\0';
321 if (is_integer (tp->chars) || is_float (tp->chars))
322 return false; /* number */
324 return true; /* symbol */
328 /* ========================= Accumulating comments ========================= */
332 static size_t bufmax;
333 static size_t buflen;
344 if (buflen >= bufmax)
346 bufmax = 2 * bufmax + 10;
347 buffer = xrealloc (buffer, bufmax);
349 buffer[buflen++] = c;
353 comment_line_end (size_t chars_to_remove)
355 buflen -= chars_to_remove;
357 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
359 if (chars_to_remove == 0 && buflen >= bufmax)
361 bufmax = 2 * bufmax + 10;
362 buffer = xrealloc (buffer, bufmax);
364 buffer[buflen] = '\0';
365 savable_comment_add (buffer);
369 /* These are for tracking whether comments count as immediately before
371 static int last_comment_line;
372 static int last_non_comment_line;
375 /* ========================= Accumulating messages ========================= */
378 static message_list_ty *mlp;
381 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
384 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
385 Other objects need not to be represented precisely. */
388 t_symbol, /* symbol */
389 t_string, /* string */
390 t_other, /* other kind of real object */
391 t_dot, /* '.' pseudo object */
392 t_listclose, /* ')' pseudo object */
393 t_vectorclose,/* ']' pseudo object */
394 t_eof /* EOF marker */
399 enum object_type type;
400 struct token *token; /* for t_symbol and t_string */
401 int line_number_at_start; /* for t_string */
404 /* Free the memory pointed to by a 'struct object'. */
406 free_object (struct object *op)
408 if (op->type == t_symbol || op->type == t_string)
410 free_token (op->token);
415 /* Convert a t_symbol/t_string token to a char*. */
417 string_of_object (const struct object *op)
422 if (!(op->type == t_symbol || op->type == t_string))
424 n = op->token->charcount;
425 str = XNMALLOC (n + 1, char);
426 memcpy (str, op->token->chars, n);
431 /* Context lookup table. */
432 static flag_context_list_table_ty *flag_context_list_table;
434 /* Returns the character represented by an escape sequence. */
435 #define IGNORABLE_ESCAPE (EOF - 1)
437 do_getc_escaped (int c, bool in_string)
461 return IGNORABLE_ESCAPE;
464 return (in_string ? IGNORABLE_ESCAPE : ' ');
471 /* Invalid input. But be tolerant. */
481 c = do_getc_escaped (c, false);
485 case 'S': /* shift */
490 /* Invalid input. But be tolerant. */
500 c = do_getc_escaped (c, false);
502 return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
504 case 'H': /* hyper */
506 case 's': /* super */
511 /* Invalid input. But be tolerant. */
521 c = do_getc_escaped (c, false);
530 /* Invalid input. But be tolerant. */
542 c = do_getc_escaped (c, false);
546 if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
548 if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
550 #if 0 /* We cannot handle NUL bytes in strings. */
556 case '0': case '1': case '2': case '3': case '4':
557 case '5': case '6': case '7':
558 /* An octal escape, as in ANSI C. */
565 if (c >= '0' && c <= '7')
567 n = (n << 3) + (c - '0');
571 if (c >= '0' && c <= '7')
572 n = (n << 3) + (c - '0');
580 return (unsigned char) n;
584 /* A hexadecimal escape, as in ANSI C. */
593 else if (c >= '0' && c <= '9')
594 n = (n << 4) + (c - '0');
595 else if (c >= 'A' && c <= 'F')
596 n = (n << 4) + (c - 'A' + 10);
597 else if (c >= 'a' && c <= 'f')
598 n = (n << 4) + (c - 'a' + 10);
605 return (unsigned char) n;
609 /* Ignore Emacs multibyte character stuff. All the strings we are
610 interested in are ASCII strings. */
615 /* Read the next object.
616 'first_in_list' and 'new_backquote_flag' are used for reading old
617 backquote syntax and new backquote syntax. */
619 read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
620 flag_context_ty outer_context)
635 /* Comments assumed to be grouped with a message must immediately
636 precede it, with no non-whitespace token on a line between
638 if (last_non_comment_line > last_comment_line)
639 savable_comment_reset ();
644 int arg = 0; /* Current argument number. */
645 flag_context_list_iterator_ty context_iter;
646 const struct callshapes *shapes = NULL;
647 struct arglist_parser *argparser = NULL;
652 flag_context_ty inner_context;
655 inner_context = null_context;
658 inherited_context (outer_context,
659 flag_context_list_iterator_advance (
662 read_object (&inner, arg == 0, new_backquote_flag,
665 /* Recognize end of list. */
666 if (inner.type == t_listclose)
669 /* Don't bother converting "()" to "NIL". */
670 last_non_comment_line = line_number;
671 if (argparser != NULL)
672 arglist_parser_done (argparser, arg);
676 /* Dots are not allowed in every position. ']' is not allowed.
679 /* EOF inside list is illegal. But be tolerant. */
680 if (inner.type == t_eof)
685 /* This is the function position. */
686 if (inner.type == t_symbol)
688 char *symbol_name = string_of_object (&inner);
691 if (hash_find_entry (&keywords,
692 symbol_name, strlen (symbol_name),
695 shapes = (const struct callshapes *) keyword_value;
697 argparser = arglist_parser_alloc (mlp, shapes);
700 flag_context_list_iterator (
701 flag_context_list_table_lookup (
702 flag_context_list_table,
703 symbol_name, strlen (symbol_name)));
708 context_iter = null_context_list_iterator;
712 /* These are the argument positions. */
713 if (argparser != NULL && inner.type == t_string)
714 arglist_parser_remember (argparser, arg,
715 string_of_object (&inner),
718 inner.line_number_at_start,
722 free_object (&inner);
725 if (argparser != NULL)
726 arglist_parser_done (argparser, arg);
729 last_non_comment_line = line_number;
733 /* Tell the caller about the end of list.
734 Unmatched closing parenthesis is illegal. But be tolerant. */
735 op->type = t_listclose;
736 last_non_comment_line = line_number;
745 read_object (&inner, false, new_backquote_flag, null_context);
747 /* Recognize end of vector. */
748 if (inner.type == t_vectorclose)
751 last_non_comment_line = line_number;
755 /* Dots and ')' are not allowed. But be tolerant. */
757 /* EOF inside vector is illegal. But be tolerant. */
758 if (inner.type == t_eof)
761 free_object (&inner);
765 last_non_comment_line = line_number;
769 /* Tell the caller about the end of vector.
770 Unmatched closing bracket is illegal. But be tolerant. */
771 op->type = t_vectorclose;
772 last_non_comment_line = line_number;
779 read_object (&inner, false, new_backquote_flag, null_context);
781 /* Dots and EOF are not allowed here. But be tolerant. */
783 free_object (&inner);
786 last_non_comment_line = line_number;
796 read_object (&inner, false, true, null_context);
798 /* Dots and EOF are not allowed here. But be tolerant. */
800 free_object (&inner);
803 last_non_comment_line = line_number;
808 if (!new_backquote_flag)
812 /* The ,@ handling inside lists is wrong anyway, because
813 ,@form expands to an unknown number of elements. */
814 if (c != EOF && c != '@' && c != '.')
820 read_object (&inner, false, false, null_context);
822 /* Dots and EOF are not allowed here. But be tolerant. */
824 free_object (&inner);
827 last_non_comment_line = line_number;
833 bool all_semicolons = true;
835 last_comment_line = line_number;
840 if (c == EOF || c == '\n')
843 all_semicolons = false;
846 /* We skip all leading white space, but not EOLs. */
847 if (!(buflen == 0 && (c == ' ' || c == '\t')))
851 comment_line_end (0);
857 op->token = XMALLOC (struct token);
858 init_token (op->token);
859 op->line_number_at_start = line_number;
864 /* Invalid input. Be tolerant, no error message. */
872 /* Invalid input. Be tolerant, no error message. */
874 c = do_getc_escaped (c, true);
876 /* Invalid input. Be tolerant, no error message. */
878 if (c == IGNORABLE_ESCAPE)
879 /* Ignore escaped newline and escaped space. */
883 grow_token (op->token);
884 op->token->chars[op->token->charcount++] = c;
889 grow_token (op->token);
890 op->token->chars[op->token->charcount++] = c;
899 pos.file_name = logical_file_name;
900 pos.line_number = op->line_number_at_start;
901 remember_a_message (mlp, NULL, string_of_object (op),
902 null_context, &pos, NULL, savable_comment);
904 last_non_comment_line = line_number;
911 /* Invalid input. Be tolerant, no error message. */
917 /* Invalid input. Be tolerant, no error message. */
921 c = do_getc_escaped (c, false);
923 /* Invalid input. Be tolerant, no error message. */
927 /* Impossible to deal with Emacs multibyte character stuff here. */
929 last_non_comment_line = line_number;
933 /* Dispatch macro handling. */
936 /* Invalid input. Be tolerant, no error message. */
950 /* Read a char table, same syntax as a vector. */
955 read_object (&inner, false, new_backquote_flag,
958 /* Recognize end of vector. */
959 if (inner.type == t_vectorclose)
962 last_non_comment_line = line_number;
966 /* Dots and ')' are not allowed. But be tolerant. */
968 /* EOF inside vector is illegal. But be tolerant. */
969 if (inner.type == t_eof)
972 free_object (&inner);
975 last_non_comment_line = line_number;
979 /* Invalid input. Be tolerant, no error message. */
983 last_non_comment_line = line_number;
988 /* Read a bit vector. */
990 struct object length;
991 read_object (&length, first_in_list, new_backquote_flag,
993 /* Dots and EOF are not allowed here.
995 free_object (&length);
1000 struct object string;
1001 read_object (&string, first_in_list, new_backquote_flag,
1003 free_object (&string);
1006 /* Invalid input. Be tolerant, no error message. */
1009 last_non_comment_line = line_number;
1013 /* Read a compiled function, same syntax as a vector. */
1015 /* Read a string with properties, same syntax as a list. */
1017 struct object inner;
1019 read_object (&inner, false, new_backquote_flag, null_context);
1020 /* Dots and EOF are not allowed here.
1022 free_object (&inner);
1024 last_non_comment_line = line_number;
1029 /* Read a comment consisting of a given number of bytes. */
1031 unsigned int nskip = 0;
1036 if (!(c >= '0' && c <= '9'))
1038 nskip = 10 * nskip + (c - '0');
1043 for (; nskip > 0; nskip--)
1044 if (do_getc () == EOF)
1052 last_non_comment_line = line_number;
1057 case 'S': case 's': /* XEmacs only */
1059 struct object inner;
1060 read_object (&inner, false, new_backquote_flag, null_context);
1061 /* Dots and EOF are not allowed here.
1063 free_object (&inner);
1065 last_non_comment_line = line_number;
1069 case '0': case '1': case '2': case '3': case '4':
1070 case '5': case '6': case '7': case '8': case '9':
1071 /* Read Common Lisp style #n# or #n=. */
1075 if (!(c >= '0' && c <= '9'))
1079 /* Invalid input. Be tolerant, no error message. */
1086 read_object (op, false, new_backquote_flag, outer_context);
1087 last_non_comment_line = line_number;
1093 last_non_comment_line = line_number;
1096 if (c == 'R' || c == 'r')
1098 /* Read an integer. */
1100 if (c == '+' || c == '-')
1102 for (; c != EOF; c = do_getc ())
1109 last_non_comment_line = line_number;
1112 /* Invalid input. Be tolerant, no error message. */
1114 last_non_comment_line = line_number;
1121 /* Read an integer. */
1123 if (c == '+' || c == '-')
1125 for (; c != EOF; c = do_getc ())
1132 last_non_comment_line = line_number;
1136 case '*': /* XEmacs only */
1138 /* Read a bit-vector. */
1141 while (c == '0' || c == '1');
1145 last_non_comment_line = line_number;
1149 case '+': /* XEmacs only */
1150 case '-': /* XEmacs only */
1151 /* Simply assume every feature expression is true. */
1153 struct object inner;
1154 read_object (&inner, false, new_backquote_flag, null_context);
1155 /* Dots and EOF are not allowed here.
1157 free_object (&inner);
1162 /* Invalid input. Be tolerant, no error message. */
1164 last_non_comment_line = line_number;
1176 if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
1177 || strchr ("\"'`,(", c) != NULL)
1180 last_non_comment_line = line_number;
1188 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
1194 op->token = XMALLOC (struct token);
1195 symbol = read_token (op->token, c);
1198 op->type = t_symbol;
1199 last_non_comment_line = line_number;
1204 free_token (op->token);
1207 last_non_comment_line = line_number;
1217 extract_elisp (FILE *f,
1218 const char *real_filename, const char *logical_filename,
1219 flag_context_list_table_ty *flag_table,
1220 msgdomain_list_ty *mdlp)
1222 mlp = mdlp->item[0]->messages;
1225 real_file_name = real_filename;
1226 logical_file_name = xstrdup (logical_filename);
1229 last_comment_line = -1;
1230 last_non_comment_line = -1;
1232 flag_context_list_table = flag_table;
1236 /* Eat tokens until eof is seen. When read_object returns
1237 due to an unbalanced closing parenthesis, just restart it. */
1240 struct object toplevel_object;
1242 read_object (&toplevel_object, false, false, null_context);
1244 if (toplevel_object.type == t_eof)
1247 free_object (&toplevel_object);
1251 /* Close scanner. */
1253 real_file_name = NULL;
1254 logical_file_name = NULL;