1 /* xgettext Emacs Lisp backend.
2 Copyright (C) 2001-2003, 2005-2009 Free Software Foundation, Inc.
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
40 #define _(s) gettext(s)
43 /* Summary of Emacs Lisp syntax:
44 - ';' starts a comment until end of line.
45 - '#@nn' starts a comment of nn bytes.
46 - Integers are constituted of an optional prefix (#b, #B for binary,
47 #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
48 an optional sign (+ or -), the digits, and an optional trailing dot.
49 - Characters are written as '?' followed by the character, possibly
50 with an escape sequence, for examples '?a', '?\n', '?\177'.
51 - Strings are delimited by double quotes. Backslash introduces an escape
52 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
53 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
54 - Symbols: can contain meta-characters if preceded by backslash.
55 - Uninterned symbols: written as #:SYMBOL.
58 The reader is implemented in emacs-21.1/src/lread.c. */
61 /* ====================== Keyword set customization. ====================== */
63 /* If true extract all strings. */
64 static bool extract_all = false;
66 static hash_table keywords;
67 static bool default_keywords = true;
71 x_elisp_extract_all ()
78 x_elisp_keyword (const char *name)
81 default_keywords = false;
85 struct callshape shape;
88 if (keywords.table == NULL)
89 hash_init (&keywords, 100);
91 split_keywordspec (name, &end, &shape);
93 /* The characters between name and end should form a valid Lisp
95 colon = strchr (name, ':');
96 if (colon == NULL || colon >= end)
97 insert_keyword_callshape (&keywords, name, end - name, &shape);
101 /* Finish initializing the keywords hash table.
102 Called after argument processing, before each file is processed. */
106 if (default_keywords)
108 /* When adding new keywords here, also update the documentation in
110 x_elisp_keyword ("_");
111 default_keywords = false;
116 init_flag_table_elisp ()
118 xgettext_record_flag ("_:1:pass-elisp-format");
119 xgettext_record_flag ("format:1:elisp-format");
123 /* ======================== Reading of characters. ======================== */
125 /* Real filename, used in error messages about the input file. */
126 static const char *real_file_name;
128 /* Logical filename and line number, used to label the extracted messages. */
129 static char *logical_file_name;
130 static int line_number;
132 /* The input file stream. */
136 /* Fetch the next character from the input file. */
145 error (EXIT_FAILURE, errno, _("\
146 error while reading \"%s\""), real_file_name);
154 /* Put back the last fetched character, not EOF. */
164 /* ========================== Reading of tokens. ========================== */
167 /* A token consists of a sequence of characters. */
170 int allocated; /* number of allocated 'token_char's */
171 int charcount; /* number of used 'token_char's */
172 char *chars; /* the token's constituents */
175 /* Initialize a 'struct token'. */
177 init_token (struct token *tp)
180 tp->chars = XNMALLOC (tp->allocated, char);
184 /* Free the memory pointed to by a 'struct token'. */
186 free_token (struct token *tp)
191 /* Ensure there is enough room in the token for one more character. */
193 grow_token (struct token *tp)
195 if (tp->charcount == tp->allocated)
198 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
202 /* Test whether a token has integer syntax. */
204 is_integer (const char *p)
206 /* NB: Yes, '+.' and '-.' both designate the integer 0. */
207 const char *p_start = p;
209 if (*p == '+' || *p == '-')
213 while (*p >= '0' && *p <= '9')
215 if (p > p_start && *p == '.')
220 /* Test whether a token has float syntax. */
222 is_float (const char *p)
224 enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
228 if (*p == '+' || *p == '-')
230 if (*p >= '0' && *p <= '9')
235 while (*p >= '0' && *p <= '9');
242 if (*p >= '0' && *p <= '9')
247 while (*p >= '0' && *p <= '9');
249 if (*p == 'e' || *p == 'E')
253 if (*p == '+' || *p == '-')
255 if (*p >= '0' && *p <= '9')
260 while (*p >= '0' && *p <= '9');
262 else if (p[-1] == '+'
263 && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
264 || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
271 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
272 || state == (DOT_CHAR | TRAIL_INT)
273 || state == (LEAD_INT | E_CHAR | EXP_INT)
274 || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
275 || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
278 /* Read the next token. 'first' is the first character, which has already
279 been read. Returns true for a symbol, false for a number. */
281 read_token (struct token *tp, int first)
290 for (;; c = do_getc ())
294 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
296 if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
297 || c == '[' || c == ']' || c == '#')
304 /* Invalid, but be tolerant. */
308 tp->chars[tp->charcount++] = c;
314 return true; /* symbol */
316 /* Add a NUL byte at the end, for is_integer and is_float. */
318 tp->chars[tp->charcount] = '\0';
320 if (is_integer (tp->chars) || is_float (tp->chars))
321 return false; /* number */
323 return true; /* symbol */
327 /* ========================= Accumulating comments ========================= */
331 static size_t bufmax;
332 static size_t buflen;
343 if (buflen >= bufmax)
345 bufmax = 2 * bufmax + 10;
346 buffer = xrealloc (buffer, bufmax);
348 buffer[buflen++] = c;
352 comment_line_end (size_t chars_to_remove)
354 buflen -= chars_to_remove;
356 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
358 if (chars_to_remove == 0 && buflen >= bufmax)
360 bufmax = 2 * bufmax + 10;
361 buffer = xrealloc (buffer, bufmax);
363 buffer[buflen] = '\0';
364 savable_comment_add (buffer);
368 /* These are for tracking whether comments count as immediately before
370 static int last_comment_line;
371 static int last_non_comment_line;
374 /* ========================= Accumulating messages ========================= */
377 static message_list_ty *mlp;
380 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
383 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
384 Other objects need not to be represented precisely. */
387 t_symbol, /* symbol */
388 t_string, /* string */
389 t_other, /* other kind of real object */
390 t_dot, /* '.' pseudo object */
391 t_listclose, /* ')' pseudo object */
392 t_vectorclose,/* ']' pseudo object */
393 t_eof /* EOF marker */
398 enum object_type type;
399 struct token *token; /* for t_symbol and t_string */
400 int line_number_at_start; /* for t_string */
403 /* Free the memory pointed to by a 'struct object'. */
405 free_object (struct object *op)
407 if (op->type == t_symbol || op->type == t_string)
409 free_token (op->token);
414 /* Convert a t_symbol/t_string token to a char*. */
416 string_of_object (const struct object *op)
421 if (!(op->type == t_symbol || op->type == t_string))
423 n = op->token->charcount;
424 str = XNMALLOC (n + 1, char);
425 memcpy (str, op->token->chars, n);
430 /* Context lookup table. */
431 static flag_context_list_table_ty *flag_context_list_table;
433 /* Returns the character represented by an escape sequence. */
434 #define IGNORABLE_ESCAPE (EOF - 1)
436 do_getc_escaped (int c, bool in_string)
460 return IGNORABLE_ESCAPE;
463 return (in_string ? IGNORABLE_ESCAPE : ' ');
470 /* Invalid input. But be tolerant. */
480 c = do_getc_escaped (c, false);
484 case 'S': /* shift */
489 /* Invalid input. But be tolerant. */
499 c = do_getc_escaped (c, false);
501 return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
503 case 'H': /* hyper */
505 case 's': /* super */
510 /* Invalid input. But be tolerant. */
520 c = do_getc_escaped (c, false);
529 /* Invalid input. But be tolerant. */
541 c = do_getc_escaped (c, false);
545 if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
547 if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
549 #if 0 /* We cannot handle NUL bytes in strings. */
555 case '0': case '1': case '2': case '3': case '4':
556 case '5': case '6': case '7':
557 /* An octal escape, as in ANSI C. */
564 if (c >= '0' && c <= '7')
566 n = (n << 3) + (c - '0');
570 if (c >= '0' && c <= '7')
571 n = (n << 3) + (c - '0');
579 return (unsigned char) n;
583 /* A hexadecimal escape, as in ANSI C. */
592 else if (c >= '0' && c <= '9')
593 n = (n << 4) + (c - '0');
594 else if (c >= 'A' && c <= 'F')
595 n = (n << 4) + (c - 'A' + 10);
596 else if (c >= 'a' && c <= 'f')
597 n = (n << 4) + (c - 'a' + 10);
604 return (unsigned char) n;
608 /* Ignore Emacs multibyte character stuff. All the strings we are
609 interested in are ASCII strings. */
614 /* Read the next object.
615 'first_in_list' and 'new_backquote_flag' are used for reading old
616 backquote syntax and new backquote syntax. */
618 read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
619 flag_context_ty outer_context)
634 /* Comments assumed to be grouped with a message must immediately
635 precede it, with no non-whitespace token on a line between
637 if (last_non_comment_line > last_comment_line)
638 savable_comment_reset ();
643 int arg = 0; /* Current argument number. */
644 flag_context_list_iterator_ty context_iter;
645 const struct callshapes *shapes = NULL;
646 struct arglist_parser *argparser = NULL;
651 flag_context_ty inner_context;
654 inner_context = null_context;
657 inherited_context (outer_context,
658 flag_context_list_iterator_advance (
661 read_object (&inner, arg == 0, new_backquote_flag,
664 /* Recognize end of list. */
665 if (inner.type == t_listclose)
668 /* Don't bother converting "()" to "NIL". */
669 last_non_comment_line = line_number;
670 if (argparser != NULL)
671 arglist_parser_done (argparser, arg);
675 /* Dots are not allowed in every position. ']' is not allowed.
678 /* EOF inside list is illegal. But be tolerant. */
679 if (inner.type == t_eof)
684 /* This is the function position. */
685 if (inner.type == t_symbol)
687 char *symbol_name = string_of_object (&inner);
690 if (hash_find_entry (&keywords,
691 symbol_name, strlen (symbol_name),
694 shapes = (const struct callshapes *) keyword_value;
696 argparser = arglist_parser_alloc (mlp, shapes);
699 flag_context_list_iterator (
700 flag_context_list_table_lookup (
701 flag_context_list_table,
702 symbol_name, strlen (symbol_name)));
707 context_iter = null_context_list_iterator;
711 /* These are the argument positions. */
712 if (argparser != NULL && inner.type == t_string)
713 arglist_parser_remember (argparser, arg,
714 string_of_object (&inner),
717 inner.line_number_at_start,
721 free_object (&inner);
724 if (argparser != NULL)
725 arglist_parser_done (argparser, arg);
728 last_non_comment_line = line_number;
732 /* Tell the caller about the end of list.
733 Unmatched closing parenthesis is illegal. But be tolerant. */
734 op->type = t_listclose;
735 last_non_comment_line = line_number;
744 read_object (&inner, false, new_backquote_flag, null_context);
746 /* Recognize end of vector. */
747 if (inner.type == t_vectorclose)
750 last_non_comment_line = line_number;
754 /* Dots and ')' are not allowed. But be tolerant. */
756 /* EOF inside vector is illegal. But be tolerant. */
757 if (inner.type == t_eof)
760 free_object (&inner);
764 last_non_comment_line = line_number;
768 /* Tell the caller about the end of vector.
769 Unmatched closing bracket is illegal. But be tolerant. */
770 op->type = t_vectorclose;
771 last_non_comment_line = line_number;
778 read_object (&inner, false, new_backquote_flag, null_context);
780 /* Dots and EOF are not allowed here. But be tolerant. */
782 free_object (&inner);
785 last_non_comment_line = line_number;
795 read_object (&inner, false, true, null_context);
797 /* Dots and EOF are not allowed here. But be tolerant. */
799 free_object (&inner);
802 last_non_comment_line = line_number;
807 if (!new_backquote_flag)
811 /* The ,@ handling inside lists is wrong anyway, because
812 ,@form expands to an unknown number of elements. */
813 if (c != EOF && c != '@' && c != '.')
819 read_object (&inner, false, false, null_context);
821 /* Dots and EOF are not allowed here. But be tolerant. */
823 free_object (&inner);
826 last_non_comment_line = line_number;
832 bool all_semicolons = true;
834 last_comment_line = line_number;
839 if (c == EOF || c == '\n')
842 all_semicolons = false;
845 /* We skip all leading white space, but not EOLs. */
846 if (!(buflen == 0 && (c == ' ' || c == '\t')))
850 comment_line_end (0);
856 op->token = XMALLOC (struct token);
857 init_token (op->token);
858 op->line_number_at_start = line_number;
863 /* Invalid input. Be tolerant, no error message. */
871 /* Invalid input. Be tolerant, no error message. */
873 c = do_getc_escaped (c, true);
875 /* Invalid input. Be tolerant, no error message. */
877 if (c == IGNORABLE_ESCAPE)
878 /* Ignore escaped newline and escaped space. */
882 grow_token (op->token);
883 op->token->chars[op->token->charcount++] = c;
888 grow_token (op->token);
889 op->token->chars[op->token->charcount++] = c;
898 pos.file_name = logical_file_name;
899 pos.line_number = op->line_number_at_start;
900 remember_a_message (mlp, NULL, string_of_object (op),
901 null_context, &pos, NULL, savable_comment);
903 last_non_comment_line = line_number;
910 /* Invalid input. Be tolerant, no error message. */
916 /* Invalid input. Be tolerant, no error message. */
920 c = do_getc_escaped (c, false);
922 /* Invalid input. Be tolerant, no error message. */
926 /* Impossible to deal with Emacs multibyte character stuff here. */
928 last_non_comment_line = line_number;
932 /* Dispatch macro handling. */
935 /* Invalid input. Be tolerant, no error message. */
949 /* Read a char table, same syntax as a vector. */
954 read_object (&inner, false, new_backquote_flag,
957 /* Recognize end of vector. */
958 if (inner.type == t_vectorclose)
961 last_non_comment_line = line_number;
965 /* Dots and ')' are not allowed. But be tolerant. */
967 /* EOF inside vector is illegal. But be tolerant. */
968 if (inner.type == t_eof)
971 free_object (&inner);
974 last_non_comment_line = line_number;
978 /* Invalid input. Be tolerant, no error message. */
982 last_non_comment_line = line_number;
987 /* Read a bit vector. */
989 struct object length;
990 read_object (&length, first_in_list, new_backquote_flag,
992 /* Dots and EOF are not allowed here.
994 free_object (&length);
999 struct object string;
1000 read_object (&string, first_in_list, new_backquote_flag,
1002 free_object (&string);
1005 /* Invalid input. Be tolerant, no error message. */
1008 last_non_comment_line = line_number;
1012 /* Read a compiled function, same syntax as a vector. */
1014 /* Read a string with properties, same syntax as a list. */
1016 struct object inner;
1018 read_object (&inner, false, new_backquote_flag, null_context);
1019 /* Dots and EOF are not allowed here.
1021 free_object (&inner);
1023 last_non_comment_line = line_number;
1028 /* Read a comment consisting of a given number of bytes. */
1030 unsigned int nskip = 0;
1035 if (!(c >= '0' && c <= '9'))
1037 nskip = 10 * nskip + (c - '0');
1042 for (; nskip > 0; nskip--)
1043 if (do_getc () == EOF)
1051 last_non_comment_line = line_number;
1056 case 'S': case 's': /* XEmacs only */
1058 struct object inner;
1059 read_object (&inner, false, new_backquote_flag, null_context);
1060 /* Dots and EOF are not allowed here.
1062 free_object (&inner);
1064 last_non_comment_line = line_number;
1068 case '0': case '1': case '2': case '3': case '4':
1069 case '5': case '6': case '7': case '8': case '9':
1070 /* Read Common Lisp style #n# or #n=. */
1074 if (!(c >= '0' && c <= '9'))
1078 /* Invalid input. Be tolerant, no error message. */
1085 read_object (op, false, new_backquote_flag, outer_context);
1086 last_non_comment_line = line_number;
1092 last_non_comment_line = line_number;
1095 if (c == 'R' || c == 'r')
1097 /* Read an integer. */
1099 if (c == '+' || c == '-')
1101 for (; c != EOF; c = do_getc ())
1108 last_non_comment_line = line_number;
1111 /* Invalid input. Be tolerant, no error message. */
1113 last_non_comment_line = line_number;
1120 /* Read an integer. */
1122 if (c == '+' || c == '-')
1124 for (; c != EOF; c = do_getc ())
1131 last_non_comment_line = line_number;
1135 case '*': /* XEmacs only */
1137 /* Read a bit-vector. */
1140 while (c == '0' || c == '1');
1144 last_non_comment_line = line_number;
1148 case '+': /* XEmacs only */
1149 case '-': /* XEmacs only */
1150 /* Simply assume every feature expression is true. */
1152 struct object inner;
1153 read_object (&inner, false, new_backquote_flag, null_context);
1154 /* Dots and EOF are not allowed here.
1156 free_object (&inner);
1161 /* Invalid input. Be tolerant, no error message. */
1163 last_non_comment_line = line_number;
1175 if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
1176 || strchr ("\"'`,(", c) != NULL)
1179 last_non_comment_line = line_number;
1187 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
1193 op->token = XMALLOC (struct token);
1194 symbol = read_token (op->token, c);
1197 op->type = t_symbol;
1198 last_non_comment_line = line_number;
1203 free_token (op->token);
1206 last_non_comment_line = line_number;
1216 extract_elisp (FILE *f,
1217 const char *real_filename, const char *logical_filename,
1218 flag_context_list_table_ty *flag_table,
1219 msgdomain_list_ty *mdlp)
1221 mlp = mdlp->item[0]->messages;
1224 real_file_name = real_filename;
1225 logical_file_name = xstrdup (logical_filename);
1228 last_comment_line = -1;
1229 last_non_comment_line = -1;
1231 flag_context_list_table = flag_table;
1235 /* Eat tokens until eof is seen. When read_object returns
1236 due to an unbalanced closing parenthesis, just restart it. */
1239 struct object toplevel_object;
1241 read_object (&toplevel_object, false, false, null_context);
1243 if (toplevel_object.type == t_eof)
1246 free_object (&toplevel_object);
1250 /* Close scanner. */
1252 real_file_name = NULL;
1253 logical_file_name = NULL;