1 /* rclex.c -- lexer for Windows rc files parser */
3 /* Copyright (C) 1997-2019 Free Software Foundation, Inc.
5 Written by Kai Tietz, Onevision.
7 This file is part of GNU Binutils.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
25 /* This is a lexer used by the Windows rc file parser. It basically
26 just recognizes a bunch of keywords. */
31 #include "libiberty.h"
32 #include "safe-ctype.h"
38 /* Whether we are in rcdata mode, in which we return the lengths of
41 static int rcdata_mode;
43 /* Whether we are suppressing lines from cpp (including windows.h or
44 headers from your C sources may bring in externs and typedefs).
45 When active, we return IGNORED_TOKEN, which lets us ignore these
46 outside of resource constructs. Thus, it isn't required to protect
47 all the non-preprocessor lines in your header files with #ifdef
48 RC_INVOKED. It also means your RC file can't include other RC
49 files if they're named "*.h". Sorry. Name them *.rch or whatever. */
51 static int suppress_cpp_data;
53 #define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
55 /* The first filename we detect in the cpp output. We use this to
56 tell included files from the original file. */
58 static char *initial_fn;
60 /* List of allocated strings. */
64 struct alloc_string *next;
68 static struct alloc_string *strings;
76 #define K(KEY) { #KEY, KEY }
77 #define KRT(KEY) { #KEY, RT_##KEY }
79 static const struct rclex_keywords keywds[] =
81 K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
82 K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
83 K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
84 K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
85 K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
86 K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
87 K(DLGINCLUDE), K(DLGINIT),
88 K(EDITTEXT), K(END), K(EXSTYLE),
89 K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
90 K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
91 K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
92 K(HEDIT), K(HELP), K(HTML),
93 K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
94 K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
95 K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
96 K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
98 K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
99 K(PURE), K(PUSHBOX), K(PUSHBUTTON),
100 K(RADIOBUTTON), K(RCDATA), K(RTEXT),
101 K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
102 K(STRINGTABLE), K(STYLE),
105 K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
110 /* External input stream from resrc */
111 extern FILE *cpp_pipe;
113 /* Lexical scanner helpers. */
114 static int rclex_lastch = -1;
115 static size_t rclex_tok_max = 0;
116 static size_t rclex_tok_pos = 0;
117 static char *rclex_tok = NULL;
120 rclex_translatekeyword (const char *key)
122 if (key && ISUPPER (key[0]))
124 const struct rclex_keywords *kw = &keywds[0];
128 if (! strcmp (kw->name, key))
132 while (kw->name != NULL);
137 /* Handle a C preprocessor line. */
142 const char *s = rclex_tok;
151 /* Check for #pragma code_page ( DEFAULT | <nr>). */
153 mlen = strlen ("pragma");
154 if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
162 mlen = strlen ("code_page");
163 if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
164 /* FIXME: We ought to issue a warning message about an unrecognised pragma. */
170 /* FIXME: We ought to issue an error message about a malformed pragma. */
175 if (*s == 0 || (end = strchr (s, ')')) == NULL)
176 /* FIXME: We ought to issue an error message about a malformed pragma. */
178 len = (size_t) (end - s);
179 fn = xmalloc (len + 1);
183 while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
185 if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
186 wind_current_codepage = wind_default_codepage;
191 if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
192 ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
194 ncp = (rc_uint_type) strtol (fn, NULL, 10);
195 if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
196 fatal (_("invalid value specified for pragma code_page.\n"));
197 wind_current_codepage = ncp;
203 line = strtol (s, &send, 0);
204 if (*send != '\0' && ! ISSPACE (*send))
207 /* Subtract 1 because we are about to count the newline. */
208 rc_lineno = line - 1;
218 send = strchr (s, '"');
222 fn = xmalloc (send - s + 1);
223 strncpy (fn, s, send - s);
231 initial_fn = xmalloc (strlen (fn) + 1);
232 strcpy (initial_fn, fn);
235 /* Allow the initial file, regardless of name. Suppress all other
236 files if they end in ".h" (this allows included "*.rc"). */
237 if (strcmp (initial_fn, fn) == 0
238 || strcmp (fn + strlen (fn) - 2, ".h") != 0)
239 suppress_cpp_data = 0;
241 suppress_cpp_data = 1;
244 /* Allocate a string of a given length. */
249 struct alloc_string *as;
251 as = xmalloc (sizeof *as);
252 as->s = xmalloc (len);
260 /* Handle a quoted string. The quotes are stripped. A pair of quotes
261 in a string is turned into a single quote. Adjacent strings
262 separated by whitespace are merged, as in C. */
265 handle_quotes (rc_uint_type *len)
267 const char *input = rclex_tok;
273 ret = get_string (strlen (input) + 1);
287 rcparse_warning ("backslash at end of string");
291 rcparse_warning ("use \"\" to put \" in a string");
297 *s++ = ESCAPE_B; /* Strange, but true... */
335 case '0': case '1': case '2': case '3':
336 case '4': case '5': case '6': case '7':
339 if (*t >= '0' && *t <= '7')
341 ch = (ch << 3) | (*t - '0');
343 if (*t >= '0' && *t <= '7')
345 ch = (ch << 3) | (*t - '0');
355 /* We only handle single byte chars here. Make sure
356 we finish an escape sequence like "\xB0ABC" after
357 the first two digits. */
359 while (num_xdigits--)
361 if (*t >= '0' && *t <= '9')
362 ch = (ch << 4) | (*t - '0');
363 else if (*t >= 'a' && *t <= 'f')
364 ch = (ch << 4) | (*t - 'a' + 10);
365 else if (*t >= 'A' && *t <= 'F')
366 ch = (ch << 4) | (*t - 'A' + 10);
375 rcparse_warning ("unrecognized escape sequence");
383 else if (t[1] == '\0')
385 else if (t[1] == '"')
394 rcparse_warning ("unexpected character after '\"'");
415 /* Allocate a unicode string of a given length. */
418 get_unistring (int len)
420 return (unichar *) get_string (len * sizeof (unichar));
423 /* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
424 in a string is turned into a single quote. Adjacent strings
425 separated by whitespace are merged, as in C. */
428 handle_uniquotes (rc_uint_type *len)
430 const char *input = rclex_tok;
436 ret = get_unistring (strlen (input) + 1);
440 if ((*t == 'L' || *t == 'l') && t[1] == '"')
452 rcparse_warning ("backslash at end of string");
456 rcparse_warning ("use \"\" to put \" in a string");
460 *s++ = ESCAPE_B; /* Strange, but true... */
495 *s++ = (unichar) *t++;
498 case '0': case '1': case '2': case '3':
499 case '4': case '5': case '6': case '7':
502 if (*t >= '0' && *t <= '7')
504 ch = (ch << 3) | (*t - '0');
506 if (*t >= '0' && *t <= '7')
508 ch = (ch << 3) | (*t - '0');
518 /* We only handle two byte chars here. Make sure
519 we finish an escape sequence like "\xB0ABC" after
520 the first two digits. */
522 while (num_xdigits--)
524 if (*t >= '0' && *t <= '9')
525 ch = (ch << 4) | (*t - '0');
526 else if (*t >= 'a' && *t <= 'f')
527 ch = (ch << 4) | (*t - 'a' + 10);
528 else if (*t >= 'A' && *t <= 'F')
529 ch = (ch << 4) | (*t - 'A' + 10);
538 rcparse_warning ("unrecognized escape sequence");
540 *s++ = (unichar) *t++;
545 *s++ = (unichar) *t++;
546 else if (t[1] == '\0')
548 else if (t[1] == '"')
556 assert (ISSPACE (*t));
577 /* Discard all the strings we have allocated. The parser calls this
578 when it no longer needs them. */
581 rcparse_discard_strings (void)
583 struct alloc_string *as;
588 struct alloc_string *n;
599 /* Enter rcdata mode. */
601 rcparse_rcdata (void)
606 /* Go back to normal mode from rcdata mode. */
608 rcparse_normal (void)
614 rclex_tok_add_char (int ch)
616 if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
618 char *h = xmalloc (rclex_tok_max + 9);
624 memcpy (h, rclex_tok, rclex_tok_pos + 1);
633 rclex_tok[rclex_tok_pos++] = (char) ch;
634 rclex_tok[rclex_tok_pos] = 0;
642 if ((r = rclex_lastch) != -1)
649 if (! cpp_pipe || feof (cpp_pipe)
650 || fread (&ch, 1, 1,cpp_pipe) != 1)
652 r = ((int) ch) & 0xff;
654 while (r == 0 || r == '\r');
656 rclex_tok_add_char (r);
665 if ((r = rclex_lastch) == -1)
667 if ((r = rclex_readch ()) != -1)
670 if (rclex_tok_pos > 0)
671 rclex_tok[--rclex_tok_pos] = 0;
682 while ((c = rclex_peekch ()) != -1)
689 if ((c = rclex_peekch ()) == -1 || c == '\n')
693 else if (rclex_readch () == '"')
696 Skip any whitespace after the end of the double quotes. */
707 if (rclex_peekch () == '"')
718 rc_uint_type base = 10;
719 rc_uint_type ret, val;
726 switch (rclex_peekch ())
740 ret = (rc_uint_type) (ch - '0');
741 while ((ch = rclex_peekch ()) != -1)
744 val = (rc_uint_type) (ch - '0');
745 else if (ch >= 'a' && ch <= 'f')
746 val = (rc_uint_type) ((ch - 'a') + 10);
747 else if (ch >= 'A' && ch <= 'F')
748 val = (rc_uint_type) ((ch - 'A') + 10);
752 if (! warned && val >= base)
755 rcparse_warning ("digit exceeds base");
763 /* yyparser entry method. */
773 /* Make sure that rclex_tok is initialized. */
775 rclex_tok_add_char (-1);
785 if ((ch = rclex_readch ()) == -1)
795 while ((ch = rclex_peekch ()) != -1 && ch != '\n')
802 ch = IGNORE_CPP (BEG);
806 ch = IGNORE_CPP (END);
809 case '0': case '1': case '2': case '3': case '4':
810 case '5': case '6': case '7': case '8': case '9':
811 yylval.i.val = read_digit (ch);
813 switch (rclex_peekch ())
820 ch = IGNORE_CPP (NUMBER);
824 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
825 if (ch == IGNORED_TOKEN)
827 s = handle_quotes (&length);
832 yylval.ss.length = length;
837 if (rclex_peekch () == '"')
841 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
842 if (ch == IGNORED_TOKEN)
844 us = handle_uniquotes (&length);
849 yylval.suni.length = length;
856 if (ISIDST (ch) || ch=='$')
858 while ((ch = rclex_peekch ()) != -1
859 && (ISIDNUM (ch) || ch == '$' || ch == '.'
860 || ch == ':' || ch == '\\' || ch == '/'
861 || ch == '_' || ch == '-')
864 ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
867 s = get_string (strlen (rclex_tok) + 1);
868 strcpy (s, rclex_tok);
871 else if (ch == BLOCK)
873 const char *hs = NULL;
882 hs = yylval.s = yylval.ss.s;
887 rcparse_warning ("BLOCK expects a string as argument.");
890 else if (! strcmp (hs, "StringFileInfo"))
891 ch = BLOCKSTRINGFILEINFO;
892 else if (! strcmp (hs, "VarFileInfo"))
893 ch = BLOCKVARFILEINFO;
897 ch = IGNORE_CPP (ch);
901 while (ch == IGNORED_TOKEN);