scanner: more verbose parser error messages
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33
34 #include <glib.h>
35 #include "sourcescanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38
39 int lineno;
40 char linebuf[2000];
41
42 #undef YY_BUF_SIZE
43 #define YY_BUF_SIZE 1048576
44
45 extern int yylex (GISourceScanner *scanner);
46 #define YY_DECL int yylex (GISourceScanner *scanner)
47 static int yywrap (void);
48 static void parse_comment (GISourceScanner *scanner);
49 static void process_directive (GISourceScanner *scanner);
50 static int check_identifier (GISourceScanner *scanner, const char *);
51 static int parse_ignored_macro (void);
52 %}
53
/* nounput asks flex not to emit its unput helper; nothing in this file calls
 * unput(). */
%option nounput

/* Named sub-patterns referenced by the literal rules in the rules section. */
/* intsuffix: u or U followed by up to two l or L, or one or two l or L
 * followed by an optional u or U. */
intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
/* fracconst: optional digits, a dot and at least one digit; or at least one
 * digit followed by a dot. */
fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
/* exppart: e or E, an optional sign, then at least one digit. */
exppart                                 [eE][-+]?[0-9]+
/* floatsuffix: a single f, F, l or L. */
floatsuffix                             [fFlL]
/* chartext: one character that is neither a backslash nor a single quote, or
 * a backslash followed by any character the dot pattern accepts. */
chartext                                ([^\\\'])|(\\.) 
/* stringtext: same as chartext, with the double quote as the excluded
 * delimiter. */
stringtext                              ([^\\\"])|(\\.)

%%
62
63 %%
64
\n.*                                    { strncpy(linebuf, yytext+1, sizeof(linebuf)); /* save the next line */
                                                linebuf[sizeof(linebuf)-1]='\0'; /* strncpy adds no terminator when it truncates */
                                                /* printf("%4d:%s\n",lineno,linebuf); */
                                                yyless(1);      /* give back all but the \n to rescan */
                                                ++lineno;
                                        }
        /* The rule above matches a newline together with the text of the line
         * that follows it: that text is copied into linebuf, everything but the
         * newline is handed back so the line is scanned by the other rules, and
         * lineno advances by one. */
        /* A backslash directly before a newline only advances lineno. */
"\\\n"                                  { ++lineno; }
[\t\f\v\r ]+                            { /* Ignore whitespace. */ }

        /* A block comment opener hands the rest of the comment to
         * parse_comment(); a line comment is matched up to the end of its line
         * and produces no token. */
"/*"                                    { parse_comment(scanner); }
"//".*                                  { }

        /* A define directive yields FUNCTION_MACRO when the macro name is
         * directly followed by an opening parenthesis, which is pushed back with
         * yyless so that it is scanned as a token of its own, and OBJECT_MACRO
         * otherwise. Both patterns require exactly one space after the word
         * define. */
"#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
"#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }

        /* Any other hash character: process_directive() reads the remainder of
         * the line and extracts line number and file name from it. */
"#"                                     { process_directive(scanner); }
81
"{"                                     { return '{'; }
        /* Single-character punctuators are returned as their own character
         * code. The two-character spellings that start with a less-than sign or
         * end with a greater-than sign in this first group return the same token
         * as the brace or bracket listed next to them. */
"<%"                                    { return '{'; }
"}"                                     { return '}'; }
"%>"                                    { return '}'; }
"["                                     { return '['; }
"<:"                                    { return '['; }
"]"                                     { return ']'; }
":>"                                    { return ']'; }
"("                                     { return '('; }
")"                                     { return ')'; }
";"                                     { return ';'; }
":"                                     { return ':'; }
"..."                                   { return ELLIPSIS; }
"?"                                     { return '?'; }
"."                                     { return '.'; }
"+"                                     { return '+'; }
"-"                                     { return '-'; }
"*"                                     { return '*'; }
"/"                                     { return '/'; }
"%"                                     { return '%'; }
"^"                                     { return '^'; }
"&"                                     { return '&'; }
"|"                                     { return '|'; }
"~"                                     { return '~'; }
"!"                                     { return '!'; }
"="                                     { return '='; }
"<"                                     { return '<'; }
">"                                     { return '>'; }
        /* Multi-character operators return named token codes; these names are
         * not defined in this file (presumably they come from scannerparser.h --
         * confirm). Longer operators win over their prefixes because flex
         * prefers the longest match. */
"+="                                    { return ADDEQ; }
"-="                                    { return SUBEQ; }
"*="                                    { return MULEQ; }
"/="                                    { return DIVEQ; }
"%="                                    { return MODEQ; }
"^="                                    { return XOREQ; }
"&="                                    { return ANDEQ; }
"|="                                    { return OREQ; }
"<<"                                    { return SL; }
">>"                                    { return SR; }
"<<="                                   { return SLEQ; }
">>="                                   { return SREQ; }
"=="                                    { return EQ; }
"!="                                    { return NOTEQ; }
"<="                                    { return LTEQ; }
">="                                    { return GTEQ; }
"&&"                                    { return ANDAND; }
"||"                                    { return OROR; }
"++"                                    { return PLUSPLUS; }
"--"                                    { return MINUSMINUS; }
","                                     { return ','; }
"->"                                    { return ARROW; }
132
"__asm"                                 { if (!parse_ignored_macro()) REJECT; }
        /* The rules in this group cover double-underscore spellings and _Bool.
         * Those that call parse_ignored_macro() produce no token when it returns
         * true, because the action then ends without a return statement; when it
         * returns false, REJECT passes the matched text to the next best rule.
         * NOTE(review): parse_ignored_macro() reads ahead with input() and never
         * pushes anything back, so characters it consumed before failing are not
         * rescanned -- confirm this is acceptable. */
"__asm__"                               { if (!parse_ignored_macro()) REJECT; }
"__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
"__attribute"                           { if (!parse_ignored_macro()) REJECT; }
"__const"                               { return CONST; }
"__extension__"                         { return EXTENSION; }
"__inline"                              { return INLINE; }
"__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
"__signed__"                            { return SIGNED; }
"__restrict"                            { return RESTRICT; }
"__typeof"                              { if (!parse_ignored_macro()) REJECT; }
"_Bool"                                 { return BOOL; }

        /* While scanner->macro_scan is set, an identifier-shaped word that no
         * earlier rule of the same length claimed is returned as IDENTIFIER.
         * Because this rule is listed before the keyword rules, that includes
         * the keywords that follow it. When macro_scan is not set, REJECT lets
         * the later rules handle the word. */
[a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }
147
"asm"                                   { if (!parse_ignored_macro()) REJECT; }
        /* The asm rule above is skipped together with its parenthesised
         * argument when parse_ignored_macro() succeeds and is rejected
         * otherwise. Every other rule in this group maps one keyword to the
         * token code of the same name. */
"auto"                                  { return AUTO; }
"break"                                 { return BREAK; }
"case"                                  { return CASE; }
"char"                                  { return CHAR; }
"const"                                 { return CONST; }
"continue"                              { return CONTINUE; }
"default"                               { return DEFAULT; }
"do"                                    { return DO; }
"double"                                { return DOUBLE; }
"else"                                  { return ELSE; }
"enum"                                  { return ENUM; }
"extern"                                { return EXTERN; }
"float"                                 { return FLOAT; }
"for"                                   { return FOR; }
"goto"                                  { return GOTO; }
"if"                                    { return IF; }
"inline"                                { return INLINE; }
"int"                                   { return INT; }
"long"                                  { return LONG; }
"register"                              { return REGISTER; }
"restrict"                              { return RESTRICT; }
"return"                                { return RETURN; }
"short"                                 { return SHORT; }
"signed"                                { return SIGNED; }
"sizeof"                                { return SIZEOF; }
"static"                                { return STATIC; }
"struct"                                { return STRUCT; }
"switch"                                { return SWITCH; }
"typedef"                               { return TYPEDEF; }
"union"                                 { return UNION; }
"unsigned"                              { return UNSIGNED; }
"void"                                  { return VOID; }
"volatile"                              { return VOLATILE; }
"while"                                 { return WHILE; }

        /* Fallback for every remaining identifier-shaped word:
         * check_identifier() decides between TYPEDEF_NAME and IDENTIFIER. */
[a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
185
"0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
        /* Integer literals: the hexadecimal form above, then the form with a
         * leading zero and octal digits, then plain decimal digits. Each accepts
         * an optional intsuffix and returns INTEGER. */
"0"[0-7]+{intsuffix}?                   { return INTEGER; }
[0-9]+{intsuffix}?                      { return INTEGER; }

        /* Floating literals: a fractional constant with optional exponent, or
         * digits with a mandatory exponent; both accept an optional suffix. */
{fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
[0-9]+{exppart}{floatsuffix}?           { return FLOATING; }

        /* Character and string literals, each with an optional L prefix. */
"'"{chartext}*"'"                       { return CHARACTER; }
"L'"{chartext}*"'"                      { return CHARACTER; }

"\""{stringtext}*"\""                   { return STRING; }
"L\""{stringtext}*"\""                  { return STRING; }

        /* Catch-all: any other single character, unless it is NUL, is reported
         * on stderr with the current file name and line number. The action has
         * no return statement, so no token is produced for it. */
.                                       { if (yytext[0]) fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
200
201 %%
202
/*
 * End-of-input hook required by the generated scanner.
 *
 * Always returns 1.  According to the flex manual a nonzero result tells
 * the scanner that no further input follows.
 */
static int
yywrap (void)
{
  enum { NO_FURTHER_INPUT = 1 };

  return NO_FURTHER_INPUT;
}
208
209
210 static void
211 parse_comment (GISourceScanner *scanner)
212 {
213   GString *comment;
214   int c1, c2;
215
216   c1 = input();
217   c2 = input();
218
219   comment = g_string_new ("");
220
221   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
222     {
223       g_string_append_c (comment, c1);
224
225       if (c1 == '\n')
226         lineno++;
227
228       c1 = c2;
229       c2 = input();
230
231     }
232
233   scanner->comments = g_slist_prepend (scanner->comments,
234                                        g_string_free (comment, FALSE));
235 }
236
237 static int
238 check_identifier (GISourceScanner *scanner,
239                   const char  *s)
240 {
241         /*
242          * This function checks if `s' is a type name or an
243          * identifier.
244          */
245
246         if (gi_source_scanner_is_typedef (scanner, s)) {
247                 return TYPEDEF_NAME;
248         } else if (strcmp (s, "__builtin_va_list") == 0) {
249                 return TYPEDEF_NAME;
250         }
251
252         return IDENTIFIER;
253 }
254
255 static void
256 process_directive (GISourceScanner *scanner)
257 {
258         /* extract current filename from #line directives */
259         GString *filename_builder;
260         gboolean in_string, found_filename;
261
262         lineno = 0;
263         found_filename = FALSE;
264         in_string = FALSE;
265         filename_builder = g_string_new ("");
266
267         int c = input ();
268         while (c != EOF && c != '\n') {
269                 if (!in_string) {
270                         if (c == '\"') {
271                                 in_string = TRUE;
272                                 found_filename = TRUE;
273                         } else if (c >= '0' && c <= '9') {
274                                 if (!found_filename) {
275                                         lineno = lineno * 10 + (c - '0');
276                                 }
277                         }
278                 } else {
279                         if (c == '\"') {
280                                 in_string = FALSE;
281                         } else if (c == '\\') {
282                                 g_string_append_c (filename_builder, c);
283                                 c = input ();
284                                 g_string_append_c (filename_builder, c);
285                         } else {
286                                 g_string_append_c (filename_builder, c);
287                         }
288                 }
289                 c = input ();
290         }
291
292         if (filename_builder->len > 0) {
293                 char *filename = g_strcompress (filename_builder->str);
294                 if (g_realpath (filename))
295                   {
296                     g_free (scanner->current_filename);
297                     scanner->current_filename = g_realpath (filename);
298                     g_assert (scanner->current_filename);
299                     g_free(filename);
300                   }
301         }
302
303         g_string_free (filename_builder, TRUE);
304 }
305
306 /*
307  * This parses a macro which is ignored, such as
308  * __attribute__((x)) or __asm__ (x)
309  */
310 static int
311 parse_ignored_macro (void)
312 {
313         int c;
314         int nest;
315
316         while ((c = input ()) != EOF && isspace (c))
317                 ;
318         if (c != '(')
319                 return FALSE;
320
321         nest = 0;
322         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
323                 if (c == '(')
324                         nest++;
325                 else if (c == ')')
326                         nest--;
327                 else if (c == '"') {
328                         while ((c = input ()) != EOF && c != '"') {
329                                 if (c == '\\')
330                                         c = input ();
331                         }
332                 } else if (c == '\'') {
333                         c = input ();
334                         if (c == '\\')
335                                 c = input ();
336                         else if (c == '\'')
337                                 return FALSE;
338                         c = input ();
339                         if (c != '\'')
340                                 return FALSE;
341                 } else if (c == '\n')
342                         lineno++;
343         }
344
345         return TRUE;
346 }