Silently skip #pragma directives when scanning sources.
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
%{
#include <ctype.h>
#include <stdio.h>

#include <glib.h>
#include "sourcescanner.h"
#include "scannerparser.h"
#include "grealpath.h"

/* Line counter for the input being scanned.  It is incremented by the
 * newline-handling rules and helpers in this file and overwritten by
 * process_linemarks() from preprocessor line markers. */
int lineno;
/* Copy of the text of the line being scanned, truncated to fit; filled in
 * by the newline rule. */
char linebuf[2000];

/* Replace the default scanner buffer size with 1 MiB. */
#undef YY_BUF_SIZE
#define YY_BUF_SIZE 1048576

/* The generated scanner function takes the source scanner as an explicit
 * argument. */
extern int yylex (GISourceScanner *scanner);
#define YY_DECL int yylex (GISourceScanner *scanner)
/* Helpers defined in the user-code section of this file. */
static int yywrap (void);
static void parse_comment (GISourceScanner *scanner);
static void process_linemarks (GISourceScanner *scanner);
static int check_identifier (GISourceScanner *scanner, const char *);
static int parse_ignored_macro (void);
%}
53
/* Do not generate unput(); nothing in this scanner calls it. */
%option nounput

/* Integer literal suffix: u/U followed by up to two l/L, or one or two l/L
 * followed by an optional u/U. */
intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
/* Digits containing a decimal point: ".5", "1.5" or "1." */
fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
/* Exponent: e/E, optional sign, one or more digits. */
exppart                                 [eE][-+]?[0-9]+
/* Floating literal suffix. */
floatsuffix                             [fFlL]
/* One element of a character literal: any character other than a backslash
 * or single quote, or a backslash followed by one character. */
chartext                                ([^\\\'])|(\\.)
/* One element of a string literal: any character other than a backslash
 * or double quote, or a backslash followed by one character. */
stringtext                              ([^\\\"])|(\\.)
60 chartext                                ([^\\\'])|(\\.)
61 stringtext                              ([^\\\"])|(\\.)
62
63 %%
64
65 \n.*                                    { strncpy(linebuf, yytext+1, sizeof(linebuf)); /* save the next line */
66                                                 linebuf[sizeof(linebuf)-1]='\0';
67                                                 /* printf("%4d:%s\n",lineno,linebuf); */
68                                                 yyless(1);      /* give back all but the \n to rescan */
69                                                 ++lineno;
70                                         }
71 "\\\n"                                  { ++lineno; }
72 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
73
74 "/*"                                    { parse_comment(scanner); }
75 "/*"[\t ]*<[\t ]*"private"[\t ]*>" */"  { scanner->private = TRUE; }
76 "/*"[\t ]*<[\t ]*"public"[\t ]*>" */"   { scanner->private = FALSE; }
77 "//".*                                  { }
78
79 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
80 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }
81 "#pragma ".*"\n"                        { /* Ignore pragma. */ }
82
83 "# "[0-9]+" ".*"\n"                     { process_linemarks(scanner); }
84 "#"                                     { }
85 "{"                                     { return '{'; }
86 "<%"                                    { return '{'; }
87 "}"                                     { return '}'; }
88 "%>"                                    { return '}'; }
89 "["                                     { return '['; }
90 "<:"                                    { return '['; }
91 "]"                                     { return ']'; }
92 ":>"                                    { return ']'; }
93 "("                                     { return '('; }
94 ")"                                     { return ')'; }
95 ";"                                     { return ';'; }
96 ":"                                     { return ':'; }
97 "..."                                   { return ELLIPSIS; }
98 "?"                                     { return '?'; }
99 "."                                     { return '.'; }
100 "+"                                     { return '+'; }
101 "-"                                     { return '-'; }
102 "*"                                     { return '*'; }
103 "/"                                     { return '/'; }
104 "%"                                     { return '%'; }
105 "^"                                     { return '^'; }
106 "&"                                     { return '&'; }
107 "|"                                     { return '|'; }
108 "~"                                     { return '~'; }
109 "!"                                     { return '!'; }
110 "="                                     { return '='; }
111 "<"                                     { return '<'; }
112 ">"                                     { return '>'; }
113 "+="                                    { return ADDEQ; }
114 "-="                                    { return SUBEQ; }
115 "*="                                    { return MULEQ; }
116 "/="                                    { return DIVEQ; }
117 "%="                                    { return MODEQ; }
118 "^="                                    { return XOREQ; }
119 "&="                                    { return ANDEQ; }
120 "|="                                    { return OREQ; }
121 "<<"                                    { return SL; }
122 ">>"                                    { return SR; }
123 "<<="                                   { return SLEQ; }
124 ">>="                                   { return SREQ; }
125 "=="                                    { return EQ; }
126 "!="                                    { return NOTEQ; }
127 "<="                                    { return LTEQ; }
128 ">="                                    { return GTEQ; }
129 "&&"                                    { return ANDAND; }
130 "||"                                    { return OROR; }
131 "++"                                    { return PLUSPLUS; }
132 "--"                                    { return MINUSMINUS; }
133 ","                                     { return ','; }
134 "->"                                    { return ARROW; }
135
136 "__asm"                                 { if (!parse_ignored_macro()) REJECT; }
137 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
138 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
139 "__attribute"                           { if (!parse_ignored_macro()) REJECT; }
140 "__const"                               { return CONST; }
141 "__extension__"                         { return EXTENSION; }
142 "__inline"                              { return INLINE; }
143 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
144 "__signed__"                            { return SIGNED; }
145 "__restrict"                            { return RESTRICT; }
146 "__typeof"                              { if (!parse_ignored_macro()) REJECT; }
147 "_Bool"                                 { return BOOL; }
148
149 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }
150
151 "asm"                                   { if (!parse_ignored_macro()) REJECT; }
152 "auto"                                  { return AUTO; }
153 "break"                                 { return BREAK; }
154 "case"                                  { return CASE; }
155 "char"                                  { return CHAR; }
156 "const"                                 { return CONST; }
157 "continue"                              { return CONTINUE; }
158 "default"                               { return DEFAULT; }
159 "do"                                    { return DO; }
160 "double"                                { return DOUBLE; }
161 "else"                                  { return ELSE; }
162 "enum"                                  { return ENUM; }
163 "extern"                                { return EXTERN; }
164 "float"                                 { return FLOAT; }
165 "for"                                   { return FOR; }
166 "goto"                                  { return GOTO; }
167 "if"                                    { return IF; }
168 "inline"                                { return INLINE; }
169 "int"                                   { return INT; }
170 "long"                                  { return LONG; }
171 "register"                              { return REGISTER; }
172 "restrict"                              { return RESTRICT; }
173 "return"                                { return RETURN; }
174 "short"                                 { return SHORT; }
175 "signed"                                { return SIGNED; }
176 "sizeof"                                { return SIZEOF; }
177 "static"                                { return STATIC; }
178 "struct"                                { return STRUCT; }
179 "switch"                                { return SWITCH; }
180 "typedef"                               { return TYPEDEF; }
181 "union"                                 { return UNION; }
182 "unsigned"                              { return UNSIGNED; }
183 "void"                                  { return VOID; }
184 "volatile"                              { return VOLATILE; }
185 "while"                                 { return WHILE; }
186
187 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
188
189 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
190 "0"[0-7]+{intsuffix}?                   { return INTEGER; }
191 [0-9]+{intsuffix}?                      { return INTEGER; }
192
193 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
194 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; }
195
196 "'"{chartext}*"'"                       { return CHARACTER; }
197 "L'"{chartext}*"'"                      { return CHARACTER; }
198
199 "\""{stringtext}*"\""                   { return STRING; }
200 "L\""{stringtext}*"\""                  { return STRING; }
201
202 .                                       { if (yytext[0]) fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
203
204 %%
205
/*
 * End-of-input hook for the generated scanner.  Always returns non-zero,
 * i.e. there is never a further input to continue with.
 */
static int
yywrap (void)
{
  const int no_more_input = 1;

  return no_more_input;
}
211
212
213 static void
214 parse_comment (GISourceScanner *scanner)
215 {
216   GString *string;
217   int c1, c2;
218   GISourceComment *comment;
219   int comment_lineno;
220
221   c1 = input();
222   c2 = input();
223
224   string = g_string_new ("");
225
226   comment_lineno = lineno;
227
228   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
229     {
230       g_string_append_c (string, c1);
231
232       if (c1 == '\n')
233         lineno++;
234
235       c1 = c2;
236       c2 = input();
237
238     }
239
240   comment = g_slice_new (GISourceComment);
241   comment->comment = g_string_free (string, FALSE);
242   comment->line = comment_lineno;
243   comment->filename = g_strdup(scanner->current_filename);
244
245   scanner->comments = g_slist_prepend (scanner->comments,
246                                        comment);
247 }
248
249 static int
250 check_identifier (GISourceScanner *scanner,
251                   const char  *s)
252 {
253         /*
254          * This function checks if `s' is a type name or an
255          * identifier.
256          */
257
258         if (gi_source_scanner_is_typedef (scanner, s)) {
259                 return TYPEDEF_NAME;
260         } else if (strcmp (s, "__builtin_va_list") == 0) {
261                 return TYPEDEF_NAME;
262         }
263
264         return IDENTIFIER;
265 }
266
267 /*
268  * # linenum "filename" flags
269  *  See http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
270  **/
271
272 static void
273 process_linemarks (GISourceScanner *scanner)
274 {
275         char filename[1025];
276         char *compress;
277         char *real;
278
279         sscanf(yytext, "# %d \"%1024[^\"]\"", &lineno, filename);
280
281         compress = g_strcompress (filename);
282         real = g_realpath (filename);
283         if (real) {
284                 g_free (scanner->current_filename);
285                 scanner->current_filename = real;
286         } else {
287                 g_free (real);
288         }
289         g_free (compress);
290 }
291
292 /*
293  * This parses a macro which is ignored, such as
294  * __attribute__((x)) or __asm__ (x)
295  */
296 static int
297 parse_ignored_macro (void)
298 {
299         int c;
300         int nest;
301
302         while ((c = input ()) != EOF && isspace (c))
303                 ;
304         if (c != '(')
305                 return FALSE;
306
307         nest = 0;
308         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
309                 if (c == '(')
310                         nest++;
311                 else if (c == ')')
312                         nest--;
313                 else if (c == '"') {
314                         while ((c = input ()) != EOF && c != '"') {
315                                 if (c == '\\')
316                                         c = input ();
317                         }
318                 } else if (c == '\'') {
319                         c = input ();
320                         if (c == '\\')
321                                 c = input ();
322                         else if (c == '\'')
323                                 return FALSE;
324                         c = input ();
325                         if (c != '\'')
326                                 return FALSE;
327                 } else if (c == '\n')
328                         lineno++;
329         }
330
331         return TRUE;
332 }