[scanner] Support private/public directives
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33
34 #include <glib.h>
35 #include "sourcescanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38
39 int lineno;
40 char linebuf[2000];
41
42 #undef YY_BUF_SIZE
43 #define YY_BUF_SIZE 1048576
44
45 extern int yylex (GISourceScanner *scanner);
46 #define YY_DECL int yylex (GISourceScanner *scanner)
47 static int yywrap (void);
48 static void parse_comment (GISourceScanner *scanner);
49 static void process_linemarks (GISourceScanner *scanner);
50 static int check_identifier (GISourceScanner *scanner, const char *);
51 static int parse_ignored_macro (void);
52 %}
53
%option nounput

        /* Named sub-patterns shared by the numeric and literal rules below. */
intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
exppart                                 [eE][-+]?[0-9]+
floatsuffix                             [fFlL]
chartext                                ([^\\\'])|(\\.)
stringtext                              ([^\\\"])|(\\.)

%%

        /*
         * Line tracking: on every newline, remember the text of the line
         * that follows in `linebuf', hand everything but the newline back
         * to the scanner and bump `lineno'.
         */
\n.*                                    { strncpy(linebuf, yytext+1, sizeof(linebuf)); /* save the next line */
                                                linebuf[sizeof(linebuf)-1]='\0';
                                                /* printf("%4d:%s\n",lineno,linebuf); */
                                                yyless(1);      /* give back all but the \n to rescan */
                                                ++lineno;
                                        }
"\\\n"                                  { ++lineno; }
[\t\f\v\r ]+                            { /* Ignore whitespace. */ }

        /*
         * Comments.  A comment whose whole body is <private> or <public>
         * toggles scanner->private instead of being collected.  Blanks and
         * tabs are optional on both sides of the angle brackets and next to
         * the comment delimiters, so the spelling with and without spaces
         * is accepted.  These patterns are always longer than the bare
         * comment opener, so the longest-match rule selects them first.
         */
"/*"                                    { parse_comment(scanner); }
"/*"[\t ]*"<"[\t ]*"private"[\t ]*">"[\t ]*"*/"  { scanner->private = TRUE; }
"/*"[\t ]*"<"[\t ]*"public"[\t ]*">"[\t ]*"*/"   { scanner->private = FALSE; }
"//".*                                  { }

        /*
         * Preprocessor residue.  For a function-like macro the opening
         * parenthesis is given back so that it is scanned as its own token.
         */
"#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
"#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }

"# "[0-9]+" ".*"\n"                     { process_linemarks(scanner); }
"#"                                     { }

        /* Punctuators, including the digraph spellings of braces and brackets. */
"{"                                     { return '{'; }
"<%"                                    { return '{'; }
"}"                                     { return '}'; }
"%>"                                    { return '}'; }
"["                                     { return '['; }
"<:"                                    { return '['; }
"]"                                     { return ']'; }
":>"                                    { return ']'; }
"("                                     { return '('; }
")"                                     { return ')'; }
";"                                     { return ';'; }
":"                                     { return ':'; }
"..."                                   { return ELLIPSIS; }
"?"                                     { return '?'; }
"."                                     { return '.'; }
"+"                                     { return '+'; }
"-"                                     { return '-'; }
"*"                                     { return '*'; }
"/"                                     { return '/'; }
"%"                                     { return '%'; }
"^"                                     { return '^'; }
"&"                                     { return '&'; }
"|"                                     { return '|'; }
"~"                                     { return '~'; }
"!"                                     { return '!'; }
"="                                     { return '='; }
"<"                                     { return '<'; }
">"                                     { return '>'; }
"+="                                    { return ADDEQ; }
"-="                                    { return SUBEQ; }
"*="                                    { return MULEQ; }
"/="                                    { return DIVEQ; }
"%="                                    { return MODEQ; }
"^="                                    { return XOREQ; }
"&="                                    { return ANDEQ; }
"|="                                    { return OREQ; }
"<<"                                    { return SL; }
">>"                                    { return SR; }
"<<="                                   { return SLEQ; }
">>="                                   { return SREQ; }
"=="                                    { return EQ; }
"!="                                    { return NOTEQ; }
"<="                                    { return LTEQ; }
">="                                    { return GTEQ; }
"&&"                                    { return ANDAND; }
"||"                                    { return OROR; }
"++"                                    { return PLUSPLUS; }
"--"                                    { return MINUSMINUS; }
","                                     { return ','; }
"->"                                    { return ARROW; }

        /*
         * Compiler-specific keywords.  Those routed through
         * parse_ignored_macro() are dropped together with their
         * parenthesised argument; when no such argument follows, REJECT
         * lets a later rule handle the word.
         */
"__asm"                                 { if (!parse_ignored_macro()) REJECT; }
"__asm__"                               { if (!parse_ignored_macro()) REJECT; }
"__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
"__attribute"                           { if (!parse_ignored_macro()) REJECT; }
"__const"                               { return CONST; }
"__extension__"                         { return EXTENSION; }
"__inline"                              { return INLINE; }
"__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
"__signed__"                            { return SIGNED; }
"__restrict"                            { return RESTRICT; }
"__typeof"                              { if (!parse_ignored_macro()) REJECT; }
"_Bool"                                 { return BOOL; }

        /*
         * While scanner->macro_scan is set every word is a plain
         * IDENTIFIER; otherwise REJECT hands the word on to the keyword
         * and identifier rules below.
         */
[a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }

        /* Standard C keywords. */
"asm"                                   { if (!parse_ignored_macro()) REJECT; }
"auto"                                  { return AUTO; }
"break"                                 { return BREAK; }
"case"                                  { return CASE; }
"char"                                  { return CHAR; }
"const"                                 { return CONST; }
"continue"                              { return CONTINUE; }
"default"                               { return DEFAULT; }
"do"                                    { return DO; }
"double"                                { return DOUBLE; }
"else"                                  { return ELSE; }
"enum"                                  { return ENUM; }
"extern"                                { return EXTERN; }
"float"                                 { return FLOAT; }
"for"                                   { return FOR; }
"goto"                                  { return GOTO; }
"if"                                    { return IF; }
"inline"                                { return INLINE; }
"int"                                   { return INT; }
"long"                                  { return LONG; }
"register"                              { return REGISTER; }
"restrict"                              { return RESTRICT; }
"return"                                { return RETURN; }
"short"                                 { return SHORT; }
"signed"                                { return SIGNED; }
"sizeof"                                { return SIZEOF; }
"static"                                { return STATIC; }
"struct"                                { return STRUCT; }
"switch"                                { return SWITCH; }
"typedef"                               { return TYPEDEF; }
"union"                                 { return UNION; }
"unsigned"                              { return UNSIGNED; }
"void"                                  { return VOID; }
"volatile"                              { return VOLATILE; }
"while"                                 { return WHILE; }

        /* Any remaining word: typedef name or identifier. */
[a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }

        /* Integer constants: hexadecimal, octal, decimal. */
"0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
"0"[0-7]+{intsuffix}?                   { return INTEGER; }
[0-9]+{intsuffix}?                      { return INTEGER; }

        /* Floating constants. */
{fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
[0-9]+{exppart}{floatsuffix}?           { return FLOATING; }

        /* Character and string literals, plain and wide. */
"'"{chartext}*"'"                       { return CHARACTER; }
"L'"{chartext}*"'"                      { return CHARACTER; }

"\""{stringtext}*"\""                   { return STRING; }
"L\""{stringtext}*"\""                  { return STRING; }

        /* Anything else is reported on stderr and skipped. */
.                                       { if (yytext[0]) fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
202
203 %%
204
/*
 * End-of-input hook for the generated scanner.
 *
 * Always returns 1: there is never a further input source to continue with.
 */
static int
yywrap (void)
{
  enum { NO_MORE_INPUT = 1 };

  return NO_MORE_INPUT;
}
210
211
212 static void
213 parse_comment (GISourceScanner *scanner)
214 {
215   GString *string;
216   int c1, c2;
217   GISourceComment *comment;
218   int comment_lineno;
219
220   c1 = input();
221   c2 = input();
222
223   string = g_string_new ("");
224
225   comment_lineno = lineno;
226
227   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
228     {
229       g_string_append_c (string, c1);
230
231       if (c1 == '\n')
232         lineno++;
233
234       c1 = c2;
235       c2 = input();
236
237     }
238
239   comment = g_slice_new (GISourceComment);
240   comment->comment = g_string_free (string, FALSE);
241   comment->line = comment_lineno;
242   comment->filename = g_strdup(scanner->current_filename);
243
244   scanner->comments = g_slist_prepend (scanner->comments,
245                                        comment);
246 }
247
248 static int
249 check_identifier (GISourceScanner *scanner,
250                   const char  *s)
251 {
252         /*
253          * This function checks if `s' is a type name or an
254          * identifier.
255          */
256
257         if (gi_source_scanner_is_typedef (scanner, s)) {
258                 return TYPEDEF_NAME;
259         } else if (strcmp (s, "__builtin_va_list") == 0) {
260                 return TYPEDEF_NAME;
261         }
262
263         return IDENTIFIER;
264 }
265
266 /*
267  * # linenum "filename" flags
268  *  See http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
269  **/
270
271 static void
272 process_linemarks (GISourceScanner *scanner)
273 {
274         char filename[1025];
275         char *compress;
276         char *real;
277
278         sscanf(yytext, "# %d \"%1024[^\"]\"", &lineno, filename);
279
280         compress = g_strcompress (filename);
281         real = g_realpath (filename);
282         if (real) {
283                 g_free (scanner->current_filename);
284                 scanner->current_filename = real;
285         } else {
286                 g_free (real);
287         }
288         g_free (compress);
289 }
290
291 /*
292  * This parses a macro which is ignored, such as
293  * __attribute__((x)) or __asm__ (x)
294  */
295 static int
296 parse_ignored_macro (void)
297 {
298         int c;
299         int nest;
300
301         while ((c = input ()) != EOF && isspace (c))
302                 ;
303         if (c != '(')
304                 return FALSE;
305
306         nest = 0;
307         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
308                 if (c == '(')
309                         nest++;
310                 else if (c == ')')
311                         nest--;
312                 else if (c == '"') {
313                         while ((c = input ()) != EOF && c != '"') {
314                                 if (c == '\\')
315                                         c = input ();
316                         }
317                 } else if (c == '\'') {
318                         c = input ();
319                         if (c == '\\')
320                                 c = input ();
321                         else if (c == '\'')
322                                 return FALSE;
323                         c = input ();
324                         if (c != '\'')
325                                 return FALSE;
326                 } else if (c == '\n')
327                         lineno++;
328         }
329
330         return TRUE;
331 }