gimarshallingtests: Add string_ to boxed structure
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  * Copyright (c) 2010 Andreas Rottmann <a.rottmann@gmx.at>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 %{
32 #include <ctype.h>
33 #include <stdio.h>
34
35 #include <glib.h>
36 #include "sourcescanner.h"
37 #include "scannerparser.h"
38 #include "grealpath.h"
39
/*
 * Scanner-wide state.  lineno is advanced by the newline rules and by the
 * helpers that consume input themselves, and is overwritten by
 * process_linemarks().  linebuf receives a truncated copy of the line about
 * to be scanned; nothing in this file reads it back.
 * NOTE(review): linebuf is presumably consumed by diagnostics elsewhere - confirm.
 */
40 int lineno;
41 char linebuf[2000];
42
/* Replace whatever YY_BUF_SIZE flex would otherwise use with 1048576 bytes. */
43 #undef YY_BUF_SIZE
44 #define YY_BUF_SIZE 1048576
45
/* YY_DECL makes the generated yylex() take the scanner as its only argument. */
46 extern int yylex (GISourceScanner *scanner);
47 #define YY_DECL int yylex (GISourceScanner *scanner)
/* Helpers defined in the user-code section at the end of this file. */
48 static int yywrap (void);
49 static void parse_comment (GISourceScanner *scanner);
50 static void parse_trigraph (GISourceScanner *scanner);
51 static void process_linemarks (GISourceScanner *scanner);
52 static int check_identifier (GISourceScanner *scanner, const char *);
53 static int parse_ignored_macro (void);
54 %}
55
56 %option nounput
57
/*
 * Named sub-patterns used by the literal rules further down:
 *   intsuffix   - optional u/U and l/L suffixes of an integer literal
 *   fracconst   - digits with a decimal point on either side
 *   exppart     - exponent with optional sign
 *   floatsuffix - f/F/l/L suffix of a floating literal
 *   chartext    - one character of a character literal, or a backslash escape
 *   stringtext  - one character of a string literal, or a backslash escape
 */
58 intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
59 fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
60 exppart                                 [eE][-+]?[0-9]+
61 floatsuffix                             [fFlL]
62 chartext                                ([^\\\'])|(\\.)
63 stringtext                              ([^\\\"])|(\\.)
64
65 %%
66
        /*
         * On every newline, copy the upcoming line into linebuf (truncated to
         * fit and always NUL-terminated), then hand back everything except the
         * newline itself so that the line is scanned by the rules below.
         */
67 \n.*                                    { strncpy(linebuf, yytext+1, sizeof(linebuf)); /* save the next line */
68                                                 linebuf[sizeof(linebuf)-1]='\0';
69                                                 /* printf("%4d:%s\n",lineno,linebuf); */
70                                                 yyless(1);      /* give back all but the \n to rescan */
71                                                 ++lineno;
72                                         }
        /* A backslash-newline pair only advances the line counter; no token is produced. */
73 "\\\n"                                  { ++lineno; }
74 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
75
        /*
         * Comment handling.  An opening block-comment delimiter hands control
         * to parse_comment(), which reads the rest of the comment itself.  A
         * complete block comment whose body is a "<...>" list (letters, digits,
         * underscore, comma, equals, blanks) is a longer match for the second
         * pattern and goes to parse_trigraph() instead.  Line comments are
         * dropped up to, but not including, the newline.
         */
76 "/*"                                    { parse_comment(scanner); }
77 "/*"[\t ]?<[\t ,=A-Za-z0-9_]+>[\t ]?"*/" { parse_trigraph(scanner); }
78 "//".*                                  { /* Ignore C++ style comments. */ }
79
80 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
81 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }
82 "#pragma ".*"\n"                        { /* Ignore pragma. */ }
83
84 "# "[0-9]+" ".*"\n"                     { process_linemarks(scanner); }
85 "#"                                     { }
86 "{"                                     { /* Single-character punctuators are returned as their own character code. */ return '{'; }
87 "<%"                                    { return '{'; /* digraph spelling */ }
88 "}"                                     { return '}'; }
89 "%>"                                    { return '}'; /* digraph spelling */ }
90 "["                                     { return '['; }
91 "<:"                                    { return '['; /* digraph spelling */ }
92 "]"                                     { return ']'; }
93 ":>"                                    { return ']'; /* digraph spelling */ }
94 "("                                     { return '('; }
95 ")"                                     { return ')'; }
96 ";"                                     { return ';'; }
97 ":"                                     { return ':'; }
98 "..."                                   { return ELLIPSIS; }
99 "?"                                     { return '?'; }
100 "."                                     { return '.'; }
101 "+"                                     { return '+'; }
102 "-"                                     { return '-'; }
103 "*"                                     { return '*'; }
104 "/"                                     { return '/'; }
105 "%"                                     { return '%'; }
106 "^"                                     { return '^'; }
107 "&"                                     { return '&'; }
108 "|"                                     { return '|'; }
109 "~"                                     { return '~'; }
110 "!"                                     { return '!'; }
111 "="                                     { return '='; }
112 "<"                                     { return '<'; }
113 ">"                                     { return '>'; }
        /* Multi-character operators are returned as named tokens. */
114 "+="                                    { return ADDEQ; }
115 "-="                                    { return SUBEQ; }
116 "*="                                    { return MULEQ; }
117 "/="                                    { return DIVEQ; }
118 "%="                                    { return MODEQ; }
119 "^="                                    { return XOREQ; }
120 "&="                                    { return ANDEQ; }
121 "|="                                    { return OREQ; }
122 "<<"                                    { return SL; }
123 ">>"                                    { return SR; }
124 "<<="                                   { return SLEQ; }
125 ">>="                                   { return SREQ; }
126 "=="                                    { return EQ; }
127 "!="                                    { return NOTEQ; }
128 "<="                                    { return LTEQ; }
129 ">="                                    { return GTEQ; }
130 "&&"                                    { return ANDAND; }
131 "||"                                    { return OROR; }
132 "++"                                    { return PLUSPLUS; }
133 "--"                                    { return MINUSMINUS; }
134 ","                                     { return ','; }
135 "->"                                    { return ARROW; }
136
        /*
         * Compiler-extension spellings.  Those that take a parenthesised
         * argument list are swallowed whole by parse_ignored_macro(); when it
         * returns FALSE the rule REJECTs so another rule can claim the text.
         * The remainder map directly onto the matching standard keyword token.
         * NOTE(review): parse_ignored_macro() reads ahead with input() before
         * it can return FALSE, and nothing visible here pushes those characters
         * back (nounput is set) - confirm that losing them is acceptable.
         */
137 "__asm"                                 { if (!parse_ignored_macro()) REJECT; }
138 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
139 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
140 "__attribute"                           { if (!parse_ignored_macro()) REJECT; }
141 "__const"                               { return CONST; }
142 "__extension__"                         { return EXTENSION; }
143 "__inline"                              { return INLINE; }
144 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
145 "__signed__"                            { return SIGNED; }
146 "__restrict"                            { return RESTRICT; }
147 "__typeof"                              { if (!parse_ignored_macro()) REJECT; }
148 "_Bool"                                 { return BOOL; }
149
150 "G_GINT64_CONSTANT"                     { return INTL_CONST; }
151 "G_GUINT64_CONSTANT"                    { return INTUL_CONST; }
        /*
         * While scanner->macro_scan is set, any identifier not claimed by a
         * rule above is classified by check_identifier() here, ahead of the
         * keyword rules that follow.  Otherwise the match is REJECTed so those
         * later rules get their turn.
         */
152 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return check_identifier(scanner, yytext); else REJECT; }
153
        /*
         * C keywords, each returned as its own token.  "asm" is treated like
         * the other ignored macros: it is swallowed together with its
         * parenthesised argument list, or REJECTed when none follows.
         */
154 "asm"                                   { if (!parse_ignored_macro()) REJECT; }
155 "auto"                                  { return AUTO; }
156 "break"                                 { return BREAK; }
157 "case"                                  { return CASE; }
158 "char"                                  { return CHAR; }
159 "const"                                 { return CONST; }
160 "continue"                              { return CONTINUE; }
161 "default"                               { return DEFAULT; }
162 "do"                                    { return DO; }
163 "double"                                { return DOUBLE; }
164 "else"                                  { return ELSE; }
165 "enum"                                  { return ENUM; }
166 "extern"                                { return EXTERN; }
167 "float"                                 { return FLOAT; }
168 "for"                                   { return FOR; }
169 "goto"                                  { return GOTO; }
170 "if"                                    { return IF; }
171 "inline"                                { return INLINE; }
172 "int"                                   { return INT; }
173 "long"                                  { return LONG; }
174 "register"                              { return REGISTER; }
175 "restrict"                              { return RESTRICT; }
176 "return"                                { return RETURN; }
177 "short"                                 { return SHORT; }
178 "signed"                                { return SIGNED; }
179 "sizeof"                                { return SIZEOF; }
180 "static"                                { return STATIC; }
181 "struct"                                { return STRUCT; }
182 "switch"                                { return SWITCH; }
183 "typedef"                               { return TYPEDEF; }
184 "union"                                 { return UNION; }
185 "unsigned"                              { return UNSIGNED; }
186 "void"                                  { return VOID; }
187 "volatile"                              { return VOLATILE; }
188 "while"                                 { return WHILE; }
189
        /* Anything identifier-shaped that is not a keyword: typedef name or plain identifier. */
190 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
191
        /* Integer literals: hexadecimal, octal and decimal, each with an optional suffix. */
192 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
193 "0"[0-7]+{intsuffix}?                   { return INTEGER; }
194 [0-9]+{intsuffix}?                      { return INTEGER; }
195
        /* Floating literals: with a decimal point, or digits followed by an exponent. */
196 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
197 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; }
198
        /* Character and string literals, plain or with an L prefix. */
199 "'"{chartext}*"'"                       { return CHARACTER; }
200 "L'"{chartext}*"'"                      { return CHARACTER; }
201
202 "\""{stringtext}*"\""                   { return STRING; }
203 "L\""{stringtext}*"\""                  { return STRING; }
204
        /* Catch-all: report any other non-NUL character on stderr and carry on scanning. */
205 .                                       { if (yytext[0]) fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
206
207 %%
208
/*
 * End-of-input hook for the generated scanner.
 *
 * Always returns 1; a non-zero result tells flex that no further input
 * follows the current one.
 */
static int
yywrap (void)
{
  const int no_more_input = 1;

  return no_more_input;
}
214
215
216 static void
217 parse_comment (GISourceScanner *scanner)
218 {
219   GString *string = NULL;
220   int c1, c2;
221   GISourceComment *comment;
222   int comment_lineno;
223   int skip = FALSE;
224
225   if (!g_list_find_custom (scanner->filenames,
226                            scanner->current_filename,
227                            (GCompareFunc)g_strcmp0)) {
228       skip = TRUE;
229   } else {
230       string = g_string_new ("/*");
231   }
232
233   c1 = input();
234   c2 = input();
235
236   comment_lineno = lineno;
237
238   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
239     {
240       if (!skip)
241         g_string_append_c (string, c1);
242
243       if (c1 == '\n')
244         lineno++;
245
246       c1 = c2;
247       c2 = input();
248     }
249
250   if (skip) {
251       return;
252   }
253
254   g_string_append (string, "*/");
255
256   comment = g_slice_new (GISourceComment);
257   comment->comment = g_string_free (string, FALSE);
258   comment->line = comment_lineno;
259   comment->filename = g_strdup(scanner->current_filename);
260
261   scanner->comments = g_slist_prepend (scanner->comments,
262                                        comment);
263 }
264
265 static int
266 check_identifier (GISourceScanner *scanner,
267                   const char  *s)
268 {
269         /*
270          * This function checks if `s' is a type name or an
271          * identifier.
272          */
273
274         if (gi_source_scanner_is_typedef (scanner, s)) {
275                 return TYPEDEF_NAME;
276         } else if (strcmp (s, "__builtin_va_list") == 0) {
277                 return TYPEDEF_NAME;
278         }
279
280         return IDENTIFIER;
281 }
282
283 /*
284  * # linenum "filename" flags
285  *  See http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
286  **/
287
288 static void
289 process_linemarks (GISourceScanner *scanner)
290 {
291         char filename[1025];
292         char *compress;
293         char *real;
294
295         sscanf(yytext, "# %d \"%1024[^\"]\"", &lineno, filename);
296
297         compress = g_strcompress (filename);
298         real = g_realpath (filename);
299         if (real) {
300                 g_free (scanner->current_filename);
301                 scanner->current_filename = real;
302         } else {
303                 g_free (real);
304         }
305         g_free (compress);
306 }
307
308 /*
309  * This parses a macro which is ignored, such as
310  * __attribute__((x)) or __asm__ (x)
311  */
312 static int
313 parse_ignored_macro (void)
314 {
315         int c;
316         int nest;
317
318         while ((c = input ()) != EOF && isspace (c))
319                 ;
320         if (c != '(')
321                 return FALSE;
322
323         nest = 0;
324         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
325                 if (c == '(')
326                         nest++;
327                 else if (c == ')')
328                         nest--;
329                 else if (c == '"') {
330                         while ((c = input ()) != EOF && c != '"') {
331                                 if (c == '\\')
332                                         c = input ();
333                         }
334                 } else if (c == '\'') {
335                         c = input ();
336                         if (c == '\\')
337                                 c = input ();
338                         else if (c == '\'')
339                                 return FALSE;
340                         c = input ();
341                         if (c != '\'')
342                                 return FALSE;
343                 } else if (c == '\n')
344                         lineno++;
345         }
346
347         return TRUE;
348 }
349
350 static void
351 parse_trigraph (GISourceScanner *scanner)
352 {
353         char **items;
354         char *start, *end;
355         int i;
356
357         start = g_strstr_len (yytext, yyleng, "<");
358         g_assert (start != NULL);
359         end = g_strstr_len (yytext, yyleng, ">");
360         g_assert (end != NULL);
361         *end = '\0';
362         items = g_strsplit (start + 1, ",", 0);
363         for (i = 0; items[i] != NULL; i++) {
364                 char *item = items[i];
365                 g_strstrip (item);
366                 if (strcmp (item, "public") == 0)
367                         scanner->private = FALSE;
368                 else if (strcmp (item, "private") == 0)
369                         scanner->private = TRUE;
370                 else if (strcmp (item, "flags") == 0)
371                         scanner->flags = TRUE;
372         }
373         g_strfreev (items);
374 }