Imported Upstream version 1.40.0
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  * Copyright (c) 2010 Andreas Rottmann <a.rottmann@gmx.at>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 %{
32 #include <ctype.h>
33 #include <stdio.h>
34 #include <string.h>
35 #ifndef _WIN32
36 #include <limits.h>
37 #endif
38
39 #include <glib.h>
40 #include "sourcescanner.h"
41 #include "scannerparser.h"
42
43 #ifdef USE_WINDOWS
44 #include <windows.h>
45 #endif
46
/* Line number of the current input position.  It is advanced by the newline
 * rules and by the comment / ignored-macro skippers defined later in this
 * file, and overwritten by process_linemarks(). */
47 int lineno;
/* Copy of the text of the input line that has just been started (truncated
 * to fit), saved by the newline rule. */
48 char linebuf[2000];
49
/* Replace the generator's default YY_BUF_SIZE with 1048576 bytes (1 MiB). */
50 #undef YY_BUF_SIZE
51 #define YY_BUF_SIZE 1048576
52
/* The scanner entry point receives the GISourceScanner being worked on;
 * YY_DECL is set to the same signature as the prototype. */
53 extern int yylex (GISourceScanner *scanner);
54 #define YY_DECL int yylex (GISourceScanner *scanner)
/* File-local helpers; their definitions follow the rules section. */
55 static int yywrap (void);
56 static void parse_comment (GISourceScanner *scanner);
57 static void parse_trigraph (GISourceScanner *scanner);
58 static void process_linemarks (GISourceScanner *scanner);
59 static int check_identifier (GISourceScanner *scanner, const char *);
60 static int parse_ignored_macro (void);
61 static void print_error (GISourceScanner *scanner);
62 %}
63
/*
 * Scanner options and named sub-patterns used by the literal rules:
 *   intsuffix    - integer suffix: u/U optionally followed by up to two
 *                  l/L, or one or two l/L optionally followed by u/U
 *   fracconst    - digits with a decimal point (".5", "1.5", "1.")
 *   exppart      - exponent: e/E, optional sign, digits
 *   floatsuffix  - one of f, F, l, L
 *   chartext     - one element of a character literal: an escape sequence
 *                  or any character other than backslash and single quote
 *   stringtext   - same, for string literals (double quote excluded)
 */
64 %option nounput
65
66 intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
67 fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
68 exppart                                 [eE][-+]?[0-9]+
69 floatsuffix                             [fFlL]
70 chartext                                ([^\\\'])|(\\.)
71 stringtext                              ([^\\\"])|(\\.)
72
73 %%
74
        /*
         * Line bookkeeping.  The first rule matches a newline together with
         * the whole following line, copies that line into linebuf (always
         * NUL-terminated, truncated if too long), pushes everything except
         * the newline back for rescanning and counts the line.  The second
         * rule counts a backslash-newline line continuation.
         */
75 \n.*                                    { strncpy(linebuf, yytext+1, sizeof(linebuf)); /* save the next line */
76                                                 linebuf[sizeof(linebuf)-1]='\0';
77                                                 /* printf("%4d:%s\n",lineno,linebuf); */
78                                                 yyless(1);      /* give back all but the \n to rescan */
79                                                 ++lineno;
80                                         }
81 "\\\n"                                  { ++lineno; }
82
        /*
         * Whitespace and comments.  Runs of blanks are dropped.  A comment
         * opener hands control to parse_comment(), which consumes the rest
         * of the comment itself.  A complete one-line comment whose body is
         * an angle-bracketed word list is handled by parse_trigraph()
         * instead.  Double-slash comments are dropped up to the end of the
         * line.
         */
83 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
84
85 "/*"                                    { parse_comment(scanner); }
86 "/*"[\t ]?<[\t ,=A-Za-z0-9_]+>[\t ]?"*/" { parse_trigraph(scanner); }
87 "//".*                                  { /* Ignore C++ style comments. */ }
88
        /*
         * Preprocessor lines.
         *  - "#define NAME(" returns FUNCTION_MACRO after pushing the "("
         *    back with yyless(); "#define NAME" returns OBJECT_MACRO.
         *  - Conditional directives consume their whole line, newline
         *    included, and return one token each; the __GI_SCANNER__ forms
         *    of #ifdef/#ifndef have dedicated tokens.
         *  - #pragma lines are discarded.
         *  - "# <number> ..." lines are passed to process_linemarks().
         *  - Any other '#' is discarded on its own.
         */
89 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
90 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }
91 "#ifdef"[\t ]+"__GI_SCANNER__"[\t ]?.*"\n" { return IFDEF_GI_SCANNER; }
92 "#ifndef"[\t ]+"__GI_SCANNER__"[\t ]?.*"\n" { return IFNDEF_GI_SCANNER; }
93 "#ifndef ".*"\n"                        { return IFNDEF_COND; }
94 "#ifdef ".*"\n"                         { return IFDEF_COND; }
95 "#if ".*"\n"                            { return IF_COND; }
96 "#elif ".*"\n"                          { return ELIF_COND; }
97 "#else".*"\n"                           { return ELSE_COND; }
98 "#endif".*"\n"                          { return ENDIF_COND; }
99 "#pragma ".*"\n"                        { /* Ignore pragma. */ }
100
101 "# "[0-9]+" ".*"\n"                     { process_linemarks(scanner); }
102 "#"                                     { }
        /*
         * Punctuators and operators.  Single-character punctuators are
         * returned as their own character code; multi-character operators
         * return named token codes.  The two-character spellings "<%", "%>",
         * "<:" and ":>" are returned as the brace or bracket character they
         * stand for.
         */
103 "{"                                     { return '{'; }
104 "<%"                                    { return '{'; }
105 "}"                                     { return '}'; }
106 "%>"                                    { return '}'; }
107 "["                                     { return '['; }
108 "<:"                                    { return '['; }
109 "]"                                     { return ']'; }
110 ":>"                                    { return ']'; }
111 "("                                     { return '('; }
112 ")"                                     { return ')'; }
113 ";"                                     { return ';'; }
114 ":"                                     { return ':'; }
115 "..."                                   { return ELLIPSIS; }
116 "?"                                     { return '?'; }
117 "."                                     { return '.'; }
118 "+"                                     { return '+'; }
119 "-"                                     { return '-'; }
120 "*"                                     { return '*'; }
121 "/"                                     { return '/'; }
122 "%"                                     { return '%'; }
123 "^"                                     { return '^'; }
124 "&"                                     { return '&'; }
125 "|"                                     { return '|'; }
126 "~"                                     { return '~'; }
127 "!"                                     { return '!'; }
128 "="                                     { return '='; }
129 "<"                                     { return '<'; }
130 ">"                                     { return '>'; }
131 "+="                                    { return ADDEQ; }
132 "-="                                    { return SUBEQ; }
133 "*="                                    { return MULEQ; }
134 "/="                                    { return DIVEQ; }
135 "%="                                    { return MODEQ; }
136 "^="                                    { return XOREQ; }
137 "&="                                    { return ANDEQ; }
138 "|="                                    { return OREQ; }
139 "<<"                                    { return SL; }
140 ">>"                                    { return SR; }
141 "<<="                                   { return SLEQ; }
142 ">>="                                   { return SREQ; }
143 "=="                                    { return EQ; }
144 "!="                                    { return NOTEQ; }
145 "<="                                    { return LTEQ; }
146 ">="                                    { return GTEQ; }
147 "&&"                                    { return ANDAND; }
148 "||"                                    { return OROR; }
149 "++"                                    { return PLUSPLUS; }
150 "--"                                    { return MINUSMINUS; }
151 ","                                     { return ','; }
152 "->"                                    { return ARROW; }
153
        /*
         * Double-underscore and underscore-capital keywords.
         *  - The asm / attribute / nonnull / typeof spellings call
         *    parse_ignored_macro(), which swallows a following parenthesised
         *    argument list; when it reports that no such list was found the
         *    rule REJECTs so that another rule can match the text.
         *  - __const, __inline(__), __signed__, __restrict(__) and
         *    __volatile(__) return the same token as the plain keyword;
         *    __extension__ and _Bool have tokens of their own.
         *  - _Noreturn is discarded.
         */
154 "__asm"[\t\f\v\r ]+"volatile"           { if (!parse_ignored_macro()) REJECT; }
155 "__asm__"[\t\f\v\r ]+"volatile"         { if (!parse_ignored_macro()) REJECT; }
156 "__asm__"[\t\f\v\r ]+"__volatile__"     { if (!parse_ignored_macro()) REJECT; }
157 "__asm"                                 { if (!parse_ignored_macro()) REJECT; }
158 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
159 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
160 "__attribute"                           { if (!parse_ignored_macro()) REJECT; }
161 "__const"                               { return CONST; }
162 "__extension__"                         { return EXTENSION; }
163 "__inline__"                            { return INLINE; }
164 "__inline"                              { return INLINE; }
165 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
166 "_Noreturn"                             { /* Ignore */ }
167 "__signed__"                            { return SIGNED; }
168 "__restrict"                            { return RESTRICT; }
169 "__restrict__"                          { return RESTRICT; }
170 "__typeof"                              { if (!parse_ignored_macro()) REJECT; }
171 "__volatile"                            { return VOLATILE; }
172 "__volatile__"                          { return VOLATILE; }
173 "_Bool"                                 { return BOOL; }
174
        /*
         * GLib 64-bit constant macros and boolean spellings get dedicated
         * tokens.  The last rule of this group applies to every
         * identifier-shaped word: while scanner->macro_scan is set the word
         * is classified by check_identifier(); otherwise the rule REJECTs
         * so that the keyword and identifier rules further down can match.
         */
175 "G_GINT64_CONSTANT"                     { return INTL_CONST; }
176 "G_GUINT64_CONSTANT"                    { return INTUL_CONST; }
177
178 "TRUE"                                  { return BOOLEAN; }
179 "FALSE"                                 { return BOOLEAN; }
180 "true"                                  { return BOOLEAN; }
181 "false"                                 { return BOOLEAN; }
182
183 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return check_identifier(scanner, yytext); else REJECT; }
184
        /*
         * C keywords, one token each.  "asm" is handled like the __asm
         * spellings: it is dropped together with its parenthesised argument
         * list, or REJECTed when none follows.  The 128-bit integer type
         * names return INT.  The final rule sends any identifier-shaped
         * word to check_identifier(), which tells typedef names from plain
         * identifiers.
         */
185 "asm"                                   { if (!parse_ignored_macro()) REJECT; }
186 "auto"                                  { return AUTO; }
187 "break"                                 { return BREAK; }
188 "case"                                  { return CASE; }
189 "char"                                  { return CHAR; }
190 "const"                                 { return CONST; }
191 "continue"                              { return CONTINUE; }
192 "default"                               { return DEFAULT; }
193 "do"                                    { return DO; }
194 "double"                                { return DOUBLE; }
195 "else"                                  { return ELSE; }
196 "enum"                                  { return ENUM; }
197 "extern"                                { return EXTERN; }
198 "float"                                 { return FLOAT; }
199 "for"                                   { return FOR; }
200 "goto"                                  { return GOTO; }
201 "if"                                    { return IF; }
202 "inline"                                { return INLINE; }
203 "int"                                   { return INT; }
204 "__uint128_t"                           { return INT; }
205 "__int128_t"                            { return INT; }
206 "__uint128"                             { return INT; }
207 "__int128"                              { return INT; }
208 "long"                                  { return LONG; }
209 "register"                              { return REGISTER; }
210 "restrict"                              { return RESTRICT; }
211 "return"                                { return RETURN; }
212 "short"                                 { return SHORT; }
213 "signed"                                { return SIGNED; }
214 "sizeof"                                { return SIZEOF; }
215 "static"                                { return STATIC; }
216 "struct"                                { return STRUCT; }
217 "switch"                                { return SWITCH; }
218 "typedef"                               { return TYPEDEF; }
219 "union"                                 { return UNION; }
220 "unsigned"                              { return UNSIGNED; }
221 "void"                                  { return VOID; }
222 "volatile"                              { return VOLATILE; }
223 "while"                                 { return WHILE; }
224
225 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
226
        /*
         * Literals.  Hexadecimal, octal and decimal integers (with optional
         * integer suffix) return INTEGER; numbers with a fraction and/or an
         * exponent return FLOATING; character and string literals, with or
         * without an L prefix, return CHARACTER and STRING.  Any single
         * character that no other rule accepts is reported through
         * print_error() and then skipped.
         */
227 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
228 "0"[0-7]+{intsuffix}?                   { return INTEGER; }
229 [0-9]+{intsuffix}?                      { return INTEGER; }
230
231 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
232 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; }
233
234 "'"{chartext}*"'"                       { return CHARACTER; }
235 "L'"{chartext}*"'"                      { return CHARACTER; }
236
237 "\""{stringtext}*"\""                   { return STRING; }
238 "L\""{stringtext}*"\""                  { return STRING; }
239
240 .                                       { print_error(scanner); }
241
242 %%
243
/*
 * End-of-input hook of the generated scanner.
 *
 * Always returns 1.  The scanner generator treats a non-zero result as
 * "there is no further input", so scanning stops at the end of the
 * current input.
 */
static int
yywrap (void)
{
  enum { NO_FURTHER_INPUT = 1 };

  return NO_FURTHER_INPUT;
}
249
250 static void
251 parse_comment (GISourceScanner *scanner)
252 {
253   int c1, c2;
254   GString *string = NULL;
255   GISourceComment *comment;
256   int comment_lineno;
257   int skip = FALSE;
258
259   c1 = input();
260   c2 = input();
261
262   if (c2 != EOF && (c1 == '*' && c2 != '*' && c2 != '/')) {
263     /*
264      * Store GTK-Doc comment blocks,
265      * starts with one '/' followed by exactly two '*' and not followed by a '/'
266      */
267     if (!g_hash_table_contains (scanner->files, scanner->current_file)) {
268         skip = TRUE;
269     } else {
270         string = g_string_new (yytext);
271     }
272
273     comment_lineno = lineno;
274
275     while (c2 != EOF && !(c1 == '*' && c2 == '/'))
276       {
277         if (!skip)
278           g_string_append_c (string, c1);
279
280         if (c1 == '\n')
281           lineno++;
282
283         c1 = c2;
284         c2 = input();
285       }
286
287     if (skip) {
288         return;
289     }
290
291     g_string_append (string, "*/");
292
293     comment = g_slice_new (GISourceComment);
294     comment->comment = g_string_free (string, FALSE);
295     comment->line = comment_lineno;
296     comment->filename = g_file_get_parse_name (scanner->current_file);
297
298     gi_source_scanner_take_comment (scanner, comment);
299   } else {
300     /*
301      * Ignore all other comment blocks
302      */
303     while (c2 != EOF && !(c1 == '*' && c2 == '/'))
304       {
305         if (c1 == '\n')
306           lineno++;
307
308         c1 = c2;
309         c2 = input();
310       }
311
312     return;
313   }
314 }
315
316 static int
317 check_identifier (GISourceScanner *scanner,
318                   const char  *s)
319 {
320         /*
321          * This function checks if `s' is a type name or an
322          * identifier.
323          */
324
325         if (gi_source_scanner_is_typedef (scanner, s)) {
326                 return TYPEDEF_NAME;
327         } else if (strcmp (s, "__builtin_va_list") == 0) {
328                 return TYPEDEF_NAME;
329         }
330
331         return IDENTIFIER;
332 }
333
/* taken from glib/gfileutils.c */
/* NOTE(review): the _PC_PATH_MAX fallback passes a pathconf() selector to
 * sysconf() and would make the buffer below a variable-length array; it is
 * kept as it was -- confirm whether any supported platform reaches it. */
#if defined(MAXPATHLEN)
#define G_PATH_LENGTH MAXPATHLEN
#elif defined(PATH_MAX)
#define G_PATH_LENGTH PATH_MAX
#elif defined(_PC_PATH_MAX)
#define G_PATH_LENGTH sysconf(_PC_PATH_MAX)
#else
#define G_PATH_LENGTH 2048
#endif

/*
 * Resolve `path' to an absolute path.
 *
 * Returns a newly allocated string that the caller releases with g_free().
 * On non-Windows builds the result of realpath() is returned, or NULL when
 * realpath() fails.  On Windows builds GetFullPathNameA() is used, and a
 * copy of `path' itself is returned when that call fails.
 */
static inline char *
_realpath (const char *path)
{
#ifndef _WIN32
  char buffer[G_PATH_LENGTH];

  return realpath (path, buffer) ? g_strdup (buffer) : NULL;
#else
  /* We don't want to include <windows.h> as it clashes horribly
   * with token names from scannerparser.h. So just declare
   * GetFullPathNameA() here unless we already defined it, like
   * in giscanner.c.
   *
   * The declaration spells out the real signature (DWORD is
   * unsigned long) so that it is a valid C99 declaration and agrees
   * with the one in <windows.h>.
   */
#ifndef USE_WINDOWS
  extern unsigned long __stdcall GetFullPathNameA (const char *,
                                                   unsigned long,
                                                   char *,
                                                   char **);
#endif
  char *buffer;
  char dummy;
  unsigned long rc, len;

  /* First call with a one-byte buffer only asks for the required size. */
  rc = GetFullPathNameA (path, 1, &dummy, NULL);
  if (rc == 0)
    {
      /* Weird failure, so just return the input path as such */
      return g_strdup (path);
    }

  len = rc + 1;
  buffer = g_malloc (len);

  rc = GetFullPathNameA (path, len, buffer, NULL);
  if (rc == 0 || rc > len)
    {
      /* Weird failure again */
      g_free (buffer);
      return g_strdup (path);
    }
  return buffer;
#endif
}
385
386 /*
387  * # linenum "filename" flags
388  *  See http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
389  **/
390
391 static void
392 process_linemarks (GISourceScanner *scanner)
393 {
394         char escaped_filename[1025];
395         char *filename;
396         char *real;
397
398         sscanf(yytext, "# %d \"%1024[^\"]\"", &lineno, escaped_filename);
399         filename = g_strcompress (escaped_filename);
400
401         real = _realpath (filename);
402         if (real)
403           {
404             g_free (filename);
405             filename = real;
406           }
407
408         if (scanner->current_file)
409           g_object_unref (scanner->current_file);
410         scanner->current_file = g_file_new_for_path (filename);
411         g_free (filename);
412 }
413
414 /*
415  * This parses a macro which is ignored, such as
416  * __attribute__((x)) or __asm__ (x)
417  */
418 static int
419 parse_ignored_macro (void)
420 {
421         int c;
422         int nest;
423
424         while ((c = input ()) != EOF && isspace (c))
425                 ;
426         if (c != '(')
427                 return FALSE;
428
429         nest = 0;
430         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
431                 if (c == '(')
432                         nest++;
433                 else if (c == ')')
434                         nest--;
435                 else if (c == '"') {
436                         while ((c = input ()) != EOF && c != '"') {
437                                 if (c == '\\')
438                                         c = input ();
439                         }
440                 } else if (c == '\'') {
441                         c = input ();
442                         if (c == '\\')
443                                 c = input ();
444                         else if (c == '\'')
445                                 return FALSE;
446                         c = input ();
447                         if (c != '\'')
448                                 return FALSE;
449                 } else if (c == '\n')
450                         lineno++;
451         }
452
453         return TRUE;
454 }
455
456 static void
457 parse_trigraph (GISourceScanner *scanner)
458 {
459         char **items;
460         char *start, *end;
461         int i;
462
463         start = g_strstr_len (yytext, yyleng, "<");
464         g_assert (start != NULL);
465         end = g_strstr_len (yytext, yyleng, ">");
466         g_assert (end != NULL);
467         *end = '\0';
468         items = g_strsplit (start + 1, ",", 0);
469         for (i = 0; items[i] != NULL; i++) {
470                 char *item = items[i];
471                 g_strstrip (item);
472                 if (strcmp (item, "public") == 0)
473                         scanner->private = FALSE;
474                 else if (strcmp (item, "private") == 0)
475                         scanner->private = TRUE;
476                 else if (strcmp (item, "flags") == 0)
477                         scanner->flags = TRUE;
478         }
479         g_strfreev (items);
480 }
481
482 static void
483 print_error (GISourceScanner *scanner)
484 {
485   if (yytext[0]) {
486     char *filename = g_file_get_parse_name (scanner->current_file);
487     fprintf(stderr, "%s:%d: unexpected character `%c'\n", filename, lineno, yytext[0]);
488     g_free (filename);
489   }
490 }