Bug 563742 – introspection should record the introduced version of
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33
34 #include <glib.h>
35 #include "sourcescanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38
/* Current input line number. It is incremented by the newline rules and by
 * the comment/ignored-macro helpers, and re-seeded by process_directive(). */
39 int lineno;
40
/* Replace whatever buffer size was previously defined with 65536 bytes. */
41 #undef YY_BUF_SIZE
42 #define YY_BUF_SIZE 65536
43
/* The generated scanner entry point takes the GISourceScanner so that rule
 * actions can refer to it as `scanner`. */
44 extern int yylex (GISourceScanner *scanner);
45 #define YY_DECL int yylex (GISourceScanner *scanner)
/* Helpers defined in the user-code section at the end of this file. */
46 static int yywrap (void);
47 static void parse_comment (GISourceScanner *scanner);
48 static void process_directive (GISourceScanner *scanner);
49 static int check_identifier (GISourceScanner *scanner, const char *);
50 static int parse_ignored_macro (void);
51 %}
52
/* Named sub-patterns used by the integer, floating, character and string literal rules. */
53 intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
54 fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
55 exppart                                 [eE][-+]?[0-9]+
56 floatsuffix                             [fFlL]
57 chartext                                ([^\\\'])|(\\.) 
58 stringtext                              ([^\\\"])|(\\.)
59
60 %%
61
62 "\n"                                    { ++lineno; } /* " */
63 "\\\n"                                  { ++lineno; }
64 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
65
  /* Block comments are consumed by parse_comment(), which also collects
   * gtk-doc style directives; line comments are discarded. */
66 "/*"                                    { parse_comment(scanner); }
67 "//".*                                  { }
68
  /* For a function-like macro, yyless() pushes the opening parenthesis back
   * so that only the define keyword and the macro name are consumed. */
69 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
70 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }
71
  /* Any other preprocessor line is handed to process_directive(). */
72 "#"                                     { process_directive(scanner); }
73
  /* Punctuators and operators.  Single-character punctuators are returned
   * as their own character code; the digraph spellings of braces and
   * brackets return the same codes as the plain spellings. */
74 "{"                                     { return '{'; }
75 "<%"                                    { return '{'; }
76 "}"                                     { return '}'; }
77 "%>"                                    { return '}'; }
78 "["                                     { return '['; }
79 "<:"                                    { return '['; }
80 "]"                                     { return ']'; }
81 ":>"                                    { return ']'; }
82 "("                                     { return '('; }
83 ")"                                     { return ')'; }
84 ";"                                     { return ';'; }
85 ":"                                     { return ':'; }
86 "..."                                   { return ELLIPSIS; }
87 "?"                                     { return '?'; }
88 "."                                     { return '.'; }
89 "+"                                     { return '+'; }
90 "-"                                     { return '-'; }
91 "*"                                     { return '*'; }
92 "/"                                     { return '/'; }
93 "%"                                     { return '%'; }
94 "^"                                     { return '^'; }
95 "&"                                     { return '&'; }
96 "|"                                     { return '|'; }
97 "~"                                     { return '~'; }
98 "!"                                     { return '!'; }
99 "="                                     { return '='; }
100 "<"                                     { return '<'; }
101 ">"                                     { return '>'; }
102 "+="                                    { return ADDEQ; }
103 "-="                                    { return SUBEQ; }
104 "*="                                    { return MULEQ; }
105 "/="                                    { return DIVEQ; }
106 "%="                                    { return MODEQ; }
107 "^="                                    { return XOREQ; }
108 "&="                                    { return ANDEQ; }
109 "|="                                    { return OREQ; }
110 "<<"                                    { return SL; }
111 ">>"                                    { return SR; }
112 "<<="                                   { return SLEQ; }
113 ">>="                                   { return SREQ; }
114 "=="                                    { return EQ; }
115 "!="                                    { return NOTEQ; }
116 "<="                                    { return LTEQ; }
117 ">="                                    { return GTEQ; }
118 "&&"                                    { return ANDAND; }
119 "||"                                    { return OROR; }
120 "++"                                    { return PLUSPLUS; }
121 "--"                                    { return MINUSMINUS; }
122 ","                                     { return ','; }
123 "->"                                    { return ARROW; }
124
  /* Compiler-extension keywords.  The ones that call parse_ignored_macro()
   * are dropped together with their parenthesized argument list; when that
   * helper returns FALSE the action REJECTs so another matching rule is
   * tried instead. */
125 "__asm"                                 { if (!parse_ignored_macro()) REJECT; }
126 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
127 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
128 "__attribute"                           { if (!parse_ignored_macro()) REJECT; }
129 "__const"                               { return CONST; }
130 "__extension__"                         { return EXTENSION; }
131 "__inline"                              { return INLINE; }
132 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
133 "__signed__"                            { return SIGNED; }
134 "__restrict"                            { return RESTRICT; }
135 "__typeof"                              { if (!parse_ignored_macro()) REJECT; }
136 "_Bool"                                 { return BOOL; }
137
  /* While scanner->macro_scan is set, an identifier-shaped token that
   * reaches this rule is returned as IDENTIFIER.  Because this rule is
   * listed before the C keyword rules, that includes the keywords below.
   * Otherwise REJECT passes the token on to the following rules. */
138 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }
139
  /* C keywords.  asm is treated like the ignored extension macros above:
   * it is dropped with its parenthesized arguments, or REJECTed when
   * parse_ignored_macro() returns FALSE. */
140 "asm"                                   { if (!parse_ignored_macro()) REJECT; }
141 "auto"                                  { return AUTO; }
142 "break"                                 { return BREAK; }
143 "case"                                  { return CASE; }
144 "char"                                  { return CHAR; }
145 "const"                                 { return CONST; }
146 "continue"                              { return CONTINUE; }
147 "default"                               { return DEFAULT; }
148 "do"                                    { return DO; }
149 "double"                                { return DOUBLE; }
150 "else"                                  { return ELSE; }
151 "enum"                                  { return ENUM; }
152 "extern"                                { return EXTERN; }
153 "float"                                 { return FLOAT; }
154 "for"                                   { return FOR; }
155 "goto"                                  { return GOTO; }
156 "if"                                    { return IF; }
157 "inline"                                { return INLINE; }
158 "int"                                   { return INT; }
159 "long"                                  { return LONG; }
160 "register"                              { return REGISTER; }
161 "restrict"                              { return RESTRICT; }
162 "return"                                { return RETURN; }
163 "short"                                 { return SHORT; }
164 "signed"                                { return SIGNED; }
165 "sizeof"                                { return SIZEOF; }
166 "static"                                { return STATIC; }
167 "struct"                                { return STRUCT; }
168 "switch"                                { return SWITCH; }
169 "typedef"                               { return TYPEDEF; }
170 "union"                                 { return UNION; }
171 "unsigned"                              { return UNSIGNED; }
172 "void"                                  { return VOID; }
173 "volatile"                              { return VOLATILE; }
174 "while"                                 { return WHILE; }
175
  /* Any remaining identifier: check_identifier() chooses between
   * TYPEDEF_NAME and IDENTIFIER. */
176 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
177
  /* Integer literals: hexadecimal, octal, then decimal, each with an
   * optional integer suffix. */
178 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
179 "0"[0-7]+{intsuffix}?                   { return INTEGER; }
180 [0-9]+{intsuffix}?                      { return INTEGER; }
181
  /* Floating literals: either a fractional constant with optional exponent,
   * or digits followed by a mandatory exponent. */
182 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
183 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; }
184
  /* Character and string literals, plain or with the L prefix. */
185 "'"{chartext}*"'"                       { return CHARACTER; }
186 "L'"{chartext}*"'"                      { return CHARACTER; }
187
188 "\""{stringtext}*"\""                   { return STRING; }
189 "L\""{stringtext}*"\""                  { return STRING; }
190
  /* Catch-all: report the character on stderr with the current file name
   * and line number; no token is returned for it. */
191 .                                       { fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
192
193 %%
194
/*
 * yywrap:
 *
 * End-of-input hook called by the flex-generated scanner.  This lexer
 * never switches to another input, so it always answers 1.
 *
 * Returns: 1, unconditionally.
 */
static int
yywrap (void)
{
  /* A non-zero result tells flex that scanning is finished. */
  return 1;
}
200
201
202 static void
203 parse_gtkdoc (GISourceScanner *scanner,
204               gchar           *symbol,
205               int             *c1,
206               int             *c2)
207 {
208   gboolean isline = FALSE;
209   GString *line_buf;
210   char *line;
211   gchar **parts;
212   GISourceDirective *directive;
213   char *name,*value;
214   GSList *directives;
215   GSList *options = NULL;
216   char *rname;
217   int n_parts;
218
219   line_buf = g_string_new ("");
220
221   do 
222     {
223       *c1 = *c2;
224       if (*c1 == '\n')
225         {
226           isline = TRUE;
227           break;
228         }
229       g_string_append_c (line_buf, *c1);
230       *c2 = input();
231     } while (*c2 != EOF && !(*c1 == '*' && *c2 == '/'));
232   
233   if (!isline)
234     {
235       g_string_free (line_buf, TRUE);
236       return;
237     }
238
239   line = g_string_free (line_buf, FALSE);
240
241   /* Ignore lines that don't have a : - this is a hack but avoids
242    * trying to parse too many things as annotations
243    */
244   if (!strchr (line, ':'))
245     {
246       g_free (line);
247       return;
248     }
249
250   parts = g_strsplit (line, ":", 3);
251   n_parts = g_strv_length (parts);
252
253   if (g_ascii_strcasecmp (parts[0], "eprecated") == 0)
254     {
255       if (n_parts == 3)
256         options = g_slist_prepend (options, g_strdup_printf ("%s: %s", parts[1], parts[2]));
257       else if (n_parts == 2)
258         options = g_slist_prepend (options, g_strdup (parts[1]));
259       else
260         options = g_slist_prepend (options, g_strdup (""));
261       name = parts[0];
262       value = NULL;
263     }
264   else if (g_ascii_strcasecmp (parts[0], "ince") == 0)
265     {
266       if (n_parts == 2)
267         options = g_slist_prepend (options, g_strdup (parts[1]));
268       else
269         options = g_slist_prepend (options, g_strdup (""));
270       name = parts[0];
271       value = NULL;
272     }
273   else if (n_parts >= 2)
274     {
275       name = parts[0];
276
277       if (n_parts == 3) 
278         {
279           char *ptr = g_strdup (parts[1]);
280           char *start;
281           char *end;
282
283           options = NULL;
284           start = strchr (ptr, '(');
285           while (start != NULL) 
286             {
287               end = strchr (start, ')');
288               if (end)
289                 {
290                   options = g_slist_prepend (options, g_strndup (start+1, end-(start+1)));
291                   start = strchr (end+1, '(');
292                 }
293               else
294                 {
295                   break;
296                 }
297             }
298           g_free (ptr);
299           value = parts[2];
300         } 
301       else
302         value = parts[1];
303     }
304   else /* parts == 1 */
305     {
306       name = parts[0];
307       value = NULL;
308     }
309
310   /*
311    * Special cases for global annotations.
312    * Context-sensitive parsing would probably be the right way to go.
313    */
314   if (g_ascii_strncasecmp ("eturn", name, 5) == 0)
315     rname = "return";
316   else if (g_ascii_strncasecmp ("eprecated", name, 9) == 0)
317     rname = "deprecated";
318   else if (g_ascii_strncasecmp ("ince", name, 4) == 0)
319     rname = "since";
320   else
321     rname = name;
322
323   directive = gi_source_directive_new (rname, value, options);
324   directives = g_hash_table_lookup (scanner->directives_map, symbol);
325   directives = g_slist_prepend (directives, directive);
326   g_hash_table_replace (scanner->directives_map,
327                         g_strdup (symbol), directives);
328
329   g_strfreev (parts);
330   g_free (line);
331 }
332
333
334 static void
335 parse_comment (GISourceScanner *scanner)
336 {
337   GString *symbol = NULL;
338   gboolean startofline = FALSE, have_symbol = FALSE, start1 = FALSE, start_symbol = FALSE;
339   int c1, c2;
340
341   c1 = input();
342   c2 = input();
343
344   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
345     {
346       if (c1 == ':')
347         have_symbol = TRUE;
348       else if (c1 == '\n')
349          start1 = TRUE;
350       else if (c1 == '*' && start1)
351          start_symbol = TRUE;
352       else if (!have_symbol && start_symbol) 
353         {
354           if (!symbol)
355             symbol = g_string_new ("");
356           if (c1 != ' ')
357             g_string_append_c (symbol, c1);
358         }
359
360       if (c1 == '\n') 
361         {
362           ++lineno;
363           startofline = TRUE;
364         }
365
366       c1 = c2;
367       c2 = input();
368
369       if ((c1 != '*' && c1 != ' '))
370           startofline = FALSE;
371
372       if (startofline && (c1 == ' ') && ((c2 == '@') || (c2 == 'r') || (c2 == 'R') || (c2 == 'D') || (c2 == 'S')))
373         {
374            c1 = c2;
375            c2 = input();
376            if (symbol)
377              parse_gtkdoc (scanner, symbol->str, &c1, &c2);
378         }
379     }
380
381   if (symbol)
382     g_string_free (symbol, TRUE);
383   
384 }
385
386 static int
387 check_identifier (GISourceScanner *scanner,
388                   const char  *s)
389 {
390         /*
391          * This function checks if `s' is a type name or an
392          * identifier.
393          */
394
395         if (gi_source_scanner_is_typedef (scanner, s)) {
396                 return TYPEDEF_NAME;
397         } else if (strcmp (s, "__builtin_va_list") == 0) {
398                 return TYPEDEF_NAME;
399         }
400
401         return IDENTIFIER;
402 }
403
404 static void
405 process_directive (GISourceScanner *scanner)
406 {
407         /* extract current filename from #line directives */
408         GString *filename_builder;
409         gboolean in_string, found_filename;
410
411         lineno = 0;
412         found_filename = FALSE;
413         in_string = FALSE;
414         filename_builder = g_string_new ("");
415
416         int c = input ();
417         while (c != EOF && c != '\n') {
418                 if (!in_string) {
419                         if (c == '\"') {
420                                 in_string = TRUE;
421                                 found_filename = TRUE;
422                         } else if (c >= '0' && c <= '9') {
423                                 if (!found_filename) {
424                                         lineno = lineno * 10 + (c - '0');
425                                 }
426                         }
427                 } else {
428                         if (c == '\"') {
429                                 in_string = FALSE;
430                         } else if (c == '\\') {
431                                 g_string_append_c (filename_builder, c);
432                                 c = input ();
433                                 g_string_append_c (filename_builder, c);
434                         } else {
435                                 g_string_append_c (filename_builder, c);
436                         }
437                 }
438                 c = input ();
439         }
440
441         if (filename_builder->len > 0) {
442                 char *filename = g_strcompress (filename_builder->str);
443                 if (g_realpath (filename))
444                   {
445                     g_free (scanner->current_filename);
446                     scanner->current_filename = g_realpath (filename);
447                     g_assert (scanner->current_filename);
448                     g_free(filename);
449                   }
450         }
451
452         g_string_free (filename_builder, TRUE);
453 }
454
455 /*
456  * This parses a macro which is ignored, such as
457  * __attribute__((x)) or __asm__ (x)
458  */
459 static int
460 parse_ignored_macro (void)
461 {
462         int c;
463         int nest;
464
465         while ((c = input ()) != EOF && isspace (c))
466                 ;
467         if (c != '(')
468                 return FALSE;
469
470         nest = 0;
471         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
472                 if (c == '(')
473                         nest++;
474                 else if (c == ')')
475                         nest--;
476                 else if (c == '"') {
477                         while ((c = input ()) != EOF && c != '"') {
478                                 if (c == '\\')
479                                         c = input ();
480                         }
481                 } else if (c == '\'') {
482                         c = input ();
483                         if (c == '\\')
484                                 c = input ();
485                         else if (c == '\'')
486                                 return FALSE;
487                         c = input ();
488                         if (c != '\'')
489                                 return FALSE;
490                 } else if (c == '\n')
491                         lineno++;
492         }
493
494         return TRUE;
495 }