Allow parenthesis in annotations
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2 /* GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
%{
#include <ctype.h>
#include <stdio.h>

#include <glib.h>
#include "sourcescanner.h"
#include "scannerparser.h"
#include "grealpath.h"

/* Line counter for the input being scanned: incremented for every newline
 * consumed by the rules and by parse_comment(), and overwritten by
 * process_directive() whenever a '#' line is seen. */
int lineno;

/* The generated scanner takes the GISourceScanner as an explicit argument;
 * YY_DECL makes flex emit yylex() with exactly this prototype. */
extern int yylex (GISourceScanner *scanner);
#define YY_DECL int yylex (GISourceScanner *scanner)
/* Helpers defined in the user-code section at the end of this file. */
static int yywrap (void);
static void parse_comment (GISourceScanner *scanner);
static void process_directive (GISourceScanner *scanner);
static int check_identifier (GISourceScanner *scanner, const char *);
%}
48
/* Integer suffixes: u, l, ul, lu and the C99 long long forms (ll, ull, llu),
 * in either case. */
intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
exppart                                 [eE][-+]?[0-9]+
floatsuffix                             [fFlL]
/* One element of a character or string literal body: either a single
 * character that is neither a backslash nor the closing quote, or a
 * backslash escape.  The backslash must be excluded from the first
 * alternative; otherwise the second backslash of an escaped backslash
 * could be paired with the closing quote and the literal would run on
 * to the next quote in the input. */
chartext                                ([^\\\'])|(\\.)
stringtext                              ([^\\\"])|(\\.)
55
56 %%
57
        /* Newlines only advance the line counter; other blanks are dropped. */
"\n"                                    { ++lineno; } /* " */
[\t\f\v\r ]+                            { /* Ignore whitespace. */ }

        /* Block comments are consumed by parse_comment(); line comments
         * are discarded up to the end of the line. */
"/*"                                    { parse_comment(scanner); }
"//".*                                  { }

        /* "#define NAME(" is a function-like macro: yyless() pushes the
         * '(' back so that it is scanned again as its own token.  Without
         * the parenthesis the definition is an object-like macro. */
"#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
"#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }

        /* Every other '#' line is handed to process_directive(), which
         * reads the rest of the line itself. */
"#"                                     { process_directive(scanner); }
68
        /* Punctuators.  Single-character ones are returned as their own
         * character code, multi-character ones as named tokens.  The
         * digraphs <% %> <: :> are reported as the bracket they stand for. */
"{"                                     { return '{'; }
"<%"                                    { return '{'; }
"}"                                     { return '}'; }
"%>"                                    { return '}'; }
"["                                     { return '['; }
"<:"                                    { return '['; }
"]"                                     { return ']'; }
":>"                                    { return ']'; }
"("                                     { return '('; }
")"                                     { return ')'; }
";"                                     { return ';'; }
":"                                     { return ':'; }
"..."                                   { return ELLIPSIS; }
"?"                                     { return '?'; }
"."                                     { return '.'; }
"+"                                     { return '+'; }
"-"                                     { return '-'; }
"*"                                     { return '*'; }
"/"                                     { return '/'; }
"%"                                     { return '%'; }
"^"                                     { return '^'; }
"&"                                     { return '&'; }
"|"                                     { return '|'; }
"~"                                     { return '~'; }
"!"                                     { return '!'; }
"="                                     { return '='; }
"<"                                     { return '<'; }
">"                                     { return '>'; }
"+="                                    { return ADDEQ; }
"-="                                    { return SUBEQ; }
"*="                                    { return MULEQ; }
"/="                                    { return DIVEQ; }
"%="                                    { return MODEQ; }
"^="                                    { return XOREQ; }
"&="                                    { return ANDEQ; }
"|="                                    { return OREQ; }
"<<"                                    { return SL; }
">>"                                    { return SR; }
"<<="                                   { return SLEQ; }
">>="                                   { return SREQ; }
"=="                                    { return EQ; }
"!="                                    { return NOTEQ; }
"<="                                    { return LTEQ; }
">="                                    { return GTEQ; }
"&&"                                    { return ANDAND; }
"||"                                    { return OROR; }
"++"                                    { return PLUSPLUS; }
"--"                                    { return MINUSMINUS; }
","                                     { return ','; }
"->"                                    { return ARROW; }
119
        /* While scanner->macro_scan is set, every identifier-shaped word,
         * keywords included, is reported as IDENTIFIER.  Otherwise REJECT
         * passes the text on to the keyword and identifier rules below.
         * This rule must therefore stay ahead of them. */
[a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }

        /* Keywords. */
"auto"                                  { return AUTO; }
"_Bool"                                 { return BOOL; }
"break"                                 { return BREAK; }
"case"                                  { return CASE; }
"char"                                  { return CHAR; }
"const"                                 { return CONST; }
"continue"                              { return CONTINUE; }
"default"                               { return DEFAULT; }
"do"                                    { return DO; }
"double"                                { return DOUBLE; }
"else"                                  { return ELSE; }
"enum"                                  { return ENUM; }
"extern"                                { return EXTERN; }
"float"                                 { return FLOAT; }
"for"                                   { return FOR; }
"goto"                                  { return GOTO; }
"if"                                    { return IF; }
"inline"                                { return INLINE; }
"int"                                   { return INT; }
"long"                                  { return LONG; }
"register"                              { return REGISTER; }
"restrict"                              { return RESTRICT; }
"return"                                { return RETURN; }
"short"                                 { return SHORT; }
"signed"                                { return SIGNED; }
"sizeof"                                { return SIZEOF; }
"static"                                { return STATIC; }
"struct"                                { return STRUCT; }
"switch"                                { return SWITCH; }
"typedef"                               { return TYPEDEF; }
"union"                                 { return UNION; }
"unsigned"                              { return UNSIGNED; }
"void"                                  { return VOID; }
"volatile"                              { return VOLATILE; }
"while"                                 { return WHILE; }

        /* Any other word: check_identifier() decides between TYPEDEF_NAME
         * and IDENTIFIER. */
[a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
159
        /* Integer constants: hexadecimal, octal, then decimal, each with an
         * optional suffix. */
"0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
"0"[0-7]+{intsuffix}?                   { return INTEGER; }
[0-9]+{intsuffix}?                      { return INTEGER; }

        /* Floating constants: with a decimal point (exponent optional), or
         * digits followed by a mandatory exponent. */
{fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
[0-9]+{exppart}{floatsuffix}?           { return FLOATING; }

        /* Character constants, plain and wide (L prefix). */
"'"{chartext}*"'"                       { return CHARACTER; }
"L'"{chartext}*"'"                      { return CHARACTER; }

        /* String literals, plain and wide (L prefix). */
"\""{stringtext}*"\""                   { return STRING; }
"L\""{stringtext}*"\""                  { return STRING; }

        /* Anything no other rule matched: report it on stderr with the
         * current file name and line, then carry on scanning. */
.                                       { fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
174
175 %%
176
/*
 * End-of-input hook of the generated scanner.  Always answers 1, which in
 * flex means there is no further input to continue with.
 */
static int
yywrap (void)
{
  const int no_more_input = 1;

  return no_more_input;
}
182
183
184 static void
185 parse_gtkdoc (GISourceScanner *scanner,
186               gchar           *symbol,
187               int             *c1,
188               int             *c2)
189 {
190   gboolean isline = FALSE;
191   gchar line[256];
192   int i;
193   gchar **parts;
194   GISourceDirective *directive;
195   char *name,*value;
196   GSList *directives;
197   GSList *options = NULL;
198   char *rname;
199
200   i = 0;
201   do 
202     {
203       *c1 = *c2;
204       if (*c1 == '\n')
205         {
206           isline = TRUE;
207           break;
208         }
209       if (i >= 256)
210         break;
211       line[i++] = *c1;
212       *c2 = input();
213     } while (*c2 != EOF && !(*c1 == '*' && *c2 == '/'));
214   
215   if (!isline)
216     return;
217
218   line[i] = '\0';
219
220   parts = g_strsplit (line, ": ", 3);
221
222   if (g_strv_length (parts) >= 2)
223     {
224       name = parts[0];
225
226       if (g_strv_length (parts) == 3) 
227         {
228           char *ptr = parts[1];
229           GString *current = NULL;
230           gint8 pstack = 0;
231
232           current = g_string_new ("");
233           value = parts[2];
234
235           do
236             {
237               if (*ptr == '(')
238                 {
239                   pstack++;
240                   if (pstack == 1)
241                     continue;
242                 }
243               else if (*ptr == ')')
244                 pstack--;
245                        
246               if (pstack == 0)
247                 {
248                   options = g_slist_prepend (options, current->str);
249                   break;
250                 }
251               g_string_append_c (current, *ptr);
252             }
253           while (*ptr++);
254
255           g_string_free (current, FALSE);
256         } 
257       else
258         value = parts[1];
259     }
260   else /* parts == 1 */
261     {
262       name = parts[0];
263       value = NULL;
264     }
265
266   /*
267    * This is a special case for return values, name will only be
268    * 'eturn' or a valid name, check the call site.
269    * Context-sensitive parsing would probably be the right way to go
270    */
271   if (g_ascii_strncasecmp ("eturn", name, 5) == 0)
272     rname = "return";
273   else
274     rname = name;
275
276   directive = gi_source_directive_new (rname, value, options);
277   directives = g_hash_table_lookup (scanner->directives_map, symbol);
278   directives = g_slist_prepend (directives, directive);
279   g_hash_table_replace (scanner->directives_map, 
280                         g_strdup (symbol), directives);
281
282   g_strfreev (parts);
283   
284 }
285
286
287 static void
288 parse_comment (GISourceScanner *scanner)
289 {
290   GString *symbol = NULL;
291   gboolean startofline = FALSE, have_symbol = FALSE, start1 = FALSE, start_symbol = FALSE;
292   int c1, c2;
293
294   c1 = input();
295   c2 = input();
296
297   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
298     {
299       if (c1 == ':')
300         have_symbol = TRUE;
301       else if (c1 == '\n')
302          start1 = TRUE;
303       else if (c1 == '*' && start1)
304          start_symbol = TRUE;
305       else if (!have_symbol && start_symbol) 
306         {
307           if (!symbol)
308             symbol = g_string_new ("");
309           if (c1 != ' ')
310             g_string_append_c (symbol, c1);
311         }
312
313       if (c1 == '\n') 
314         {
315           ++lineno;
316           startofline = TRUE;
317         }
318
319       c1 = c2;
320       c2 = input();
321
322       if ((c1 != '*' && c1 != ' '))
323           startofline = FALSE;
324
325       if (startofline && (c1 == ' ') && (c2 == '@' || (c2 == 'r') || (c2 == 'R')))
326         {
327            c1 = c2;
328            c2 = input();
329            if (symbol)
330              parse_gtkdoc (scanner, symbol->str, &c1, &c2);
331         }
332     }
333
334   if (symbol)
335     g_string_free (symbol, TRUE);
336   
337 }
338
339 static int
340 check_identifier (GISourceScanner *scanner,
341                   const char  *s)
342 {
343         /*
344          * This function checks if `s' is a type name or an
345          * identifier.
346          */
347
348         if (gi_source_scanner_is_typedef (scanner, s)) {
349                 return TYPEDEF_NAME;
350         } else if (strcmp (s, "__builtin_va_list") == 0) {
351                 return TYPEDEF_NAME;
352         }
353
354         return IDENTIFIER;
355 }
356
357 static void
358 process_directive (GISourceScanner *scanner)
359 {
360         /* extract current filename from #line directives */
361         GString *filename_builder;
362         gboolean in_string, found_filename;
363
364         lineno = 0;
365         found_filename = FALSE;
366         in_string = FALSE;
367         filename_builder = g_string_new ("");
368
369         int c = input ();
370         while (c != EOF && c != '\n') {
371                 if (!in_string) {
372                         if (c == '\"') {
373                                 in_string = TRUE;
374                                 found_filename = TRUE;
375                         } else if (c >= '0' && c <= '9') {
376                                 if (!found_filename) {
377                                         lineno = lineno * 10 + (c - '0');
378                                 }
379                         }
380                 } else {
381                         if (c == '\"') {
382                                 in_string = FALSE;
383                         } else if (c == '\\') {
384                                 g_string_append_c (filename_builder, c);
385                                 c = input ();
386                                 g_string_append_c (filename_builder, c);
387                         } else {
388                                 g_string_append_c (filename_builder, c);
389                         }
390                 }
391                 c = input ();
392         }
393
394         if (filename_builder->len > 0) {
395                 char *filename = g_strcompress (filename_builder->str);
396                 if (g_realpath (filename))
397                   {
398                     g_free (scanner->current_filename);
399                     scanner->current_filename = g_realpath (filename);
400                     g_assert (scanner->current_filename);
401                     g_free(filename);
402                   }
403         }
404
405         g_string_free (filename_builder, TRUE);
406 }
407