Bug 559737 – Deal with continuations in macros
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33
34 #include <glib.h>
35 #include "sourcescanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38
39 int lineno; /* Current input line: incremented by the newline rules, reset by process_directive(). */
40
41 extern int yylex (GISourceScanner *scanner); /* Scanner entry point; YY_DECL below gives the generated function this signature. */
42 #define YY_DECL int yylex (GISourceScanner *scanner)
43 static int yywrap (void); /* End-of-input hook for the generated scanner. */
44 static void parse_comment (GISourceScanner *scanner); /* Consumes a block comment after its opening delimiter. */
45 static void process_directive (GISourceScanner *scanner); /* Consumes the rest of a directive line, picking up line number and file name. */
46 static int check_identifier (GISourceScanner *scanner, const char *); /* Returns TYPEDEF_NAME or IDENTIFIER for a word. */
47 static int parse_ignored_macro (void); /* Skips a parenthesized argument list; FALSE when none follows. */
48 %}
49
/*
 * Named sub-patterns used by the literal rules further down:
 *   intsuffix   - unsigned and long suffix letters of an integer constant, either case
 *   fracconst   - digits containing a decimal point, with digits on at least one side
 *   exppart     - exponent: e or E, an optional sign, then digits
 *   floatsuffix - one of the letters f, F, l, L
 *   chartext    - one element of a character constant: a backslash followed by
 *                 any character, or any character other than backslash and single quote
 *   stringtext  - the same for string literals, with the double quote excluded instead
 */
50 intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
51 fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
52 exppart                                 [eE][-+]?[0-9]+
53 floatsuffix                             [fFlL]
54 chartext                                ([^\\\'])|(\\.) 
55 stringtext                              ([^\\\"])|(\\.)
56
57 %%
58
59 "\n"                                    { ++lineno; } /* " */ /* Newline: count the line; no token is returned. */
60 "\\\n"                                  { ++lineno; } /* Backslash-newline (line continuation): count the line; no token is returned. */
61 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
62
63 "/*"                                    { parse_comment(scanner); } /* Block comment opener: parse_comment() consumes the rest of the comment. */
64 "//".*                                  { } /* Line comment: discarded up to the end of the line. */
65
66 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; } /* Function-like macro: the opening parenthesis is pushed back with yyless() so it is scanned again as its own token. */
67 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; } /* Object-like macro: the name is not directly followed by an opening parenthesis. Both rules need exactly one space after the define keyword. */
68
69 "#"                                     { process_directive(scanner); } /* Any other hash sign: the rest of the line is consumed by process_directive(). */
70
71 "{"                                     { return '{'; } /* Single-character punctuators are returned as their own character code. */
72 "<%"                                    { return '{'; } /* Digraph spelling of the left brace. */
73 "}"                                     { return '}'; }
74 "%>"                                    { return '}'; } /* Digraph spelling of the right brace. */
75 "["                                     { return '['; }
76 "<:"                                    { return '['; } /* Digraph spelling of the left bracket. */
77 "]"                                     { return ']'; }
78 ":>"                                    { return ']'; } /* Digraph spelling of the right bracket. */
79 "("                                     { return '('; }
80 ")"                                     { return ')'; }
81 ";"                                     { return ';'; }
82 ":"                                     { return ':'; }
83 "..."                                   { return ELLIPSIS; }
84 "?"                                     { return '?'; }
85 "."                                     { return '.'; }
86 "+"                                     { return '+'; }
87 "-"                                     { return '-'; }
88 "*"                                     { return '*'; }
89 "/"                                     { return '/'; }
90 "%"                                     { return '%'; }
91 "^"                                     { return '^'; }
92 "&"                                     { return '&'; }
93 "|"                                     { return '|'; }
94 "~"                                     { return '~'; }
95 "!"                                     { return '!'; }
96 "="                                     { return '='; }
97 "<"                                     { return '<'; }
98 ">"                                     { return '>'; }
99 "+="                                    { return ADDEQ; } /* Multi-character operators return named token codes. */
100 "-="                                    { return SUBEQ; }
101 "*="                                    { return MULEQ; }
102 "/="                                    { return DIVEQ; }
103 "%="                                    { return MODEQ; }
104 "^="                                    { return XOREQ; }
105 "&="                                    { return ANDEQ; }
106 "|="                                    { return OREQ; }
107 "<<"                                    { return SL; }
108 ">>"                                    { return SR; }
109 "<<="                                   { return SLEQ; }
110 ">>="                                   { return SREQ; }
111 "=="                                    { return EQ; }
112 "!="                                    { return NOTEQ; }
113 "<="                                    { return LTEQ; }
114 ">="                                    { return GTEQ; }
115 "&&"                                    { return ANDAND; }
116 "||"                                    { return OROR; }
117 "++"                                    { return PLUSPLUS; }
118 "--"                                    { return MINUSMINUS; }
119 ","                                     { return ','; }
120 "->"                                    { return ARROW; }
121
122 "__asm"                                 { if (!parse_ignored_macro()) REJECT; } /* Ignored construct: parse_ignored_macro() skips the parenthesized argument and no token is returned; without an argument list the match is rejected. */
123 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
124 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
125 "__attribute"                           { if (!parse_ignored_macro()) REJECT; }
126 "__const"                               { return CONST; } /* Same token as the plain const keyword. */
127 "__extension__"                         { return EXTENSION; }
128 "__inline"                              { return INLINE; } /* Same token as the plain inline keyword. */
129 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
130 "__signed__"                            { return SIGNED; } /* Same token as the plain signed keyword. */
131 "__restrict"                            { return RESTRICT; } /* Same token as the plain restrict keyword. */
132 "__typeof"                              { if (!parse_ignored_macro()) REJECT; }
133 "_Bool"                                 { return BOOL; }
134
135 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; } /* Listed ahead of the keyword rules so it wins equal-length matches: with macro_scan set every word is an IDENTIFIER, otherwise REJECT hands the text to the rules below. */
136
137 "asm"                                   { if (!parse_ignored_macro()) REJECT; } /* Ignored like the underscore spellings: the parenthesized argument is skipped, or the match is rejected when none follows. */
138 "auto"                                  { return AUTO; }
139 "break"                                 { return BREAK; }
140 "case"                                  { return CASE; }
141 "char"                                  { return CHAR; }
142 "const"                                 { return CONST; }
143 "continue"                              { return CONTINUE; }
144 "default"                               { return DEFAULT; }
145 "do"                                    { return DO; }
146 "double"                                { return DOUBLE; }
147 "else"                                  { return ELSE; }
148 "enum"                                  { return ENUM; }
149 "extern"                                { return EXTERN; }
150 "float"                                 { return FLOAT; }
151 "for"                                   { return FOR; }
152 "goto"                                  { return GOTO; }
153 "if"                                    { return IF; }
154 "inline"                                { return INLINE; }
155 "int"                                   { return INT; }
156 "long"                                  { return LONG; }
157 "register"                              { return REGISTER; }
158 "restrict"                              { return RESTRICT; }
159 "return"                                { return RETURN; }
160 "short"                                 { return SHORT; }
161 "signed"                                { return SIGNED; }
162 "sizeof"                                { return SIZEOF; }
163 "static"                                { return STATIC; }
164 "struct"                                { return STRUCT; }
165 "switch"                                { return SWITCH; }
166 "typedef"                               { return TYPEDEF; }
167 "union"                                 { return UNION; }
168 "unsigned"                              { return UNSIGNED; }
169 "void"                                  { return VOID; }
170 "volatile"                              { return VOLATILE; }
171 "while"                                 { return WHILE; }
172
173 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); } /* Any word not claimed above: check_identifier() picks TYPEDEF_NAME or IDENTIFIER. */
174
175 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; } /* Hexadecimal integer constant. */
176 "0"[0-7]+{intsuffix}?                   { return INTEGER; } /* Octal integer constant. */
177 [0-9]+{intsuffix}?                      { return INTEGER; } /* Any other run of digits, including a lone zero. */
178
179 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; } /* Floating constant containing a decimal point. */
180 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; } /* Floating constant with an exponent and no decimal point. */
181
182 "'"{chartext}*"'"                       { return CHARACTER; } /* Character constant; the pattern accepts any number of elements between the quotes. */
183 "L'"{chartext}*"'"                      { return CHARACTER; } /* Same with an L prefix. */
184
185 "\""{stringtext}*"\""                   { return STRING; } /* String literal; the whole literal is a single token. */
186 "L\""{stringtext}*"\""                  { return STRING; } /* Same with an L prefix. */
187
188 .                                       { fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); } /* Catch-all: report a character no other rule matched; no token is returned. */
189
190 %%
191
/*
 * End-of-input hook invoked by the generated scanner.
 *
 * Always answers 1, which tells the scanner that no further input
 * is available once the current input is exhausted.
 */
static int
yywrap (void)
{
  enum { NO_MORE_INPUT = 1 };

  return NO_MORE_INPUT;
}
197
198
199 static void
200 parse_gtkdoc (GISourceScanner *scanner,
201               gchar           *symbol,
202               int             *c1,
203               int             *c2)
204 {
205   gboolean isline = FALSE;
206   GString *line_buf;
207   char *line;
208   gchar **parts;
209   GISourceDirective *directive;
210   char *name,*value;
211   GSList *directives;
212   GSList *options = NULL;
213   char *rname;
214   int n_parts;
215
216   line_buf = g_string_new ("");
217
218   do 
219     {
220       *c1 = *c2;
221       if (*c1 == '\n')
222         {
223           isline = TRUE;
224           break;
225         }
226       g_string_append_c (line_buf, *c1);
227       *c2 = input();
228     } while (*c2 != EOF && !(*c1 == '*' && *c2 == '/'));
229   
230   if (!isline)
231     {
232       g_string_free (line_buf, TRUE);
233       return;
234     }
235
236   line = g_string_free (line_buf, FALSE);
237
238   /* Ignore lines that don't have a : - this is a hack but avoids
239    * trying to parse too many things as annotations
240    */
241   if (!strchr (line, ':'))
242     {
243       g_free (line);
244       return;
245     }
246
247   parts = g_strsplit (line, ":", 3);
248   n_parts = g_strv_length (parts);
249
250   if (g_ascii_strcasecmp (parts[0], "eprecated") == 0)
251     {
252       if (n_parts == 3)
253         options = g_slist_prepend (options, g_strdup_printf ("%s: %s", parts[1], parts[2]));
254       else if (n_parts == 2)
255         options = g_slist_prepend (options, g_strdup (parts[1]));
256       else
257         options = g_slist_prepend (options, g_strdup (""));
258       name = parts[0];
259       value = NULL;
260     }
261   else if (n_parts >= 2)
262     {
263       name = parts[0];
264
265       if (n_parts == 3) 
266         {
267           char *ptr = g_strdup (parts[1]);
268           char *start;
269           char *end;
270
271           options = NULL;
272           start = strchr (ptr, '(');
273           while (start != NULL) 
274             {
275               end = strchr (start, ')');
276               if (end)
277                 {
278                   options = g_slist_prepend (options, g_strndup (start+1, end-(start+1)));
279                   start = strchr (end+1, '(');
280                 }
281               else
282                 {
283                   break;
284                 }
285             }
286           g_free (ptr);
287           value = parts[2];
288         } 
289       else
290         value = parts[1];
291     }
292   else /* parts == 1 */
293     {
294       name = parts[0];
295       value = NULL;
296     }
297
298   /*
299    * Special cases for global annotations.
300    * Context-sensitive parsing would probably be the right way to go.
301    */
302   if (g_ascii_strncasecmp ("eturn", name, 5) == 0)
303     rname = "return";
304   else if (g_ascii_strncasecmp ("eprecated", name, 9) == 0)
305     rname = "deprecated";
306   else
307     rname = name;
308
309   directive = gi_source_directive_new (rname, value, options);
310   directives = g_hash_table_lookup (scanner->directives_map, symbol);
311   directives = g_slist_prepend (directives, directive);
312   g_hash_table_replace (scanner->directives_map,
313                         g_strdup (symbol), directives);
314
315   g_strfreev (parts);
316   g_free (line);
317 }
318
319
320 static void
321 parse_comment (GISourceScanner *scanner)
322 {
323   GString *symbol = NULL;
324   gboolean startofline = FALSE, have_symbol = FALSE, start1 = FALSE, start_symbol = FALSE;
325   int c1, c2;
326
327   c1 = input();
328   c2 = input();
329
330   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
331     {
332       if (c1 == ':')
333         have_symbol = TRUE;
334       else if (c1 == '\n')
335          start1 = TRUE;
336       else if (c1 == '*' && start1)
337          start_symbol = TRUE;
338       else if (!have_symbol && start_symbol) 
339         {
340           if (!symbol)
341             symbol = g_string_new ("");
342           if (c1 != ' ')
343             g_string_append_c (symbol, c1);
344         }
345
346       if (c1 == '\n') 
347         {
348           ++lineno;
349           startofline = TRUE;
350         }
351
352       c1 = c2;
353       c2 = input();
354
355       if ((c1 != '*' && c1 != ' '))
356           startofline = FALSE;
357
358       if (startofline && (c1 == ' ') && (c2 == '@' || (c2 == 'r') || (c2 == 'R') || (c2 == 'D')))
359         {
360            c1 = c2;
361            c2 = input();
362            if (symbol)
363              parse_gtkdoc (scanner, symbol->str, &c1, &c2);
364         }
365     }
366
367   if (symbol)
368     g_string_free (symbol, TRUE);
369   
370 }
371
372 static int
373 check_identifier (GISourceScanner *scanner,
374                   const char  *s)
375 {
376         /*
377          * This function checks if `s' is a type name or an
378          * identifier.
379          */
380
381         if (gi_source_scanner_is_typedef (scanner, s)) {
382                 return TYPEDEF_NAME;
383         } else if (strcmp (s, "__builtin_va_list") == 0) {
384                 return TYPEDEF_NAME;
385         }
386
387         return IDENTIFIER;
388 }
389
390 static void
391 process_directive (GISourceScanner *scanner)
392 {
393         /* extract current filename from #line directives */
394         GString *filename_builder;
395         gboolean in_string, found_filename;
396
397         lineno = 0;
398         found_filename = FALSE;
399         in_string = FALSE;
400         filename_builder = g_string_new ("");
401
402         int c = input ();
403         while (c != EOF && c != '\n') {
404                 if (!in_string) {
405                         if (c == '\"') {
406                                 in_string = TRUE;
407                                 found_filename = TRUE;
408                         } else if (c >= '0' && c <= '9') {
409                                 if (!found_filename) {
410                                         lineno = lineno * 10 + (c - '0');
411                                 }
412                         }
413                 } else {
414                         if (c == '\"') {
415                                 in_string = FALSE;
416                         } else if (c == '\\') {
417                                 g_string_append_c (filename_builder, c);
418                                 c = input ();
419                                 g_string_append_c (filename_builder, c);
420                         } else {
421                                 g_string_append_c (filename_builder, c);
422                         }
423                 }
424                 c = input ();
425         }
426
427         if (filename_builder->len > 0) {
428                 char *filename = g_strcompress (filename_builder->str);
429                 if (g_realpath (filename))
430                   {
431                     g_free (scanner->current_filename);
432                     scanner->current_filename = g_realpath (filename);
433                     g_assert (scanner->current_filename);
434                     g_free(filename);
435                   }
436         }
437
438         g_string_free (filename_builder, TRUE);
439 }
440
441 /*
442  * This parses a macro which is ignored, such as
443  * __attribute__((x)) or __asm__ (x)
444  */
445 static int
446 parse_ignored_macro (void)
447 {
448         int c;
449         int nest;
450
451         while ((c = input ()) != EOF && isspace (c))
452                 ;
453         if (c != '(')
454                 return FALSE;
455
456         nest = 0;
457         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
458                 if (c == '(')
459                         nest++;
460                 else if (c == ')')
461                         nest--;
462                 else if (c == '"') {
463                         while ((c = input ()) != EOF && c != '"') {
464                                 if (c == '\\')
465                                         c = input ();
466                         }
467                 } else if (c == '\'') {
468                         c = input ();
469                         if (c == '\\')
470                                 c = input ();
471                         else if (c == '\'')
472                                 return FALSE;
473                         c = input ();
474                         if (c != '\'')
475                                 return FALSE;
476                 } else if (c == '\n')
477                         lineno++;
478         }
479
480         return TRUE;
481 }