Bug 554490: g-ir-scanner gets confused by '\\'
[platform/upstream/gobject-introspection.git] / giscanner / scannerlexer.l
1 /* -*- Mode: C -*- */
2 /* GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33
34 #include <glib.h>
35 #include "sourcescanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38
/* Current line number.  Incremented by the "\n" rule, parse_comment() and
 * parse_ignored_macro(); reset and recomputed by process_directive(). */
39 int lineno;
40
/* YY_DECL makes the generated yylex() take the GISourceScanner as its
 * only argument; the extern prototype matches that definition. */
41 extern int yylex (GISourceScanner *scanner);
42 #define YY_DECL int yylex (GISourceScanner *scanner)
/* Helpers defined in the user-code section at the end of this file. */
43 static int yywrap (void);
44 static void parse_comment (GISourceScanner *scanner);
45 static void process_directive (GISourceScanner *scanner);
46 static int check_identifier (GISourceScanner *scanner, const char *);
47 static int parse_ignored_macro (void);
48 %}
49
/* Named sub-patterns used by the numeric, character and string literal rules. */
/* intsuffix:   optional u/U and up to two l/L, in either order. */
50 intsuffix                               ([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
/* fracconst:   digits with a decimal point ("1.5", ".5" or "1."). */
51 fracconst                               ([0-9]*\.[0-9]+)|([0-9]+\.)
/* exppart:     exponent marker, optional sign, one or more digits. */
52 exppart                                 [eE][-+]?[0-9]+
53 floatsuffix                             [fFlL]
/* chartext / stringtext: either one character that is neither a backslash
 * nor the closing quote, or a backslash followed by any single character
 * matched by `.`, so an escaped quote or backslash does not end the literal. */
54 chartext                                ([^\\\'])|(\\.) 
55 stringtext                              ([^\\\"])|(\\.)
56
57 %%
58
  /* Newlines only advance the line counter; other whitespace is dropped. */
59 "\n"                                    { ++lineno; } /* " */
60 [\t\f\v\r ]+                            { /* Ignore whitespace. */ }
61
  /* Block comments are consumed by parse_comment(); line comments are discarded. */
62 "/*"                                    { parse_comment(scanner); }
63 "//".*                                  { }
64
  /* Macro definitions.  For a function-like macro the trailing "(" is pushed
   * back with yyless() so that it is scanned again as its own token. */
65 "#define "[a-zA-Z_][a-zA-Z_0-9]*"("     { yyless (yyleng - 1); return FUNCTION_MACRO; }
66 "#define "[a-zA-Z_][a-zA-Z_0-9]*        { return OBJECT_MACRO; }
67
  /* Any other "#" line is handed to process_directive(), which reads up to
   * the end of the line. */
68 "#"                                     { process_directive(scanner); }
69
  /* Punctuators and operators.  The digraphs <% %> <: :> are returned as the
   * brace/bracket they stand for. */
70 "{"                                     { return '{'; }
71 "<%"                                    { return '{'; }
72 "}"                                     { return '}'; }
73 "%>"                                    { return '}'; }
74 "["                                     { return '['; }
75 "<:"                                    { return '['; }
76 "]"                                     { return ']'; }
77 ":>"                                    { return ']'; }
78 "("                                     { return '('; }
79 ")"                                     { return ')'; }
80 ";"                                     { return ';'; }
81 ":"                                     { return ':'; }
82 "..."                                   { return ELLIPSIS; }
83 "?"                                     { return '?'; }
84 "."                                     { return '.'; }
85 "+"                                     { return '+'; }
86 "-"                                     { return '-'; }
87 "*"                                     { return '*'; }
88 "/"                                     { return '/'; }
89 "%"                                     { return '%'; }
90 "^"                                     { return '^'; }
91 "&"                                     { return '&'; }
92 "|"                                     { return '|'; }
93 "~"                                     { return '~'; }
94 "!"                                     { return '!'; }
95 "="                                     { return '='; }
96 "<"                                     { return '<'; }
97 ">"                                     { return '>'; }
98 "+="                                    { return ADDEQ; }
99 "-="                                    { return SUBEQ; }
100 "*="                                    { return MULEQ; }
101 "/="                                    { return DIVEQ; }
102 "%="                                    { return MODEQ; }
103 "^="                                    { return XOREQ; }
104 "&="                                    { return ANDEQ; }
105 "|="                                    { return OREQ; }
106 "<<"                                    { return SL; }
107 ">>"                                    { return SR; }
108 "<<="                                   { return SLEQ; }
109 ">>="                                   { return SREQ; }
110 "=="                                    { return EQ; }
111 "!="                                    { return NOTEQ; }
112 "<="                                    { return LTEQ; }
113 ">="                                    { return GTEQ; }
114 "&&"                                    { return ANDAND; }
115 "||"                                    { return OROR; }
116 "++"                                    { return PLUSPLUS; }
117 "--"                                    { return MINUSMINUS; }
118 ","                                     { return ','; }
119 "->"                                    { return ARROW; }
120
  /* Compiler-specific spellings.  __attribute__ and __nonnull are swallowed
   * together with their parenthesised argument by parse_ignored_macro(); when
   * that returns FALSE the match is REJECTed so a later rule handles it. */
121 "__attribute__"                         { if (!parse_ignored_macro()) REJECT; }
122 "__const"                               { return CONST; }
123 "__extension__"                         { return EXTENSION; }
124 "__inline"                              { return INLINE; }
125 "__nonnull"                             { if (!parse_ignored_macro()) REJECT; }
126 "__restrict"                            { return RESTRICT; }
127
  /* While scanner->macro_scan is set, every identifier-shaped token (keywords
   * below included) is returned as IDENTIFIER; otherwise this rule rejects
   * and the keyword / identifier rules that follow apply. */
128 [a-zA-Z_][a-zA-Z_0-9]*                  { if (scanner->macro_scan) return IDENTIFIER; else REJECT; }
129
  /* Keywords.  asm / __asm__ are swallowed like __attribute__ above. */
130 "asm"                                   { if (!parse_ignored_macro()) REJECT; }
131 "__asm__"                               { if (!parse_ignored_macro()) REJECT; }
132 "auto"                                  { return AUTO; }
133 "_Bool"                                 { return BOOL; }
134 "break"                                 { return BREAK; }
135 "case"                                  { return CASE; }
136 "char"                                  { return CHAR; }
137 "const"                                 { return CONST; }
138 "continue"                              { return CONTINUE; }
139 "default"                               { return DEFAULT; }
140 "do"                                    { return DO; }
141 "double"                                { return DOUBLE; }
142 "else"                                  { return ELSE; }
143 "enum"                                  { return ENUM; }
144 "extern"                                { return EXTERN; }
145 "float"                                 { return FLOAT; }
146 "for"                                   { return FOR; }
147 "goto"                                  { return GOTO; }
148 "if"                                    { return IF; }
149 "inline"                                { return INLINE; }
150 "__inline__"                            { return INLINE; }
151 "int"                                   { return INT; }
152 "long"                                  { return LONG; }
153 "register"                              { return REGISTER; }
154 "restrict"                              { return RESTRICT; }
155 "return"                                { return RETURN; }
156 "short"                                 { return SHORT; }
157 "signed"                                { return SIGNED; }
158 "sizeof"                                { return SIZEOF; }
159 "static"                                { return STATIC; }
160 "struct"                                { return STRUCT; }
161 "switch"                                { return SWITCH; }
162 "typedef"                               { return TYPEDEF; }
163 "union"                                 { return UNION; }
164 "unsigned"                              { return UNSIGNED; }
165 "void"                                  { return VOID; }
166 "volatile"                              { return VOLATILE; }
167 "while"                                 { return WHILE; }
168
  /* Remaining identifiers: check_identifier() decides between TYPEDEF_NAME
   * and IDENTIFIER. */
169 [a-zA-Z_][a-zA-Z_0-9]*                  { return check_identifier(scanner, yytext); }
170
  /* Integer literals: hexadecimal, octal, then plain digit sequences. */
171 "0"[xX][0-9a-fA-F]+{intsuffix}?         { return INTEGER; }
172 "0"[0-7]+{intsuffix}?                   { return INTEGER; }
173 [0-9]+{intsuffix}?                      { return INTEGER; }
174
  /* Floating literals: with a decimal point, or digits with an exponent. */
175 {fracconst}{exppart}?{floatsuffix}?     { return FLOATING; }
176 [0-9]+{exppart}{floatsuffix}?           { return FLOATING; }
177
  /* Character and string literals, with an optional L (wide) prefix. */
178 "'"{chartext}*"'"                       { return CHARACTER; }
179 "L'"{chartext}*"'"                      { return CHARACTER; }
180
181 "\""{stringtext}*"\""                   { return STRING; }
182 "L\""{stringtext}*"\""                  { return STRING; }
183
  /* Anything else: report it on stderr and keep scanning (no token returned). */
184 .                                       { fprintf(stderr, "%s:%d: unexpected character `%c'\n", scanner->current_filename, lineno, yytext[0]); }
185
186 %%
187
/*
 * End-of-input hook for the generated scanner.  Always returns 1, which
 * per the flex convention means there is no further input to switch to.
 */
static int yywrap (void)
{
  return 1;
}
193
194
195 static void
196 parse_gtkdoc (GISourceScanner *scanner,
197               gchar           *symbol,
198               int             *c1,
199               int             *c2)
200 {
201   gboolean isline = FALSE;
202   gchar line[256];
203   int i;
204   gchar **parts;
205   GISourceDirective *directive;
206   char *name,*value;
207   GSList *directives;
208   GSList *options = NULL;
209   char *rname;
210   int n_parts;
211
212   i = 0;
213   do 
214     {
215       *c1 = *c2;
216       if (*c1 == '\n')
217         {
218           isline = TRUE;
219           break;
220         }
221       if (i >= 256)
222         break;
223       line[i++] = *c1;
224       *c2 = input();
225     } while (*c2 != EOF && !(*c1 == '*' && *c2 == '/'));
226   
227   if (!isline)
228     return;
229
230   line[i] = '\0';
231
232   parts = g_strsplit (line, ": ", 3);
233   n_parts = g_strv_length (parts);
234
235   if (g_ascii_strcasecmp (parts[0], "eprecated") == 0)
236     {
237       if (n_parts == 3)
238         options = g_slist_prepend (options, g_strdup_printf ("%s: %s", parts[1], parts[2]));
239       else if (n_parts == 2)
240         options = g_slist_prepend (options, g_strdup (parts[1]));
241       else
242         options = g_slist_prepend (options, g_strdup (""));
243       name = parts[0];
244       value = NULL;
245     }
246   else if (n_parts >= 2)
247     {
248       name = parts[0];
249
250       if (n_parts == 3) 
251         {
252           char *ptr = g_strdup (parts[1]);
253           char **option_parts, **option_part;
254
255           if (*ptr == '<')
256             {
257               char *end = strchr (ptr, '>');
258               if (end) 
259                 {
260                   *end = '\0';
261                   option_parts = g_strsplit (ptr+1, ",", 0);
262                   for (option_part = option_parts; *option_part; option_part++)
263                     options = g_slist_prepend (options, g_strdup (*option_part));
264                   options = g_slist_reverse (options);
265                   g_strfreev (option_parts);
266                 }
267             }
268           g_free (ptr);
269           value = parts[2];
270         } 
271       else
272         value = parts[1];
273     }
274   else /* parts == 1 */
275     {
276       name = parts[0];
277       value = NULL;
278     }
279
280   /*
281    * Special cases for global annotations.
282    * Context-sensitive parsing would probably be the right way to go.
283    */
284   if (g_ascii_strncasecmp ("eturn", name, 5) == 0)
285     rname = "return";
286   else if (g_ascii_strncasecmp ("eprecated", name, 9) == 0)
287     rname = "deprecated";
288   else
289     rname = name;
290
291   directive = gi_source_directive_new (rname, value, options);
292   directives = g_hash_table_lookup (scanner->directives_map, symbol);
293   directives = g_slist_prepend (directives, directive);
294   g_hash_table_replace (scanner->directives_map, 
295                         g_strdup (symbol), directives);
296
297   g_strfreev (parts);
298   
299 }
300
301
302 static void
303 parse_comment (GISourceScanner *scanner)
304 {
305   GString *symbol = NULL;
306   gboolean startofline = FALSE, have_symbol = FALSE, start1 = FALSE, start_symbol = FALSE;
307   int c1, c2;
308
309   c1 = input();
310   c2 = input();
311
312   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
313     {
314       if (c1 == ':')
315         have_symbol = TRUE;
316       else if (c1 == '\n')
317          start1 = TRUE;
318       else if (c1 == '*' && start1)
319          start_symbol = TRUE;
320       else if (!have_symbol && start_symbol) 
321         {
322           if (!symbol)
323             symbol = g_string_new ("");
324           if (c1 != ' ')
325             g_string_append_c (symbol, c1);
326         }
327
328       if (c1 == '\n') 
329         {
330           ++lineno;
331           startofline = TRUE;
332         }
333
334       c1 = c2;
335       c2 = input();
336
337       if ((c1 != '*' && c1 != ' '))
338           startofline = FALSE;
339
340       if (startofline && (c1 == ' ') && (c2 == '@' || (c2 == 'r') || (c2 == 'R') || (c2 == 'D')))
341         {
342            c1 = c2;
343            c2 = input();
344            if (symbol)
345              parse_gtkdoc (scanner, symbol->str, &c1, &c2);
346         }
347     }
348
349   if (symbol)
350     g_string_free (symbol, TRUE);
351   
352 }
353
354 static int
355 check_identifier (GISourceScanner *scanner,
356                   const char  *s)
357 {
358         /*
359          * This function checks if `s' is a type name or an
360          * identifier.
361          */
362
363         if (gi_source_scanner_is_typedef (scanner, s)) {
364                 return TYPEDEF_NAME;
365         } else if (strcmp (s, "__builtin_va_list") == 0) {
366                 return TYPEDEF_NAME;
367         }
368
369         return IDENTIFIER;
370 }
371
372 static void
373 process_directive (GISourceScanner *scanner)
374 {
375         /* extract current filename from #line directives */
376         GString *filename_builder;
377         gboolean in_string, found_filename;
378
379         lineno = 0;
380         found_filename = FALSE;
381         in_string = FALSE;
382         filename_builder = g_string_new ("");
383
384         int c = input ();
385         while (c != EOF && c != '\n') {
386                 if (!in_string) {
387                         if (c == '\"') {
388                                 in_string = TRUE;
389                                 found_filename = TRUE;
390                         } else if (c >= '0' && c <= '9') {
391                                 if (!found_filename) {
392                                         lineno = lineno * 10 + (c - '0');
393                                 }
394                         }
395                 } else {
396                         if (c == '\"') {
397                                 in_string = FALSE;
398                         } else if (c == '\\') {
399                                 g_string_append_c (filename_builder, c);
400                                 c = input ();
401                                 g_string_append_c (filename_builder, c);
402                         } else {
403                                 g_string_append_c (filename_builder, c);
404                         }
405                 }
406                 c = input ();
407         }
408
409         if (filename_builder->len > 0) {
410                 char *filename = g_strcompress (filename_builder->str);
411                 if (g_realpath (filename))
412                   {
413                     g_free (scanner->current_filename);
414                     scanner->current_filename = g_realpath (filename);
415                     g_assert (scanner->current_filename);
416                     g_free(filename);
417                   }
418         }
419
420         g_string_free (filename_builder, TRUE);
421 }
422
423 /*
424  * This parses a macro which is ignored, such as
425  * __attribute__((x)) or __asm__ (x)
426  */
427 static int
428 parse_ignored_macro (void)
429 {
430         int c;
431         int nest;
432
433         while ((c = input ()) != EOF && isspace (c))
434                 ;
435         if (c != '(')
436                 return FALSE;
437
438         nest = 0;
439         while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
440                 if (c == '(')
441                         nest++;
442                 else if (c == ')')
443                         nest--;
444                 else if (c == '"') {
445                         while ((c = input ()) != EOF && c != '"') {
446                                 if (c == '\\')
447                                         c = input ();
448                         }
449                 } else if (c == '\'') {
450                         c = input ();
451                         if (c == '\\')
452                                 c = input ();
453                         else if (c == '\'')
454                                 return FALSE;
455                         c = input ();
456                         if (c != '\'')
457                                 return FALSE;
458                 } else if (c == '\n')
459                         lineno++;
460         }
461
462         return TRUE;
463 }