resetting manifest requested domain to floor
[platform/upstream/cscope.git] / src / fscanner.l
1 %{
2 /*===========================================================================
3  Copyright (c) 1998-2000, The Santa Cruz Operation 
4  All rights reserved.
5  
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9  *Redistributions of source code must retain the above copyright notice,
10  this list of conditions and the following disclaimer.
11
12  *Redistributions in binary form must reproduce the above copyright notice,
13  this list of conditions and the following disclaimer in the documentation
14  and/or other materials provided with the distribution.
15
16  *Neither name of The Santa Cruz Operation nor the names of its contributors
17  may be used to endorse or promote products derived from this software
18  without specific prior written permission. 
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
21  IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION)
28  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31  DAMAGE. 
32  =========================================================================*/
33
34 /*      cscope - interactive C symbol cross-reference
35  *
36  *      C symbol scanner
37  */
38 #include "global.h"
39 #include "alloc.h"
40 #include "scanner.h"
41 #include "lookup.h"
42
43 #include <assert.h>
44
45 /* the line counting has been moved from character reading for speed */
46 /* comments are discarded */
47
48 #ifndef FLEX_SCANNER
49 # error Sorry, this scanner needs flex. It is not usable with AT&T Lex.
50 #endif
51
52 #define IFLEVELINC      5       /* #if nesting level size increment */
53 #define YY_NO_TOP_STATE 1
54
55 static char const rcsid[] = "$Id: fscanner.l,v 1.17 2012/08/02 21:48:08 broeker Exp $";
56
57 int     first;  /* buffer index for first char of symbol */
58 int     last;   /* buffer index for last char of symbol */
59 int     lineno; /* symbol line number */
60 int     myylineno = 1;
61
62 /* HBB 20001007: new variables, emulating yytext in a way that allows 
63  * the yymore() simulation, my_yymore(), to be used even in the presence of 
64  * yyless(). */
65 size_t my_yyleng = 0;
66 char *my_yytext = NULL;
67
68 static  BOOL    arraydimension;         /* inside array dimension declaration */
69 static  BOOL    bplisting;              /* breakpoint listing */
70 static  int     braces;                 /* unmatched left brace count */
71 static  BOOL    classdef;               /* c++ class definition */
72 static  BOOL    elseelif;               /* #else or #elif found */
73 static  BOOL    esudef;                 /* enum/struct/union global definition */
74 static  BOOL    external;               /* external definition */
75 static  int     externalbraces;         /* external definition outer brace count */
76 static  BOOL    fcndef;                 /* function definition */
77 static  BOOL    global;                 /* file global scope (outside functions) */
78 static  int     iflevel;                /* #if nesting level */
79 static  BOOL    initializer;            /* data initializer */
80 static  int     initializerbraces;      /* data initializer outer brace count */
81 static  BOOL    lex;                    /* lex file */
82 static  int     miflevel = IFLEVELINC;  /* maximum #if nesting level */
83 static  int     *maxifbraces;           /* maximum brace count within #if */
84 static  int     *preifbraces;           /* brace count before #if */
85 static  int     parens;                 /* unmatched left parenthesis count */
86 static  BOOL    ppdefine;               /* preprocessor define statement */
87 static  BOOL    pseudoelif;             /* pseudo-#elif */
88 static  BOOL    oldtype;                /* next identifier is an old type */
89 static  BOOL    rules;                  /* lex/yacc rules */
90 static  BOOL    sdl;                    /* sdl file */
91 static  BOOL    structfield;            /* structure field declaration */
92 static  int     tagdef;                 /* class/enum/struct/union tag definition */
93 static  BOOL    template;               /* function template */
94 static  int     templateparens;         /* function template outer parentheses count */
95 static  int     typedefbraces = -1;     /* initial typedef brace count */
96 static  int     token;                  /* token found */
97 static  int     ident_start;            /* begin of preceding identifier */
98
99 /* If this is defined to 1, use flex rules rather than the input
100  * function to discard comments. The scanner gains quite a bit of
101  * speed this way, because of a large reduction of the number of I/O
102  * system/library calls.  The original skipcomment_input() called
103  * getc() so often that the call overhead of shared libraries
104  * vs. static linking, alone, already caused a sizeable performance
105  * hit (up to 40% gross gain on a cscope -cub of its own source
106  * dir). */
107 #define COMMENTS_BY_FLEX 1
108
109 #if !COMMENTS_BY_FLEX
110 static  int     skipcomment_input(void);
111 static  int     comment(void);
112 static  int     insidestring_input(int);
113 #endif
114
115 static  void    my_yymore(void);
116
117 #if COMMENTS_BY_FLEX
118 # define skipcomment_input input
119 #else
120
121 # define YY_INPUT(buf,result,max_size)                          \
122 {                                                               \
123         int c = skipcomment_input ();                           \
124         result = (c == EOF) ? YY_NULL : (buf[0] = c, 1);        \
125 }
126
127 #endif /* !COMMENTS_BY_FLEX*/
128
129
130 %}
131 identifier      [a-zA-Z_$][a-zA-Z_0-9$]*
132 number          \.?[0-9][.0-9a-fA-FlLuUxX]*
133 comment         "/*"([^*]*("*"+[^/])?)*"*/"|"//"[^\n]*\n
134 ws              [ \t\r\v\f]
135 wsnl            [ \t\r\v\f\n]|{comment}
136
137 /* flex options: stack of start conditions, and don't use yywrap() */
138 %option stack
139 %option noyywrap
140
141 %start SDL
142 %a 4000
143 %o 7000
144
145 /* exclusive start conditions. not available in AT&T lex -> use flex! */
146 %x IN_PREPROC WAS_ENDIF WAS_IDENTIFIER WAS_ESU IN_DQUOTE IN_SQUOTE COMMENT
147
148 %%
149
150 %\{             {       /* lex/yacc C declarations/definitions */
151                         global = YES;
152                         goto more;
153                         /* NOTREACHED */
154                 }
155 %\}             {
156                         global = NO;
157                         goto more;
158                         /* NOTREACHED */
159                 }
160 ^%%             {       /* lex/yacc rules delimiter: toggles the rules section on and off */
161                         braces = 0;
162                         if (rules == NO) {
163                                 /* this %% starts the section containing the rules */
164                                 rules = YES;
165
166                                 /* Copy yytext to private buffer, to be able to add further
167                                  * content following it: */
168                                 my_yymore();
169
170                                 /* simulate a yylex() or yyparse() definition */
171                                 (void) strcat(my_yytext, " /* ");
172                                 first = strlen(my_yytext);
173                                 if (lex == YES) {
174                                         (void) strcat(my_yytext, "yylex");
175                                 } else {        
176                                         /* yacc: yyparse implicitly calls yylex, so push " yylex()" back onto the input, last character first */
177                                         const char *s = " yylex()";
178                                         const char *cp = s + strlen(s);
179                                         while (cp > s) {        /* never step cp below s: that would be undefined behaviour */
180                                                 unput(*--cp);
181                                         }
182                                         (void) strcat(my_yytext, "yyparse");
183                                 }
184                                 last = strlen(my_yytext);
185                                 (void) strcat(my_yytext, " */");
186                                 my_yyleng = strlen(my_yytext);
187                                 return(FCNDEF);
188                         } else {
189                                 /* we were in the rules section, now comes the closing one */
190                                 rules = NO;
191                                 global = YES;
192                                 last = first;
193                                 my_yymore();
194                                 return(FCNEND);
195                                 /* NOTREACHED */
196                         }
197                 }
198
199 <SDL>STATE[ \t]+({identifier}|\*)       { /* sdl state, treat as function def */
200                         braces = 1;
201                         fcndef = YES;
202                         token = FCNDEF;
203                         goto findident;
204                         /* NOTREACHED */
205                 }
206 <SDL>ENDSTATE[ \t]      { /* end of an sdl state, treat as end of a function */
207                         goto endstate;
208                         /* NOTREACHED */
209                 }
210
211 \{              {       /* count unmatched left braces for fcn def detection */
212                         ++braces;
213                         
214                         /* mark an untagged enum/struct/union so its beginning
215                            can be found */
216                         if (tagdef) {
217                                 if (braces == 1) {
218                                         esudef = YES;
219                                 }
220                                 token = tagdef;
221                                 tagdef = '\0';
222                                 last = first;
223                                 my_yymore();
224                                 return(token);
225                         }
226                         goto more;
227                         /* NOTREACHED */
228                 }
229
230 \#{ws}* { /* start a preprocessor line */
231                         if (rules == NO)                /* don't consider CPP for lex/yacc rules */
232                                 BEGIN(IN_PREPROC);
233                         yyleng = 1;     /* get rid of the blanks, if any */
234                         goto more;
235                         /* NOTREACHED */
236                 }
237 <IN_PREPROC>endif([^a-zA-Z0-9_$\n].*)?  {       /* #endif */
238                         /* delay treatment of #endif depending on whether an
239                          * #if comes right after it, or not */
240                         /* HBB 20010619: new pattern allows trailing garbage
241                          * after the #endif */
242                         BEGIN(WAS_ENDIF);
243                         goto more;
244                         /* NOTREACHED */
245                 }
246 <WAS_ENDIF>\n{wsnl}*#{ws}*if(ndef|def)?{ws}+            {
247                         /* attempt to correct erroneous brace count caused by:
248                          * 
249                          * #if ...
250                          *      ... {
251                          * #endif
252                          * #if ...
253                          *      ... {
254                          * #endif
255                          */
256                         /* the current #if must not have an #else or #elif */
257                         if (elseelif == YES) {
258                                 goto endif;
259                                 /* NOTREACHED */
260                         }
261                         pseudoelif = YES;
262                         BEGIN(INITIAL);
263                         yyless(1);      /* rescan all but the line ending */
264                         yy_set_bol(1);
265                         goto eol;
266                         /* NOTREACHED */
267                 }
268 <WAS_ENDIF>\n{wsnl}*             {      /* an #endif with no #if right after it */
269                 endif:
270                         if (iflevel > 0) {
271                                 /* get the maximum brace count for this #if */
272                                 if (braces < maxifbraces[--iflevel]) {
273                                         braces = maxifbraces[iflevel];
274                                 }
275                         }
276                         BEGIN(INITIAL);
277                         yyless(1);
278                         yy_set_bol(1);
279                         goto eol;
280                         /* NOTREACHED */
281                 }
282
283 <IN_PREPROC>ifndef{ws}+ |
284 <IN_PREPROC>ifdef{ws}+          |
285 <IN_PREPROC>if{ws}+             { /* #if directive */
286                         elseelif = NO;
287                         if (pseudoelif == YES) {
288                                 pseudoelif = NO;
289                                 goto elif;
290                                 /* NOTREACHED */
291                         }
292                         /* make sure there is room for the current brace count */
293                         if (iflevel == miflevel) {
294                                 miflevel += IFLEVELINC;
295                                 maxifbraces = myrealloc(maxifbraces, miflevel * sizeof(int));
296                                 preifbraces = myrealloc(preifbraces, miflevel * sizeof(int));
297                         }
298                         /* push the current brace count */
299                         preifbraces[iflevel] = braces;
300                         maxifbraces[iflevel++] = 0;
301                         BEGIN(INITIAL);
302                         goto more;
303                         /* NOTREACHED */
304                 }
305 <IN_PREPROC>else({ws}.*)?       { /* #else --- eat up whole line */
306                         elseelif = YES;
307                         if (iflevel > 0) {
308                                 
309                                 /* save the maximum brace count for this #if */
310                                 if (braces > maxifbraces[iflevel - 1]) {
311                                         maxifbraces[iflevel - 1] = braces;
312                                 }
313                                 /* restore the brace count to before the #if */
314                                 braces = preifbraces[iflevel - 1];
315                         }
316                         BEGIN(INITIAL);
317                         goto more;
318                         /* NOTREACHED */
319                 }
320 <IN_PREPROC>elif{ws}+   { /* #elif */
321                         /* elseelif = YES; --- HBB I doubt this is correct */
322                 elif:
323                         if (iflevel > 0) {
324                                 
325                                 /* save the maximum brace count for this #if */
326                                 if (braces > maxifbraces[iflevel - 1]) {
327                                         maxifbraces[iflevel - 1] = braces;
328                                 }
329                                 /* restore the brace count to before the #if */
330                                 braces = preifbraces[iflevel - 1];
331                         }
332                         BEGIN(INITIAL);
333                         goto more;
334                         /* NOTREACHED */
335                 }
336
337 <IN_PREPROC>include{ws}*\"[^"\n]+\" |
338 <IN_PREPROC>include{ws}*<[^>\n]+>       { /* #include file: report the file name and return INCLUDE */
339                         char    *s;
340                         char remember = yytext[yyleng-1];       /* closing '"' or '>' of the match */
341                         
342                         my_yymore();
343                         s = strpbrk(my_yytext, "\"<");  /* first '"' or '<' in my_yytext */
344                         if (!s)
345                                 return(LEXERR);
346                         my_yytext[my_yyleng-1] = '\0';  /* temporarily cut off the closing delimiter ... */
347                         incfile(s + 1, s);
348                         my_yytext[my_yyleng-1] = remember;      /* ... and put it back */
349                         first = s - my_yytext;
350                         last = my_yyleng - 1;
351                         if (compress == YES) {
352                                 my_yytext[0] = '\2';    /* compress the keyword */
353                         }
354                         BEGIN(INITIAL);
355                         return(INCLUDE);
356                         /* NOTREACHED */
357                 }
358
359 \}              {
360                         /* could be the last enum member initializer */
361                         if (braces == initializerbraces) {
362                                 initializerbraces = -1;
363                                 initializer = NO;
364                         }
365                         if (--braces <= 0) {
366                 endstate:
367                                 braces = 0;
368                                 classdef = NO;
369                         }
370                         if (braces == 0 || (braces == 1 && classdef == YES)) {
371
372                                 /* if the end of an enum/struct/union definition */
373                                 if (esudef == YES) {
374                                         esudef = NO;
375                                 }
376                                 /* if the end of the function */
377                                 else if (fcndef == YES) {
378                                         fcndef = NO;
379                                         last = first;
380                                         my_yymore();
381                                         return(FCNEND);
382                                 }
383                         }
384                         goto more;
385                         /* NOTREACHED */
386                 }
387
388 \(              {       /* count unmatched left parentheses for function templates */
389                         ++parens;
390                         goto more;
391                         /* NOTREACHED */
392                 }
393 \)              {
394                         if (--parens <= 0) {
395                                 parens = 0;
396                         }
397                         /* if the end of a function template */
398                         if (parens == templateparens) {
399                                 templateparens = -1;
400                                 template = NO;
401                         }
402                         goto more;
403                         /* NOTREACHED */
404                 }
405 =               {       /* if a global definition initializer */
406                         if (!my_yytext)
407                                 return(LEXERR);
408                         if (global == YES && ppdefine == NO && my_yytext[0] != '#') {
409                                 initializerbraces = braces;
410                                 initializer = YES;
411                         }
412                         goto more;
413                         /* NOTREACHED */
414                 }
415 :               {       /* if a global structure field */
416                         if (!my_yytext)
417                                 return(LEXERR);
418                         if (global == YES && ppdefine == NO && my_yytext[0] != '#') {
419                                 structfield = YES;
420                         }
421                         goto more;
422                         /* NOTREACHED */
423                 }
424 \,              {
425                         if (braces == initializerbraces) {
426                                 initializerbraces = -1;
427                                 initializer = NO;
428                         }
429                         structfield = NO;
430                         goto more;
431                         /* NOTREACHED */
432                 }
433 ;               {       /* if the enum/struct/union was not a definition */
434                         if (braces == 0) {
435                                 esudef = NO;
436                         }
437                         /* if the end of a typedef */
438                         if (braces == typedefbraces) {
439                                 typedefbraces = -1;
440                         }
441                         /* if the end of an external definition */
442                         if (braces == externalbraces) {
443                                 externalbraces = -1;
444                                 external = NO;
445                         }
446                         structfield = NO;
447                         initializer = NO;
448                         goto more;
449                         /* NOTREACHED */
450                 }
451 <IN_PREPROC>define{ws}+{identifier}     {
452                                 
453                         /* preprocessor macro or constant definition */
454                         ppdefine = YES;
455                         token = DEFINE;
456                         if (compress == YES) {
457                                 my_yytext[0] = '\1';    /* compress the keyword */
458                         }
459                 findident:
460                         /* search backwards through my_yytext[] for the {ws} character that
461                          * precedes the identifier ({ws} may also be \r, \v or \f), without
462                          * running off the front; NOTE: better left to flex via a start condition */
463                         my_yymore();
464                         first = my_yyleng - 1;
465                         while (first >= 0 && strchr(" \t\r\v\f", my_yytext[first]) == NULL) {
466                                 --first;
467                         }
468                         ++first;        /* step forward onto the first character of the identifier */
469                         last = my_yyleng;
470                         BEGIN(INITIAL);
471                         goto definition;
472                         /* NOTREACHED */
473                 }
474 <IN_PREPROC>\n  {   /* unknown preprocessor line */
475                         BEGIN(INITIAL);
476                         ++myylineno;
477                         goto more;
478                         /* NOTREACHED */
479                 }
480 <IN_PREPROC>.            |
481 <IN_PREPROC>{identifier}        {   /* unknown preprocessor line */
482                         BEGIN(INITIAL);
483                         goto more;
484                         /* NOTREACHED */
485                 }
486
487 class{wsnl}+{identifier}({wsnl}|{identifier}|[():])*\{  {       /* class definition */
488                         classdef = YES;
489                         tagdef =  'c';
490                         yyless(5);              /* eat up 'class', and re-scan */
491                         yy_set_bol(0);
492                         goto more;
493                         /* NOTREACHED */
494                 }
495
496 ("enum"|"struct"|"union")       {
497                         ident_start = first;
498                         BEGIN(WAS_ESU);
499                         goto more;
500                 }
501 <WAS_ESU>{
502 ({wsnl}+{identifier}){wsnl}*\{          { /* e/s/u definition */
503                         tagdef = my_yytext[ident_start];
504                         BEGIN(WAS_IDENTIFIER);
505                         goto ident;
506                 }
507 {wsnl}*\{               { /* e/s/u definition without a tag */
508                         tagdef = my_yytext[ident_start];
509                         BEGIN(INITIAL);
510                         if (braces == 0) {
511                                 esudef = YES;
512                         }
513                         last = first;
514                         yyless(0);  /* re-scan all this as normal text */
515                         tagdef = '\0';
516                         goto more;
517                 }
518 ({wsnl}+{identifier})?{wsnl}*   |   
519 .|\n                                                                            {   /* e/s/u usage */
520                         BEGIN(WAS_IDENTIFIER);
521                         goto ident;
522                 }
523 }
524
525 if{wsnl}*\(     {       /* ignore 'if' */
526                         yyless(2);
527                         yy_set_bol(0);
528                         goto more;
529 }       
530
531 {identifier}    {       /* identifier found: do nothing, yet. (!) */
532                         BEGIN(WAS_IDENTIFIER);
533                         ident_start = first;
534                         goto more;
535                         /* NOTREACHED */
536                 }
537
538 <WAS_IDENTIFIER>{       
539 {ws}*\(({wsnl}|{identifier}|{number}|[*&[\]=,.:])*\)([()]|{wsnl})*[:a-zA-Z_#{]  {
540                         /* a function definition */
541                         /* note: "#define a (b) {" and "#if defined(a)\n#" 
542                          * are not fcn definitions! */
543                         /* warning: "if (...)" must not overflow yytext, 
544                          * so the content of function argument definitions 
545                          * is restricted, in particular parentheses are 
546                          * not allowed */
547                         /* FIXME HBB 20001003: the above 'not allowed' may well be the
548                          * reason for the parsing bug concerning function pointer usage,
549                          * I suspect. --- I think my new special-case rule for 'if'
550                          * could be helpful in removing that limitation */
551                         if ((braces == 0 && ppdefine == NO && my_yytext[0] != '#' && rules == NO) ||
552                             (braces == 1 && classdef == YES)) {
553                                 fcndef = YES;
554                                 token = FCNDEF;
555                                 goto fcn;
556                                 /* NOTREACHED */
557                         }
558                         goto fcncal;
559                         /* NOTREACHED */
560                 }
561 {ws}*\(([*&[\]=,.]|{identifier}|{number}|{wsnl})*               {       /* function call */
562                 fcncal: if (fcndef == YES || ppdefine == YES || rules == YES) {
563                                 token = FCNCALL;
564                                 goto fcn;
565                                 /* NOTREACHED */
566                         }
567                         if (template == NO) {
568                                 templateparens = parens;
569                                 template = YES;
570                         }
571                         goto ident;
572                         /* NOTREACHED */
573                 }
574 ("*"|{wsnl})+{identifier}               {       /* typedef name or modifier use */
575                         goto ident;
576                         /* NOTREACHED */
577                 }
578 .|\n    {               /* general identifier usage */
579                         char    *s;
580
581                         if (global == YES && ppdefine == NO && my_yytext[0] != '#' &&
582                             external == NO && initializer == NO && 
583                             arraydimension == NO && structfield == NO &&
584                             template == NO && fcndef == NO) {
585                                 if (esudef == YES) {    
586                                         /* if enum/struct/union */
587                                         token = MEMBERDEF;
588                                 } else {
589                                         token = GLOBALDEF;
590                                 }
591                         } else {
592                 ident:
593                                 token = IDENT;
594                         }
595                 fcn:
596                         if (YYSTATE == WAS_IDENTIFIER) {
597                                 /* Position back to the actual identifier: */
598                                 last = first; 
599                                 first = ident_start;
600                                 yyless(0);
601                                 /* HBB 20001008: if the anti-backup-pattern above matched,
602                                  * and the matched context ended with a \n, then the scanner
603                                  * believes it's at the start of a new line. But the yyless()
604                                  * feeds that \n back into the input, so that's
605                                  * wrong. --> force 'beginning-of-line' status off. */
606                                 yy_set_bol(0);
607                                 BEGIN(INITIAL);
608                         } else {
609                                 my_yymore();
610                                 last = my_yyleng;
611                         }
612                 definition:
613
614                         /* if a long line */
615                         if (yyleng > STMTMAX) {
616                                 int     c;
617                                 
618                                 /* skip to the end of the line */
619                                 warning("line too long");
620                                 while ((c = skipcomment_input()) > LEXEOF) { 
621                                         if (c == '\n') {
622                                                 unput(c);
623                                                 break;
624                                         }
625                                 }
626                         }
627                         /* truncate a long symbol; NOTE(review): the test uses yyleng (the last flex match) while the cut is made at first + PATLEN in my_yytext -- confirm these agree */
628                         if (yyleng > PATLEN) {
629                                 warning("symbol too long");
630                                 my_yyleng = first + PATLEN;
631                                 my_yytext[my_yyleng] = '\0';
632                         }
633
634                         /* if found word was a keyword: */
635                         if ((s = lookup(my_yytext + first)) != NULL) {
636                                 first = my_yyleng;
637                                 
638                                 /* if the start of a typedef */
639                                 if (s == typedeftext) {
640                                         typedefbraces = braces;
641                                         oldtype = YES;
642                                 }
643                                 /* if an enum/struct/union */
644                                 /* (needed for "typedef struct tag name;" so
645                                    tag isn't marked as the typedef name) */
646                                 else if (s == enumtext || s == structtext || s == uniontext) {
647                                         /* do nothing */
648                                 } else if (s == externtext) {
649                                         /* if an external definition */
650                                         externalbraces = braces;
651                                         external = YES;
652                                 } else if (templateparens == parens && template == YES) {
653                                         /* keyword doesn't start a function 
654                                          * template */
655                                         templateparens = -1;
656                                         template = NO;
657                                 } else {        
658                                         /* identifier after typedef was a 
659                                          * keyword */
660                                         oldtype = NO;
661                                 }
662                         } else {        
663                                 /* not a keyword --> found an identifier */
664                                 /* last = yyleng; */
665                                 
666                                 /* if a class/enum/struct/union tag definition */
667                                 /* FIXME HBB 20001001: why reject "class"? */
668                                 if (tagdef && strnotequal(my_yytext + first, "class")) {
669                                         token = tagdef;
670                                         tagdef = '\0';
671                                         if (braces == 0) {
672                                                 esudef = YES;
673                                         }
674                                 } else if (braces == typedefbraces && oldtype == NO &&
675                                            arraydimension == NO) {
676                                         /* if a typedef name */
677                                         token = TYPEDEF;
678                                 } else {
679                                         oldtype = NO;
680                                 }
681                                 /* my_yymore(); */
682                                 return(token);
683                                 /* NOTREACHED */
684                         }
685                 }
686 }
687
688 \[              {       /* array dimension (don't worry about subscripts) */
                        /* Remember that we are inside [...]: the identifier
                         * rule only reports a TYPEDEF token while
                         * arraydimension == NO. */
689                         arraydimension = YES;
                        /* "more" (in the catch-all rule) appends the match
                         * to my_yytext */
690                         goto more;
691                         /* NOTREACHED */
692                 }
693 \]              {
                        /* closing bracket: the array dimension is over */
694                         arraydimension = NO;
695                         goto more;
696                         /* NOTREACHED */
697                 }
698 \\\n            {       /* preprocessor statement is continued on next line */
699                         /* save the '\\' to the output file, but not the '\n': */
                        /* my_yymore() advances my_yyleng by yyleng, so with
                         * yyleng forced to 1 only the backslash counts as
                         * saved text */
700                         yyleng = 1;
701                         my_yymore();
                        /* the newline itself gets the shared end-of-line
                         * treatment (label "eol" in the '\n' rule) */
702                         goto eol;
703                         /* NOTREACHED */
704                 }
705 \n              {       /* end of the line */
706                         if (ppdefine == YES) {  /* end of a #define */
707                                 ppdefine = NO;
                                /* yyleng is 1 here, so this hands the newline
                                 * back to the input; it is scanned again (now
                                 * with ppdefine == NO) after DEFINEEND has been
                                 * returned */
708                                 yyless(yyleng - 1);
709                                 last = first;
710                                 my_yymore();
711                                 return(DEFINEEND);
712                         }
713                         /* skip the first 8 columns of a breakpoint listing line */
714                         /* and skip the file path in the page header */
715                         if (bplisting == YES) {
716                                 int     c, i;
717
718                                 /* FIXME HBB 20001007: should call input() instead */
                                /* the character read by the switch is the first
                                 * character of the next line */
719                                 switch (skipcomment_input()) {  /* tab and EOF just fall through */
720                                 case ' ':       /* breakpoint number line */
721                                 case '[':
                                        /* one character is already consumed; drop
                                         * up to 7 more, stopping early when the
                                         * input returns LEXEOF or less */
722                                         for (i = 1; i < 8 && skipcomment_input() > LEXEOF; ++i)
723                                                 ;
724                                         break;
725                                 case '.':       /* header line */
726                                 case '/':
727                                         /* skip to the end of the line */
728                                         while ((c = skipcomment_input()) > LEXEOF) {
729                                                 if (c == '\n') {
                                                        /* leave the newline for the
                                                         * scanner to match */
730                                                         unput(c);
731                                                         break;
732                                                 }
733                                         }
734                                         break;
735                                 case '\n':      /* empty line */
736                                         unput('\n');
737                                         break;
738                                 }
739                         }
                /* Shared end-of-line handling. Other rules (continued
                 * preprocessor lines, unterminated strings, comments) jump
                 * here with "goto eol". */
740                 eol:
741                         ++myylineno;
742                         first = 0;
743                         last = 0;
744                         if (symbols > 0) {
745                                 /* no my_yymore(): \n doesn't need to be in my_yytext */
746                                 return(NEWLINE);
747                         }
748                         /* line ended --> flush my_yytext */
749                         if (my_yytext)
750                                 *my_yytext = '\0';
751                         my_yyleng = 0;
                        /* the next saved text starts on the new input line */
752                         lineno = myylineno;
753                 }
754
755 \'              {       /* character constant */
                        /* in sdl files a single quote does not open a
                         * character constant, so the state is left alone */
756                         if (sdl == NO) 
757                                 BEGIN(IN_SQUOTE);
758                         goto more;
759                         /* NOTREACHED */
760                 }
761 <IN_SQUOTE>\'   {       
                        /* closing quote of the character constant */
762                         BEGIN(INITIAL);
763                         goto more;
764                         /* NOTREACHED */
765                 }
766 \"              {       /* string constant */
767                         BEGIN(IN_DQUOTE);
768                         goto more;
769                         /* NOTREACHED */
770                 }
771 <IN_DQUOTE>\"   {       
                        /* closing quote of the string constant */
                        /* NOTE(review): initscanner() selects start condition
                         * SDL for sdl files, but this always returns to
                         * INITIAL -- confirm that is intended. */
772                         BEGIN(INITIAL);
773                         goto more;
774                         /* NOTREACHED */
775                 }
776 <IN_DQUOTE,IN_SQUOTE>{
777 \n              {       /* syntax error: unexpected EOL */
                        /* recover by abandoning the constant and treating
                         * the newline as a normal end of line */
778                         BEGIN(INITIAL);
779                         goto eol;
780                         /* NOTREACHED */
781                 }
782 \\.     |
783 .               {
                        /* an escaped character or any other character inside
                         * the constant is simply saved as text */
784                         goto more;
785                         /* NOTREACHED */
786                 }
787 \\\n    {               /* line continuation inside a string! */
                        /* count the line here; the constant goes on, so the
                         * "eol" handling is not run */
788                         myylineno++;
789                         goto more;
790                         /* NOTREACHED */
791                 }
792 }
793         
794 ^{ws}+          {               /* don't save leading white space */
                        /* empty action: the match is discarded */
795                 }
796                 
797 {ws}+\n         {               /* eat whitespace at end of line */
                        /* only the newline is pushed back, so the end-of-line
                         * rule still sees it */
798                         unput('\n');
799                 }
800
801 [\t\r\v\f]+     {       /* eat non-blank whitespace sequences, replace
802                          * by single blank */
                        /* the blank is pushed back into the input and scanned
                         * again, not saved directly */
803                         unput(' ');
804                 }
805
806 {ws}{2,}        {   /* compress sequential whitespace here, not in putcrossref() */
807                         unput(' ');
808                 }
809
810 "/*"                                    yy_push_state(COMMENT); /* start of a C comment: switch to the comment-skipping rules below */
811 <COMMENT>{
812 [^*\n]*                 |
813 "*"+[^*/\n]*    ; /* do nothing */
814 [^*\n]*\n               |
815 "*"+[^*/\n]*\n  {
                        /* A line of the comment ended. Outside a #define it
                         * gets the normal end-of-line treatment; inside a
                         * #define only the line counter is advanced. */
816                         if (ppdefine == NO) {
817                                 goto eol;
818                         } else {
819                                 ++myylineno;
820                         }
821                         /* NOTE(review): despite the original "NOTREACHED" remark, control does arrive here after the else branch */
822                 }
823 "*"+"/"         {
824                         /* replace the comment by a single blank */
825                         unput(' ');
                        /* return to the start condition that was active when
                         * the comment began */
826                         yy_pop_state();
827                 }
828 }               
829
830 "//".*\n?               {
831                         /* C++-style one-line comment */
                        /* The match includes the newline when there is one.
                         * NOTE(review): "eol" counts a line even if no newline
                         * was matched, and jumping there bypasses the
                         * ppdefine/DEFINEEND branch of the '\n' rule --
                         * confirm both are acceptable. */
832                         goto eol;
833                         /* NOTREACHED */
834                 }
835
836 {number}                                |       /* number */
837 <SDL>STATE[ \t]+                |   /* ... and other syntax error catchers... */
838 .                                               {       /* punctuation and operators */
                                                /* "more" is the common exit for every rule that
                                                 * only saves its match: the text is appended to
                                                 * my_yytext and first is moved to the end of the
                                                 * saved text */
839                                                 more:   
840                                                         my_yymore();
841                                                         first = my_yyleng;
842                                                 }
843
844 %%
845
846 void
847 initscanner(char *srcfile)
848 {
849         char    *s;
850         
851         if (maxifbraces == NULL) {
852                 maxifbraces = mymalloc(miflevel * sizeof(int));
853                 preifbraces = mymalloc(miflevel * sizeof(int));
854         }
855         first = 0;              /* buffer index for first char of symbol */
856         last = 0;               /* buffer index for last char of symbol */
857         lineno = 1;             /* symbol line number */
858         myylineno = 1;          /* input line number */
859         arraydimension = NO;    /* inside array dimension declaration */
860         bplisting = NO;         /* breakpoint listing */
861         braces = 0;             /* unmatched left brace count */
862         classdef = NO;          /* c++ class definition */
863         elseelif = NO;          /* #else or #elif found */
864         esudef = NO;            /* enum/struct/union global definition */
865         external = NO;          /* external definition */
866         externalbraces = -1;    /* external definition outer brace count */
867         fcndef = NO;            /* function definition */
868         global = YES;           /* file global scope (outside functions) */
869         iflevel = 0;            /* #if nesting level */
870         initializer = NO;       /* data initializer */
871         initializerbraces = -1; /* data initializer outer brace count */
872         lex = NO;               /* lex file */
873         parens = 0;             /* unmatched left parenthesis count */
874         ppdefine = NO;          /* preprocessor define statement */
875         pseudoelif = NO;        /* pseudo-#elif */
876         oldtype = NO;           /* next identifier is an old type */
877         rules = NO;             /* lex/yacc rules */
878         sdl = NO;               /* sdl file */
879         structfield = NO;       /* structure field declaration */
880         tagdef = '\0';          /* class/enum/struct/union tag definition */
881         template = NO;          /* function template */
882         templateparens = -1;    /* function template outer parentheses count */
883         typedefbraces = -1;     /* initial typedef braces count */
884         ident_start = 0;        /* start of previously found identifier */
885
886         if (my_yytext)
887                 *my_yytext = '\0';
888         my_yyleng = 0;
889         
890         BEGIN(INITIAL);
891
892         /* if this is not a C file */
893         if ((s = strrchr(srcfile, '.')) != NULL) {
894                 switch (*++s) { /* this switch saves time on C files */
895                 case 'b':
896                         if (strcmp(s, "bp") == 0) {     /* breakpoint listing */
897                                 bplisting = YES;
898                         }
899                         break;
900                 case 'l':
901                         if (strcmp(s, "l") == 0) {      /* lex */
902                                 lex = YES;
903                                 global = NO;
904                         }
905                         break;
906                 case 's':
907                         if (strcmp(s, "sd") == 0) {     /* sdl */
908                                 sdl = YES;
909                                 BEGIN(SDL);
910                         }
911                         break;
912                 case 'y':
913                         if (strcmp(s, "y") == 0) {      /* yacc */
914                                 global = NO;
915                         }
916                         break;
917                 }
918         }
919 }
920
921 #if !COMMENTS_BY_FLEX
922
923 /* A micro-scanner that serves as the input() function of the
924  * scanner. It throws away any comments in the input, correctly
925  * avoiding doing this inside string/character constants, and knows
926  * about backslash sequences. Now that the main scanner doesn't use
927  * yymore() any longer, this could be replaced by lex rules. Left for
928  * trying later. */
929
930 /* Status variable: If this is non-NUL, it's the character that
931 * terminates a string we're currently in. skipcomment_input() stores
 * the opening quote character here when a string or character constant
 * begins; insidestring_input() resets it to NUL once that constant has
 * ended. */
932 static int string_terminator = '\0';
933
934 /* Helper routine: treat 'c' as a character found inside a
935  * string. Check if this character might be the end of that
936  * string. Backslashes have to be taken care of, for the sake of
937  * "quotes like \"these\" found inside a string". */
938 static int
939 insidestring_input(int c)
940 {
941         static BOOL was_backslash = NO;
942         
943         if ((c == '\\') && (was_backslash == NO)) {
944                 /* escape character found --> treat next char specially */
945                 /* FIXME HBB 20001003: need treatment of backslash in the main
946                  * scanner, too. It'll get false line counts in case of "\\'",
947                  * otherwise --- they can occur as part of a lex pattern */
948                 was_backslash = YES;
949                 return c;
950         }
951
952         if (((c == '\t') && (lex == YES))
953             /* Note: "\\\n" is removed even inside strings! */
954             || ((c == '\n') && (was_backslash == NO))
955                 || (c == EOF)
956                 || ((c == string_terminator) && (was_backslash == NO))
957            ) {
958                 /* Line ended, or end-of-string was found. That is a syntax
959                  * error.  To recover, stop treatment as a string constant: */
960                 string_terminator = '\0';
961         } else if (!isprint((unsigned char)c)) {
962                 /* mask unprintable characters */
963                 c = ' ';
964         }
965         
966         was_backslash = NO;
967         return c;
968 }
969
970 /* Helper function: skip over input until end of comment is found (or
971  * we find that it wasn't really comment, in the first place): */
972 static int
973 comment(void)
974 {
975         int     c, lastc;
976
977         /* Coming here, we've just read in the opening '/' of a
978          * comment. */
979         do {
980                 if ((c = getc(yyin)) == '*') {  /* C comment */
981                         lastc = '\0';
982                         while ((c = getc(yyin)) != EOF
983                                    /* fewer '/'s --> test them first! */
984                                && (c != '/' || lastc != '*')
985                               ) { 
986                                 if (c == '\n') {
987                                         /* keep the line number count */
988                                         /* FIXME HBB 20001008: this is not synchronized
989                                          * properly with myylineno changes by the main
990                                          * scanner. A strong point in favour of moving
991                                          * this to lex-code that is, IMHO */
992                                         ++myylineno;
993                                 }
994                                 lastc = c;
995                         }
996                         /* return a blank for Reiser cpp token concatenation */
997                         /* FIXME HBB 20001008: what on earth is 'Reiser cpp'? ANSI
998                          * C defines cpp to explicitly replace any comment by a
999                          * blank. Pre-ANSI cpp's behaved differently, but do we
1000                          * really want that? If at all, it should only ever be a
1001                          * non-default option (like gcc's "-traditional-cpp")
1002                          * */
1003                         if ((c = getc(yyin)) == '_' || isalnum(c)) {
1004                                 (void) ungetc(c, yyin);
1005                                 c = ' ';
1006                                 break;
1007                         }
1008                 } else if (c == '/') {          /* C++ comment */
1009                         while ((c = getc(yyin)) != EOF && c != '\n') {
1010                                 ; /* do nothing else */
1011                         }
1012                         break;
1013                 } else {                                        /* not a comment */
1014                         (void) ungetc(c, yyin);
1015                         c = '/';
1016                         break;
1017                         /* NOTREACHED */
1018                 }
1019                 
1020         /* there may be an immediately following comment */
1021         } while (c == '/');
1022         return(c);
1023 }
1024
1025 /* The core of the actual input() function to be used by (f)lex. The
1026  * calling scheme between this and the actual input() redefinition is
1027  * a bit different for lex and flex. See the #ifdef FLEX_SCANNER part
1028  * in the head section. */
1029 static int
1030 skipcomment_input(void)
1031 {
1032         int     c;
1033
1034         c = getc (yyin);
1035         if (string_terminator != '\0') {
1036                 /* don't look for comments inside strings! */
1037                 return insidestring_input(c);
1038         } else if (c == '/') {
1039                 /* swallow everything until end of comment, if this is one */
1040                 return comment (); 
1041         } else if (c == '"' || c == '\'') {
1042                 /* a string is beginning here, so switch input method */
1043                 string_terminator = c;
1044         }
1045         
1046         return c;
1047 }
1048
1049 #endif /* !COMMENTS_BY_FLEX */
1050
1051 #define MY_YY_ALLOCSTEP 1000
1052 static void
1053 my_yymore(void)
1054 {
1055         static size_t yytext_size = 0;
1056         
1057         /* my_yytext is an ever-growing buffer. It will not ever
1058          * shrink, nor will it be freed at end of program, for now */
1059         while (my_yyleng + yyleng + 1 >= yytext_size) {
1060                 my_yytext = myrealloc(my_yytext, 
1061                                                   yytext_size += MY_YY_ALLOCSTEP);
1062         }
1063         
1064         strncpy (my_yytext + my_yyleng, yytext, yyleng+1);
1065         my_yyleng += yyleng;
1066 }