2 /*===========================================================================
3 Copyright (c) 1998-2000, The Santa Cruz Operation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 *Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 *Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 *Neither name of The Santa Cruz Operation nor the names of its contributors
17 may be used to endorse or promote products derived from this software
18 without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
21 IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
32 =========================================================================*/
34 /* cscope - interactive C symbol cross-reference
45 /* the line counting has been moved from character reading for speed */
46 /* comments are discarded */
49 # error Sorry, this scanner needs flex. It is not usable with AT&T Lex.
52 #define IFLEVELINC 5 /* #if nesting level size increment */
53 #define YY_NO_TOP_STATE 1
55 static char const rcsid[] = "$Id: fscanner.l,v 1.17 2012/08/02 21:48:08 broeker Exp $";
57 int first; /* buffer index for first char of symbol */
58 int last; /* buffer index for last char of symbol */
59 int lineno; /* symbol line number */
62 /* HBB 20001007: new variables, emulating yytext in a way that allows
63 * the yymore() simulation, my_yymore(), to be used even in the presence of
66 char *my_yytext = NULL;
/* Scanner state, private to this file. Most of these are set back to
 * their starting values by initscanner(). miflevel is the allocated size
 * of the two per-#if-level arrays maxifbraces[] and preifbraces[], which
 * are indexed by iflevel and grown in steps of IFLEVELINC. */
68 static BOOL arraydimension; /* inside array dimension declaration */
69 static BOOL bplisting; /* breakpoint listing */
70 static int braces; /* unmatched left brace count */
71 static BOOL classdef; /* c++ class definition */
72 static BOOL elseelif; /* #else or #elif found */
73 static BOOL esudef; /* enum/struct/union global definition */
74 static BOOL external; /* external definition */
75 static int externalbraces; /* external definition outer brace count */
76 static BOOL fcndef; /* function definition */
77 static BOOL global; /* file global scope (outside functions) */
78 static int iflevel; /* #if nesting level */
79 static BOOL initializer; /* data initializer */
80 static int initializerbraces; /* data initializer outer brace count */
81 static BOOL lex; /* lex file */
82 static int miflevel = IFLEVELINC; /* maximum #if nesting level */
83 static int *maxifbraces; /* maximum brace count within #if */
84 static int *preifbraces; /* brace count before #if */
85 static int parens; /* unmatched left parenthesis count */
86 static BOOL ppdefine; /* preprocessor define statement */
87 static BOOL pseudoelif; /* pseudo-#elif */
88 static BOOL oldtype; /* next identifier is an old type */
89 static BOOL rules; /* lex/yacc rules */
90 static BOOL sdl; /* sdl file */
91 static BOOL structfield; /* structure field declaration */
92 static int tagdef; /* class/enum/struct/union tag definition */
93 static BOOL template; /* function template */
94 static int templateparens; /* function template outer parentheses count */
95 static int typedefbraces = -1; /* initial typedef brace count */
96 static int token; /* token found */
97 static int ident_start; /* begin of preceding identifier */
99 /* If this is defined to 1, use flex rules rather than the input
100 * function to discard comments. The scanner gains quite a bit of
101 * speed this way, because of a large reduction of the number of I/O
102 * system/library calls. The original skipcomment_input() called
103 * getc() so often that the call overhead of shared libraries
104 * vs. static linking, alone, already caused a sizeable performance
105 * hit (up to 40% gross gain on a cscope -cub of its own source
107 #define COMMENTS_BY_FLEX 1
109 #if !COMMENTS_BY_FLEX
110 static int skipcomment_input(void);
111 static int comment(void);
112 static int insidestring_input(int);
115 static void my_yymore(void);
118 # define skipcomment_input input
121 # define YY_INPUT(buf,result,max_size) \
123 int c = skipcomment_input (); \
124 result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
127 #endif /* !COMMENTS_BY_FLEX*/
131 identifier [a-zA-Z_$][a-zA-Z_0-9$]*
132 number \.?[0-9][.0-9a-fA-FlLuUxX]*
133 comment "/*"([^*]*("*"+[^/])?)*"*/"|"//"[^\n]*\n
135 wsnl [ \t\r\v\f\n]|{comment}
137 /* flex options: stack of start conditions, and don't use yywrap() */
145 /* exclusive start conditions. not available in AT&T lex -> use flex! */
146 %x IN_PREPROC WAS_ENDIF WAS_IDENTIFIER WAS_ESU IN_DQUOTE IN_SQUOTE COMMENT
150 %\{ { /* lex/yacc C declarations/definitions */
160 ^%% { /* lex/yacc rules delimiter */
163 /* this %% starts the section containing the rules */
166 /* Copy yytext to private buffer, to be able to add further
167 * content following it: */
170 /* simulate a yylex() or yyparse() definition */
171 (void) strcat(my_yytext, " /* ");
172 first = strlen(my_yytext);
174 (void) strcat(my_yytext, "yylex");
176 /* yacc: yyparse implicitly calls yylex */
177 char *s = " yylex()";
178 char *cp = s + strlen(s);
182 (void) strcat(my_yytext, "yyparse");
184 last = strlen(my_yytext);
185 (void) strcat(my_yytext, " */");
186 my_yyleng = strlen(my_yytext);
189 /* we were in the rules section; now comes the closing one */
199 <SDL>STATE[ \t]+({identifier}|\*) { /* sdl state, treat as function def */
206 <SDL>ENDSTATE[ \t] { /* end of an sdl state, treat as end of a function */
211 \{ { /* count unmatched left braces for fcn def detection */
214 /* mark an untagged enum/struct/union so its beginning
230 \#{ws}* { /* start a preprocessor line */
231 if (rules == NO) /* don't consider CPP for lex/yacc rules */
233 yyleng = 1; /* get rid of the blanks, if any */
237 <IN_PREPROC>endif([^a-zA-Z0-9_$\n].*)? { /* #endif */
238 /* delay treatment of #endif depending on whether an
239 * #if comes right after it, or not */
240 /* HBB 20010619: new pattern allows trailing garbage
241 * after the #endif */
246 <WAS_ENDIF>\n{wsnl}*#{ws}*if(ndef|def)?{ws}+ {
247 /* attempt to correct erroneous brace count caused by:
256 /* the current #if must not have an #else or #elif */
257 if (elseelif == YES) {
263 yyless(1); /* rescan all but the line ending */
268 <WAS_ENDIF>\n{wsnl}* { /* an #endif with no #if right after it */
271 /* get the maximum brace count for this #if */
272 if (braces < maxifbraces[--iflevel]) {
273 braces = maxifbraces[iflevel];
283 <IN_PREPROC>ifndef{ws}+ |
284 <IN_PREPROC>ifdef{ws}+ |
285 <IN_PREPROC>if{ws}+ { /* #if directive */
287 if (pseudoelif == YES) {
292 /* make sure there is room for the current brace count */
293 if (iflevel == miflevel) {
294 miflevel += IFLEVELINC;
295 maxifbraces = myrealloc(maxifbraces, miflevel * sizeof(int));
296 preifbraces = myrealloc(preifbraces, miflevel * sizeof(int));
298 /* push the current brace count */
299 preifbraces[iflevel] = braces;
300 maxifbraces[iflevel++] = 0;
305 <IN_PREPROC>else({ws}.*)? { /* #else --- eat up whole line */
309 /* save the maximum brace count for this #if */
310 if (braces > maxifbraces[iflevel - 1]) {
311 maxifbraces[iflevel - 1] = braces;
313 /* restore the brace count to before the #if */
314 braces = preifbraces[iflevel - 1];
320 <IN_PREPROC>elif{ws}+ { /* #elif */
321 /* elseelif = YES; --- HBB I doubt this is correct */
325 /* save the maximum brace count for this #if */
326 if (braces > maxifbraces[iflevel - 1]) {
327 maxifbraces[iflevel - 1] = braces;
329 /* restore the brace count to before the #if */
330 braces = preifbraces[iflevel - 1];
337 <IN_PREPROC>include{ws}*\"[^"\n]+\" |
338 <IN_PREPROC>include{ws}*<[^>\n]+> { /* #include file */
340 char remember = yytext[yyleng-1];
343 s = strpbrk(my_yytext, "\"<");
346 my_yytext[my_yyleng-1] = '\0';
348 my_yytext[my_yyleng-1] = remember;
349 first = s - my_yytext;
350 last = my_yyleng - 1;
351 if (compress == YES) {
352 my_yytext[0] = '\2'; /* compress the keyword */
360 /* could be the last enum member initializer */
361 if (braces == initializerbraces) {
362 initializerbraces = -1;
370 if (braces == 0 || (braces == 1 && classdef == YES)) {
372 /* if the end of an enum/struct/union definition */
376 /* if the end of the function */
377 else if (fcndef == YES) {
388 \( { /* count unmatched left parentheses for function templates */
397 /* if the end of a function template */
398 if (parens == templateparens) {
405 = { /* if a global definition initializer */
408 if (global == YES && ppdefine == NO && my_yytext[0] != '#') {
409 initializerbraces = braces;
415 : { /* if a global structure field */
418 if (global == YES && ppdefine == NO && my_yytext[0] != '#') {
425 if (braces == initializerbraces) {
426 initializerbraces = -1;
433 ; { /* if the enum/struct/union was not a definition */
437 /* if the end of a typedef */
438 if (braces == typedefbraces) {
441 /* if the end of an external definition */
442 if (braces == externalbraces) {
451 <IN_PREPROC>define{ws}+{identifier} {
453 /* preprocessor macro or constant definition */
456 if (compress == YES) {
457 my_yytext[0] = '\1'; /* compress the keyword */
460 /* search backwards through yytext[] to find the identifier */
461 /* NOTE: this had better be left to flex, by use of
462 * yet another starting condition */
464 first = my_yyleng - 1;
465 while (my_yytext[first] != ' ' && my_yytext[first] != '\t') {
474 <IN_PREPROC>\n { /* unknown preprocessor line */
481 <IN_PREPROC>{identifier} { /* unknown preprocessor line */
487 class{wsnl}+{identifier}({wsnl}|{identifier}|[():])*\{ { /* class definition */
490 yyless(5); /* eat up 'class', and re-scan */
496 ("enum"|"struct"|"union") {
502 ({wsnl}+{identifier}){wsnl}*\{ { /* e/s/u definition */
503 tagdef = my_yytext[ident_start];
504 BEGIN(WAS_IDENTIFIER);
507 {wsnl}*\{ { /* e/s/u definition without a tag */
508 tagdef = my_yytext[ident_start];
514 yyless(0); /* re-scan all this as normal text */
518 ({wsnl}+{identifier})?{wsnl}* |
519 .|\n { /* e/s/u usage */
520 BEGIN(WAS_IDENTIFIER);
525 if{wsnl}*\( { /* ignore 'if' */
531 {identifier} { /* identifier found: do nothing, yet. (!) */
532 BEGIN(WAS_IDENTIFIER);
539 {ws}*\(({wsnl}|{identifier}|{number}|[*&[\]=,.:])*\)([()]|{wsnl})*[:a-zA-Z_#{] {
540 /* a function definition */
541 /* note: "#define a (b) {" and "#if defined(a)\n#"
542 * are not fcn definitions! */
543 /* warning: "if (...)" must not overflow yytext,
544 * so the content of function argument definitions
545 * is restricted, in particular parentheses are
547 /* FIXME HBB 20001003: the above 'not allowed' may well be the
548 * reason for the parsing bug concerning function pointer usage,
549 * I suspect. --- I think my new special-case rule for 'if'
550 * could be helpful in removing that limitation */
551 if ((braces == 0 && ppdefine == NO && my_yytext[0] != '#' && rules == NO) ||
552 (braces == 1 && classdef == YES)) {
561 {ws}*\(([*&[\]=,.]|{identifier}|{number}|{wsnl})* { /* function call */
562 fcncal: if (fcndef == YES || ppdefine == YES || rules == YES) {
567 if (template == NO) {
568 templateparens = parens;
574 ("*"|{wsnl})+{identifier} { /* typedef name or modifier use */
578 .|\n { /* general identifier usage */
581 if (global == YES && ppdefine == NO && my_yytext[0] != '#' &&
582 external == NO && initializer == NO &&
583 arraydimension == NO && structfield == NO &&
584 template == NO && fcndef == NO) {
586 /* if enum/struct/union */
596 if (YYSTATE == WAS_IDENTIFIER) {
597 /* Position back to the actual identifier: */
601 /* HBB 20001008: if the anti-backup-pattern above matched,
602 * and the matched context ended with a \n, then the scanner
603 * believes it's at the start of a new line. But the yyless()
604 * should feed that \n back into the input, so that's
605 * wrong. --> force 'beginning-of-line' status off. */
615 if (yyleng > STMTMAX) {
618 /* skip to the end of the line */
619 warning("line too long");
620 while ((c = skipcomment_input()) > LEXEOF) {
627 /* truncate a long symbol */
628 if (yyleng > PATLEN) {
629 warning("symbol too long");
630 my_yyleng = first + PATLEN;
631 my_yytext[my_yyleng] = '\0';
634 /* if found word was a keyword: */
635 if ((s = lookup(my_yytext + first)) != NULL) {
638 /* if the start of a typedef */
639 if (s == typedeftext) {
640 typedefbraces = braces;
643 /* if an enum/struct/union */
644 /* (needed for "typedef struct tag name;" so
645 tag isn't marked as the typedef name) */
646 else if (s == enumtext || s == structtext || s == uniontext) {
648 } else if (s == externtext) {
649 /* if an external definition */
650 externalbraces = braces;
652 } else if (templateparens == parens && template == YES) {
653 /* keyword doesn't start a function
658 /* identifier after typedef was a
663 /* not a keyword --> found an identifier */
666 /* if a class/enum/struct/union tag definition */
667 /* FIXME HBB 20001001: why reject "class"? */
668 if (tagdef && strnotequal(my_yytext + first, "class")) {
674 } else if (braces == typedefbraces && oldtype == NO &&
675 arraydimension == NO) {
676 /* if a typedef name */
688 \[ { /* array dimension (don't worry about subscripts) */
689 arraydimension = YES;
698 \\\n { /* preprocessor statement is continued on next line */
699 /* save the '\\' to the output file, but not the '\n': */
705 \n { /* end of the line */
706 if (ppdefine == YES) { /* end of a #define */
713 /* skip the first 8 columns of a breakpoint listing line */
714 /* and skip the file path in the page header */
715 if (bplisting == YES) {
718 /* FIXME HBB 20001007: should call input() instead */
719 switch (skipcomment_input()) { /* tab and EOF just fall through */
720 case ' ': /* breakpoint number line */
722 for (i = 1; i < 8 && skipcomment_input() > LEXEOF; ++i)
725 case '.': /* header line */
727 /* skip to the end of the line */
728 while ((c = skipcomment_input()) > LEXEOF) {
735 case '\n': /* empty line */
745 /* no my_yymore(): \n doesn't need to be in my_yytext */
748 /* line ended --> flush my_yytext */
755 \' { /* character constant */
766 \" { /* string constant */
776 <IN_DQUOTE,IN_SQUOTE>{
777 \n { /* syntax error: unexpected EOL */
787 \\\n { /* line continuation inside a string! */
794 ^{ws}+ { /* don't save leading white space */
797 {ws}+\n { /* eat whitespace at end of line */
801 [\t\r\v\f]+ { /* eat non-blank whitespace sequences, replace
806 {ws}{2,} { /* compress sequential whitespace here, not in putcrossref() */
810 "/*" yy_push_state(COMMENT);
813 "*"+[^*/\n]* ; /* do nothing */
816 if (ppdefine == NO) {
824 /* replace the comment by a single blank */
831 /* C++-style one-line comment */
836 {number} | /* number */
837 <SDL>STATE[ \t]+ | /* ... and other syntax error catchers... */
838 . { /* punctuation and operators */
847 initscanner(char *srcfile)
851 if (maxifbraces == NULL) {
852 maxifbraces = mymalloc(miflevel * sizeof(int));
853 preifbraces = mymalloc(miflevel * sizeof(int));
855 first = 0; /* buffer index for first char of symbol */
856 last = 0; /* buffer index for last char of symbol */
857 lineno = 1; /* symbol line number */
858 myylineno = 1; /* input line number */
859 arraydimension = NO; /* inside array dimension declaration */
860 bplisting = NO; /* breakpoint listing */
861 braces = 0; /* unmatched left brace count */
862 classdef = NO; /* c++ class definition */
863 elseelif = NO; /* #else or #elif found */
864 esudef = NO; /* enum/struct/union global definition */
865 external = NO; /* external definition */
866 externalbraces = -1; /* external definition outer brace count */
867 fcndef = NO; /* function definition */
868 global = YES; /* file global scope (outside functions) */
869 iflevel = 0; /* #if nesting level */
870 initializer = NO; /* data initializer */
871 initializerbraces = -1; /* data initializer outer brace count */
872 lex = NO; /* lex file */
873 parens = 0; /* unmatched left parenthesis count */
874 ppdefine = NO; /* preprocessor define statement */
875 pseudoelif = NO; /* pseudo-#elif */
876 oldtype = NO; /* next identifier is an old type */
877 rules = NO; /* lex/yacc rules */
878 sdl = NO; /* sdl file */
879 structfield = NO; /* structure field declaration */
880 tagdef = '\0'; /* class/enum/struct/union tag definition */
881 template = NO; /* function template */
882 templateparens = -1; /* function template outer parentheses count */
883 typedefbraces = -1; /* initial typedef braces count */
884 ident_start = 0; /* start of previously found identifier */
892 /* if this is not a C file */
893 if ((s = strrchr(srcfile, '.')) != NULL) {
894 switch (*++s) { /* this switch saves time on C files */
896 if (strcmp(s, "bp") == 0) { /* breakpoint listing */
901 if (strcmp(s, "l") == 0) { /* lex */
907 if (strcmp(s, "sd") == 0) { /* sdl */
913 if (strcmp(s, "y") == 0) { /* yacc */
921 #if !COMMENTS_BY_FLEX
923 /* A micro-scanner that serves as the input() function of the
924 * scanner. It throws away any comments in the input, correctly
925 * avoiding doing this inside string/character constants, and knows
926 * about backslash sequences. Now that the main scanner doesn't use
927 * yymore() any longer, this could be replaced by lex rules. Left for
930 /* Status variable: If this is non-NUL, it's the character that
931 * terminates a string we're currently in. */
932 static int string_terminator = '\0';
934 /* Helper routine: treat 'c' as a character found inside a
935 * string. Check if this character might be the end of that
936 * string. Backslashes have to be taken care of, for the sake of
937 * "quotes like \"these\" found inside a string". */
939 insidestring_input(int c)
941 static BOOL was_backslash = NO;
943 if ((c == '\\') && (was_backslash == NO)) {
944 /* escape character found --> treat next char specially */
945 /* FIXME HBB 20001003: need treatment of backslash in the main
946 * scanner, too. It'll get false line counts in case of "\\'",
947 * otherwise --- they can occur as part of a lex pattern */
952 if (((c == '\t') && (lex == YES))
953 /* Note: "\\\n" is removed even inside strings! */
954 || ((c == '\n') && (was_backslash == NO))
956 || ((c == string_terminator) && (was_backslash == NO))
958 /* Line ended, or end-of-string was found. That is a syntax
959 * error. To recover, stop treatment as a string constant: */
960 string_terminator = '\0';
961 } else if (!isprint((unsigned char)c)) {
962 /* mask unprintable characters */
970 /* Helper function: skip over input until end of comment is found (or
971 * we find that it wasn't really comment, in the first place): */
977 /* Coming here, we've just read in the opening '/' of a
980 if ((c = getc(yyin)) == '*') { /* C comment */
982 while ((c = getc(yyin)) != EOF
983 /* fewer '/'s --> test them first! */
984 && (c != '/' || lastc != '*')
987 /* keep the line number count */
988 /* FIXME HBB 20001008: this is not synchronized
989 * properly with myylineno changes by the main
990 * scanner. That is a strong point in favour of moving
991 * this to lex code, IMHO */
996 /* return a blank for Reiser cpp token concatenation */
997 /* FIXME HBB 20001008: what on earth is 'Reiser cpp'? ANSI
998 * C defines cpp to explicitly replace any comment by a
999 * blank. Pre-ANSI cpp's behaved differently, but do we
1000 * really want that? If at all, it should only ever be a
1001 * non-default option (like gcc's "-traditional-cpp")
1003 if ((c = getc(yyin)) == '_' || isalnum(c)) {
1004 (void) ungetc(c, yyin);
1008 } else if (c == '/') { /* C++ comment */
1009 while ((c = getc(yyin)) != EOF && c != '\n') {
1010 ; /* do nothing else */
1013 } else { /* not a comment */
1014 (void) ungetc(c, yyin);
1020 /* there may be an immediately following comment */
1025 /* The core of the actual input() function to be used by (f)lex. The
1026 * calling scheme between this and the actual input() redefinition is
1027 * a bit different for lex and flex. See the #ifdef FLEX_SCANNER part
1028 * in the head section. */
1030 skipcomment_input(void)
1035 if (string_terminator != '\0') {
1036 /* don't look for comments inside strings! */
1037 return insidestring_input(c);
1038 } else if (c == '/') {
1039 /* swallow everything until end of comment, if this is one */
1041 } else if (c == '"' || c == '\'') {
1042 /* a string is beginning here, so switch input method */
1043 string_terminator = c;
1049 #endif /* !COMMENTS_BY_FLEX */
1051 #define MY_YY_ALLOCSTEP 1000
1055 static size_t yytext_size = 0;
1057 /* my_yytext is an ever-growing buffer. It will not ever
1058 * shrink, nor will it be freed at end of program, for now */
1059 while (my_yyleng + yyleng + 1 >= yytext_size) {
1060 my_yytext = myrealloc(my_yytext,
1061 yytext_size += MY_YY_ALLOCSTEP);
1064 strncpy (my_yytext + my_yyleng, yytext, yyleng+1);
1065 my_yyleng += yyleng;