2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
16 * scan.c - the jam yacc scanner
18 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
19 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
20 * Also handle tokens abutting EOF by remembering
21 * to return EOF no matter how many times yylex()
23 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
24 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
25 * defined before Linux's yacc tries to redefine it.
34 #include "jamgramtab.h"
40 struct include * next; /* next serial include file */
41 char * string; /* pointer into current line */
42 char * * strings; /* for yyfparse() -- text to parse */
43 FILE * file; /* for yyfparse() -- file being read */
44 char * fname; /* for yyfparse() -- file name */
45 int line; /* line counter for error messages */
46 char buf[ 512 ]; /* for yyfparse() -- line buffer */
49 static struct include * incp = 0; /* current file; head of chain */
51 static int scanmode = SCAN_NORMAL;
52 static int anyerrors = 0;
55 static char * symdump( YYSTYPE * );
57 #define BIGGEST_TOKEN 10240 /* no single token can be larger */
61 * Set parser mode: normal, string, or keyword.
70 void yyerror( char * s )
72 /* We use yylval instead of incp to access the error location information as
73 * the incp pointer will already be reset to 0 in case the error occurred at
76 * The two may differ only if we get an error while reading a lexical token
77 * spanning multiple lines, e.g. a multi-line string literal or action body,
78 * in which case yylval location information will hold the information about
79 * where this token started while incp will hold the information about where
82 * TODO: Test the theory about when yylval and incp location information are
83 * the same and when they differ.
85 printf( "%s:%d: %s at %s\n", yylval.file, yylval.line, s, symdump( &yylval ) );
92 return anyerrors != 0;
96 void yyfparse( char * s )
98 struct include * i = (struct include *)BJAM_MALLOC( sizeof( *i ) );
100 /* Push this onto the incp chain. */
104 i->fname = copystr( s );
109 /* If the filename is "+", it means use the internal jambase. */
110 if ( !strcmp( s, "+" ) )
111 i->strings = jambase;
116 * yyline() - read new line and return first character.
118 * Fabricates a continuous stream of characters across include files, returning
119 * EOF at the bitter end.
124 struct include * i = incp;
129 /* Once we start reading from the input stream, we reset the include
130 * insertion point so that the next include file becomes the head of the
134 /* If there is more data in this line, return it. */
138 /* If we are reading from an internal string list, go to the next string. */
144 i->string = *(i->strings++);
150 /* If necessary, open the file. */
154 if ( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
159 /* If there is another line in this file, start it. */
160 if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
168 /* This include is done. Free it up and return EOF so yyparse() returns to
174 /* Close file, free name. */
175 if ( i->file && ( i->file != stdin ) )
178 BJAM_FREE( (char *)i );
185 * yylex() - set yylval to current token; return its type.
187 * Macros to move things along:
189 * yychar() - return and advance character; invalid after EOF.
190 * yyprev() - back up one character; invalid before yychar().
192 * yychar() returns a continuous stream of characters, until it hits the EOF of
193 * the current include file.
196 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
197 #define yyprev() ( incp->string-- )
202 char buf[ BIGGEST_TOKEN ];
208 /* Get first character (whitespace or of token). */
211 if ( scanmode == SCAN_STRING )
213 /* If scanning for a string (action's {}'s), look for the closing brace.
214 * We handle matching braces, if they match.
219 while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
224 if ( ( c == '}' ) && !--nest )
231 /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
232 if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
236 /* We ate the ending brace -- regurgitate it. */
240 /* Check for obvious errors. */
241 if ( b == buf + sizeof( buf ) )
243 yyerror( "action block too big" );
249 yyerror( "unmatched {} in action block" );
254 yylval.type = STRING;
255 yylval.string = newstr( buf );
256 yylval.file = incp->fname;
257 yylval.line = incp->line;
266 /* Eat white space. */
269 /* Skip past white space. */
270 while ( ( c != EOF ) && isspace( c ) )
277 /* Swallow up comment line. */
278 while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
281 /* c now points to the first character of a token. */
285 yylval.file = incp->fname;
286 yylval.line = incp->line;
288 /* While scanning the word, disqualify it for (expensive) keyword lookup
289 * when we can: $anything, "anything", \anything
291 notkeyword = c == '$';
293 /* Look for white space to delimit word. "'s get stripped but preserve
294 * white space. \ protects next character.
299 ( b < buf + sizeof( buf ) ) &&
300 ( inquote || !isspace( c ) )
309 else if ( c != '\\' )
314 else if ( ( c = yychar() ) != EOF )
335 /* Check obvious errors. */
336 if ( b == buf + sizeof( buf ) )
338 yyerror( "string too big" );
344 yyerror( "unmatched \" in string" );
348 /* We looked ahead a character - back up. */
352 /* Scan token table. Do not scan if it is obviously not a keyword or if
353 * it is alphabetic when we are looking for punctuation.
359 if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
360 for ( k = keywords; k->word; ++k )
361 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
363 yylval.type = k->type;
364 yylval.string = k->word; /* used by symdump */
368 if ( yylval.type == ARG )
369 yylval.string = newstr( buf );
373 printf( "scan %s\n", symdump( &yylval ) );
378 /* We do not reset yylval.file & yylval.line here so unexpected EOF error
379 * messages would include correct error location information.
386 static char * symdump( YYSTYPE * s )
388 static char buf[ BIGGEST_TOKEN + 20 ];
391 case EOF : sprintf( buf, "EOF" ); break;
392 case 0 : sprintf( buf, "unknown symbol %s", s->string ); break;
393 case ARG : sprintf( buf, "argument %s" , s->string ); break;
394 case STRING: sprintf( buf, "string \"%s\"" , s->string ); break;
395 default : sprintf( buf, "keyword %s" , s->string ); break;
402 * Get information about the current file and line, for those epsilon
403 * transitions that produce a parse.
406 void yyinput_stream( char * * name, int * line )