ragel/rlscan.lex

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 %{
  23
  24 #define YY_NEVER_INTERACTIVE 1
  25 //#define WANT_TOKEN_WRITE
  26
  27 #include <iostream>
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "buffer.h"
  32
  33 using std::cout;
  34 using std::cerr;
  35 using std::endl;
  36
  37 Buffer tokbuf;
  38 int builtinBrace = 0;
  39 bool inlineWhitespace = true;
  40 bool handlingInclude = false;
  41 bool multiline = false;
  42
  43 /* Used for recognising host language code blocks, init with anything not
  44  * involved in the host lang test. */
  45 int previous_tokens[2] = { TK_Section, TK_Section };
  46
  47 /* These keep track of the start of an inline comment or literal string for
  48  * reporting unterminated comments or strings. */
  49 int il_comm_lit_first_line;
  50 int il_comm_lit_first_column;
  51
  52 /* These keep track of the start of a code block for reporting unterminated
  53  * code blocks. */
  54 int il_code_first_line;
  55 int il_code_first_column;
  56
  57 /* Include Stack data. */
  58 YY_BUFFER_STATE buff_stack[INCLUDE_STACK_SIZE];
  59 bool multiline_stack[INCLUDE_STACK_SIZE];
  60 int inc_stack_ptr = 0;
  61
  62 YYSTYPE *yylval;
  63 YYLTYPE *yylloc;
  64
  65 extern InputData *id;
  66 extern int includeDepth;
  67
  68 void garble();
  69
  70 void extendToken( char *data, int len );
  71 void extendToken();
  72
  73 int emitToken( int token, char *data, int len );
  74 int emitNoData( int token );
  75 void passThrough( char *data );
  76 bool openMachineSpecBlock();
  77 void popInclude();
  78
  79 enum InlineBlockType {
  80         CurlyDelimited,
  81         SemiTerminated
  82 } inlineBlockType;
  83
  84 /* Using a wrapper for the parser, so the lex declaration must be overridden. */
  85 #define YY_DECL int ragel_lex()
  86
  87 %}
  88
  89 /* Outside an fsm machine specification ("outside code"). */
  90 %x              OC_SGL_LIT
  91 %x              OC_DBL_LIT
  92 %x              OC_C_COM
  93 %x              OC_CXX_COM
  94
  95 /* Inside a fsm machine specification. */
  96 %x              RL_INITIAL
  97 %x              RL_SLIT
  98 %x              RL_DLIT
  99 %x              RL_OREXP
 100 %x              RL_REGEXP
 101 %x              RL_REGEXP_OR
 102 %x              RL_SHELL_COM
 103 %x              RL_VERBOSE_EMBED
 104 %x              RL_WRITE
 105
 106 /* Inline code. */
 107 %x              IL_INITIAL
 108 %x              IL_SGL_LIT
 109 %x              IL_DBL_LIT
 110 %x              IL_C_COM
 111 %x              IL_CXX_COM
 112
 113 WSCHAR [\t\n\v\f\r ]
 114 IDENT [a-zA-Z_][a-zA-Z_0-9]*
 115
 116 %%
 117
 118         /* Numbers in outer code. */
 119 <INITIAL>[0-9]+ {
 120         garble();
 121         passThrough( yytext );
 122 }
 123
 124         /* Words in outer code. */
 125 <INITIAL>{IDENT} {
 126         garble();
 127         passThrough( yytext );
 128 }
 129
 130         /* Begin a c style comment. */
 131 <INITIAL>"/*" {
 132         BEGIN(OC_C_COM);
 133         extendToken();
 134         passThrough( yytext );
 135 }
 136         /* Data in a C style comment. */
 137 <OC_C_COM>.             extendToken(); passThrough( yytext );
 138 <OC_C_COM>\n            extendToken(); passThrough( yytext );
 139
 140         /* Terminate a C style comment. */
 141 <OC_C_COM>"*/" {
 142         BEGIN(INITIAL);
 143         garble();
 144         passThrough( yytext );
 145 }
 146
 147         /* Begin a C++ style comment. */
 148 <INITIAL>"//" {
 149         BEGIN(OC_CXX_COM);
 150         extendToken();
 151         passThrough( yytext );
 152 }
 153         /* Data in a C++ style comment. */
 154 <OC_CXX_COM>[^\n]+ {
 155         extendToken();
 156         passThrough( yytext );
 157 }
 158         /* Terminate a C++ style comment. */
 159 <OC_CXX_COM>\n {
 160         BEGIN(INITIAL);
 161         garble();
 162         passThrough( yytext );
 163 }
 164
 165
 166         /* Start literals. */
 167 <INITIAL>\' {
 168         BEGIN(OC_SGL_LIT);
 169         extendToken();
 170         passThrough( yytext );
 171 }
 172 <INITIAL>\" {
 173         BEGIN(OC_DBL_LIT);
 174         extendToken();
 175         passThrough( yytext );
 176 }
 177         /* Various escape sequences in literals. We don't need to get them
 178          * all here. We just need to pick off the ones that could confuse us
 179          * about the literal we are matching. */
 180 <OC_SGL_LIT,OC_DBL_LIT>\\\'             extendToken(); passThrough( yytext );
 181 <OC_SGL_LIT,OC_DBL_LIT>\\\"             extendToken(); passThrough( yytext );
 182 <OC_SGL_LIT,OC_DBL_LIT>\\\\             extendToken(); passThrough( yytext );
 183         /* Characters in literals. */
 184 <OC_DBL_LIT>[^\"]                               extendToken(); passThrough( yytext );
 185 <OC_SGL_LIT>[^\']                               extendToken(); passThrough( yytext );
 186         /* Terminate a double literal */
 187 <OC_DBL_LIT>\" {
 188         BEGIN(INITIAL);
 189         garble();
 190         passThrough( yytext );
 191 }
 192         /* Terminate a single literal. */
 193 <OC_SGL_LIT>\' {
 194         BEGIN(INITIAL);
 195         garble();
 196         passThrough( yytext );
 197 }
 198
 199         /* Whitespace. */
 200 <INITIAL>{WSCHAR}+ {
 201         garble();
 202         passThrough( yytext );
 203 }
 204
 205         /* Section delimiter: opens a single-line machine specification. */
 206 <INITIAL>"%%" {
 207         BEGIN(RL_INITIAL);
 208         multiline = false;
 209         return emitNoData( TK_Section );
 210 }
 211
 212         /* Section delimiter: opens a multi-line machine specification. */
 213 <INITIAL>"%%{" {
 214         BEGIN(RL_INITIAL);
 215         multiline = true;
 216         return emitNoData( TK_Section );
 217 }
 218
 219 <INITIAL>"{" {
 220         garble();
 221         passThrough( yytext );
 222 }
 223
 224 <INITIAL>"}" {
 225         garble();
 226         passThrough( yytext );
 227 }
 228
 229 <INITIAL>";" {
 230         garble();
 231         passThrough( yytext );
 232 }
 233
 234         /* Any other characters. */
 235 <INITIAL>. {
 236         garble();
 237         passThrough( yytext );
 238 }
 239
 240         /* Numbers. */
 241 <RL_INITIAL,IL_INITIAL>[0-9][0-9]* {
 242         return emitToken( TK_UInt, yytext, yyleng );
 243 }
 244 <RL_INITIAL,IL_INITIAL>0x[0-9a-fA-F][0-9a-fA-F]* {
 245         return emitToken( TK_Hex, yytext, yyleng );
 246 }
 247
 248         /* Keywords in RL and IL. */
 249 <RL_INITIAL>variable\ [a-zA-Z_]+ {
 250         BEGIN(IL_INITIAL);
 251         inlineBlockType = SemiTerminated;
 252         return emitToken( KW_Variable, yytext+9, yyleng-9 );
 253 }
 254 <RL_INITIAL>access {
 255         BEGIN(IL_INITIAL);
 256         inlineBlockType = SemiTerminated;
 257         return emitNoData( KW_Access );
 258 }
 259 <RL_INITIAL>action {
 260         return emitNoData( KW_Action );
 261 }
 262 <RL_INITIAL>alphtype {
 263         BEGIN(IL_INITIAL);
 264         inlineWhitespace = false;
 265         inlineBlockType = SemiTerminated;
 266         return emitNoData( KW_AlphType );
 267 }
 268 <RL_INITIAL>getkey {
 269         BEGIN(IL_INITIAL);
 270         inlineBlockType = SemiTerminated;
 271         return emitNoData( KW_GetKey );
 272 }
 273 <RL_INITIAL>when {
 274         return emitNoData( KW_When );
 275 }
 276 <RL_INITIAL>eof {
 277         return emitNoData( KW_Eof );
 278 }
 279 <RL_INITIAL>err {
 280         return emitNoData( KW_Err );
 281 }
 282 <RL_INITIAL>lerr {
 283         return emitNoData( KW_Lerr );
 284 }
 285 <RL_INITIAL>to {
 286         return emitNoData( KW_To );
 287 }
 288 <RL_INITIAL>from {
 289         return emitNoData( KW_From );
 290 }
 291
 292
 293         /*
 294 <RL_INITIAL>range {
 295         return emitNoData( KW_Range );
 296 }*/
 297
 298 <RL_INITIAL>write {
 299         BEGIN(RL_WRITE);
 300         return emitNoData( KW_Write );
 301 }
 302 <RL_INITIAL>machine {
 303         return emitNoData( KW_Machine );
 304 }
 305 <RL_INITIAL>include {
 306         /* Include statements are processed by both the scanner and the
 307          * parser.  The scanner opens the include file and switches to it and the
 308          * parser invokes a new parser for handling the tokens. We use
 309          * handlingInclude to indicate that the scanner is processing an include
 310          * directive. Ends at ; */
 311         handlingInclude = true;
 312         return emitNoData( KW_Include );
 313 }
 314
 315 <RL_WRITE>{WSCHAR}+ garble();
 316 <RL_WRITE>; {
 317         BEGIN(RL_INITIAL);
 318         return emitNoData( ';' );
 319 }
 320
 321         /* These must be synced in rlparse.y */
 322 <IL_INITIAL>fpc {
 323         return emitNoData( KW_PChar );
 324 }
 325 <IL_INITIAL>fc {
 326         return emitNoData( KW_Char );
 327 }
 328 <IL_INITIAL>fhold {
 329         return emitNoData( KW_Hold );
 330 }
 331 <IL_INITIAL>fgoto {
 332         return emitNoData( KW_Goto );
 333 }
 334 <IL_INITIAL>fcall {
 335         return emitNoData( KW_Call );
 336 }
 337 <IL_INITIAL>fret {
 338         return emitNoData( KW_Ret );
 339 }
 340 <IL_INITIAL>fcurs {
 341         return emitNoData( KW_CurState );
 342 }
 343 <IL_INITIAL>ftargs {
 344         return emitNoData( KW_TargState );
 345 }
 346 <IL_INITIAL>fentry {
 347         return emitNoData( KW_Entry );
 348 }
 349 <IL_INITIAL>fnext {
 350         return emitNoData( KW_Next );
 351 }
 352 <IL_INITIAL>fexec {
 353         return emitNoData( KW_Exec );
 354 }
 355 <IL_INITIAL>fbreak {
 356         return emitNoData( KW_Break );
 357 }
 358
 359         /* Words. */
 360 <RL_INITIAL,IL_INITIAL,RL_WRITE>{IDENT} {
 361         return emitToken( TK_Word, yytext, yyleng );
 362 }
 363
 364         /* Begin a shell style comment. */
 365 <RL_INITIAL>#                   {
 366         BEGIN(RL_SHELL_COM);
 367         extendToken();
 368 }
 369         /* Data in a shell style comment. */
 370 <RL_SHELL_COM>[^\n]+            {
 371         extendToken();
 372 }
 373         /* Terminate a shell style comment. */
 374 <RL_SHELL_COM>\n                {
 375         BEGIN(RL_INITIAL);
 376         garble();
 377 }
 378
 379         /*
 380          * Start single and double literals.
 381          */
 382 <RL_INITIAL>'                   {
 383         BEGIN(RL_SLIT);
 384         extendToken();
 385 }
 386 <RL_INITIAL>\"                  {
 387         BEGIN(RL_DLIT);
 388         extendToken();
 389 }
 390
 391         /* Escape sequences in single and double literals. */
 392 <RL_SLIT,RL_DLIT>\\0            extendToken( "\0", 1 );
 393 <RL_SLIT,RL_DLIT>\\a            extendToken( "\a", 1 );
 394 <RL_SLIT,RL_DLIT>\\b            extendToken( "\b", 1 );
 395 <RL_SLIT,RL_DLIT>\\t            extendToken( "\t", 1 );
 396 <RL_SLIT,RL_DLIT>\\n            extendToken( "\n", 1 );
 397 <RL_SLIT,RL_DLIT>\\v            extendToken( "\v", 1 );
 398 <RL_SLIT,RL_DLIT>\\f            extendToken( "\f", 1 );
 399 <RL_SLIT,RL_DLIT>\\r            extendToken( "\r", 1 );
 400 <RL_SLIT,RL_DLIT>\\\n           extendToken();
 401 <RL_SLIT,RL_DLIT>\\.            extendToken( yytext+1, 1 );
 402
 403         /* Characters in literals. */
 404 <RL_SLIT>[^']                                           extendToken( yytext, 1 );
 405 <RL_DLIT>[^"]                                           extendToken( yytext, 1 );
 406
 407         /* Terminate a single literal. */
 408 <RL_SLIT>'[i]* {
 409         BEGIN(RL_INITIAL);
 410         return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 );
 411 }
 412         /* Terminate a double literal */
 413 <RL_DLIT>\"[i]* {
 414         BEGIN(RL_INITIAL);
 415         return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 );
 416 }
 417
 418         /*
 419          * Start an OR expression.
 420          */
 421 <RL_INITIAL>"["                 {
 422         BEGIN(RL_OREXP);
 423         return emitNoData( RE_SqOpen );
 424 }
 425
 426 <RL_INITIAL>"\[^"       {
 427         BEGIN(RL_OREXP);
 428         return emitNoData( RE_SqOpenNeg );
 429 }
 430
 431         /* Escape sequences in OR expressions. */
 432 <RL_OREXP>\\0           { return emitToken( RE_Char, "\0", 1 ); }
 433 <RL_OREXP>\\a           { return emitToken( RE_Char, "\a", 1 ); }
 434 <RL_OREXP>\\b           { return emitToken( RE_Char, "\b", 1 ); }
 435 <RL_OREXP>\\t           { return emitToken( RE_Char, "\t", 1 ); }
 436 <RL_OREXP>\\n           { return emitToken( RE_Char, "\n", 1 ); }
 437 <RL_OREXP>\\v           { return emitToken( RE_Char, "\v", 1 ); }
 438 <RL_OREXP>\\f           { return emitToken( RE_Char, "\f", 1 ); }
 439 <RL_OREXP>\\r           { return emitToken( RE_Char, "\r", 1 ); }
 440 <RL_OREXP>\\\n          { garble(); }
 441 <RL_OREXP>\\.           { return emitToken( RE_Char, yytext+1, 1 ); }
 442
 443         /* Range dash in an OR expression. */
 444 <RL_OREXP>-     {
 445         return emitNoData( RE_Dash );
 446 }
 447
 448         /* Characters in an OR expression. */
 449 <RL_OREXP>[^\]] {
 450         return emitToken( RE_Char, yytext, 1 );
 451 }
 452
 453         /* Terminate an OR expression. */
 454 <RL_OREXP>\]    {
 455         BEGIN(RL_INITIAL);
 456         return emitNoData( RE_SqClose );
 457 }
 458
 459         /*
 460          * Start a regular expression.
 461          */
 462 <RL_INITIAL>\/          {
 463         BEGIN(RL_REGEXP);
 464         return emitNoData( RE_Slash );
 465 }
 466
 467         /* Escape sequences in regular expressions. */
 468 <RL_REGEXP,RL_REGEXP_OR>\\0             {
 469         return emitToken( RE_Char, "\0", 1 );
 470 }
 471 <RL_REGEXP,RL_REGEXP_OR>\\a             {
 472         return emitToken( RE_Char, "\a", 1 );
 473 }
 474 <RL_REGEXP,RL_REGEXP_OR>\\b             {
 475         return emitToken( RE_Char, "\b", 1 );
 476 }
 477 <RL_REGEXP,RL_REGEXP_OR>\\t             {
 478         return emitToken( RE_Char, "\t", 1 );
 479 }
 480 <RL_REGEXP,RL_REGEXP_OR>\\n             {
 481         return emitToken( RE_Char, "\n", 1 );
 482 }
 483 <RL_REGEXP,RL_REGEXP_OR>\\v             {
 484         return emitToken( RE_Char, "\v", 1 );
 485 }
 486 <RL_REGEXP,RL_REGEXP_OR>\\f             {
 487         return emitToken( RE_Char, "\f", 1 );
 488 }
 489 <RL_REGEXP,RL_REGEXP_OR>\\r             {
 490         return emitToken( RE_Char, "\r", 1 );
 491 }
 492 <RL_REGEXP,RL_REGEXP_OR>\\\n    {
 493         garble();
 494 }
 495 <RL_REGEXP,RL_REGEXP_OR>\\.             {
 496         return emitToken( RE_Char, yytext+1, 1 );
 497 }
 498
 499         /* Special characters in a regular expression. */
 500 <RL_REGEXP>\.           {
 501         return emitNoData( RE_Dot );
 502 }
 503 <RL_REGEXP>\*           {
 504         return emitNoData( RE_Star );
 505 }
 506 <RL_REGEXP>"\[^"        {
 507         BEGIN(RL_REGEXP_OR);
 508         return emitNoData( RE_SqOpenNeg );
 509 }
 510 <RL_REGEXP>"\["         {
 511         BEGIN(RL_REGEXP_OR);
 512         return emitNoData( RE_SqOpen );
 513 }
 514
 515         /* Range dash in a regular expression or set. */
 516 <RL_REGEXP_OR>- {
 517         return emitNoData( RE_Dash );
 518 }
 519
 520         /* Terminate an or set or a regular expression. */
 521 <RL_REGEXP_OR>\]        {
 522         BEGIN(RL_REGEXP);
 523         return emitNoData( RE_SqClose );
 524 }
 525
 526         /* Characters in a regular expression. */
 527 <RL_REGEXP,RL_REGEXP_OR>[^/]                    {
 528         return emitToken( RE_Char, yytext, 1 );
 529 }
 530
 531         /* Terminate a regular expression */
 532 <RL_REGEXP,RL_REGEXP_OR>\/[i]* {
 533         BEGIN(RL_INITIAL);
 534         return emitToken( RE_Slash, yytext, yyleng );
 535 }
 536
 537         /* An open brace is either a plain bracket or the start of an inline code block. */
 538 <RL_INITIAL>"{" {
 539         if ( openMachineSpecBlock() ) {
 540                 /* Plain bracket. */
 541                 return emitNoData( *yytext );
 542         }
 543         else {
 544                 /* Start an inline code block. Keep track of where it started in case
 545                  * it terminates prematurely. Return the open bracket. */
 546                 BEGIN(IL_INITIAL);
 547                 inlineBlockType = CurlyDelimited;
 548                 il_code_first_line = id->last_line;
 549                 il_code_first_column = id->last_column+1;
 550                 builtinBrace++;
 551                 return emitNoData( *yytext );
 552         }
 553 }
 554
 555 <RL_INITIAL>\.\. {
 556         return emitNoData( TK_DotDot );
 557 }
 558
 559 <RL_INITIAL>:> {
 560         return emitNoData( TK_ColonGt );
 561 }
 562
 563 <RL_INITIAL>:>> {
 564         return emitNoData( TK_ColonGtGt );
 565 }
 566
 567 <RL_INITIAL><: {
 568         return emitNoData( TK_LtColon );
 569 }
 570
 571 <RL_INITIAL>-- {
 572         return emitNoData( TK_DashDash );
 573 }
 574
 575         /* The instantiation operator. */
 576 <RL_INITIAL>:= {
 577         return emitNoData( TK_ColonEquals );
 578 }
 579
 580         /* Error actions. */
 581 <RL_INITIAL>\>\! {
 582         return emitNoData( TK_StartGblError );
 583 }
 584 <RL_INITIAL>\$\! {
 585         return emitNoData( TK_AllGblError );
 586 }
 587 <RL_INITIAL>%\! {
 588         return emitNoData( TK_FinalGblError );
 589 }
 590 <RL_INITIAL><\! {
 591         return emitNoData( TK_NotStartGblError );
 592 }
 593 <RL_INITIAL>@\! {
 594         return emitNoData( TK_NotFinalGblError );
 595 }
 596 <RL_INITIAL><>\! {
 597         return emitNoData( TK_MiddleGblError );
 598 }
 599
 600         /* Local error actions. */
 601 <RL_INITIAL>\>\^ {
 602         return emitNoData( TK_StartLocalError );
 603 }
 604 <RL_INITIAL>\$\^ {
 605         return emitNoData( TK_AllLocalError );
 606 }
 607 <RL_INITIAL>%\^ {
 608         return emitNoData( TK_FinalLocalError );
 609 }
 610 <RL_INITIAL><\^ {
 611         return emitNoData( TK_NotStartLocalError );
 612 }
 613 <RL_INITIAL>@\^ {
 614         return emitNoData( TK_NotFinalLocalError );
 615 }
 616 <RL_INITIAL><>\^ {
 617         return emitNoData( TK_MiddleLocalError );
 618 }
 619
 620         /* EOF Actions. */
 621 <RL_INITIAL>\>\/ {
 622         return emitNoData( TK_StartEOF );
 623 }
 624 <RL_INITIAL>\$\/ {
 625         return emitNoData( TK_AllEOF );
 626 }
 627 <RL_INITIAL>%\/ {
 628         return emitNoData( TK_FinalEOF );
 629 }
 630 <RL_INITIAL><\/ {
 631         return emitNoData( TK_NotStartEOF );
 632 }
 633 <RL_INITIAL>@\/ {
 634         return emitNoData( TK_NotFinalEOF );
 635 }
 636 <RL_INITIAL><>\/ {
 637         return emitNoData( TK_MiddleEOF );
 638 }
 639
 640         /* To State Actions. */
 641 <RL_INITIAL>\>~ {
 642         return emitNoData( TK_StartToState );
 643 }
 644 <RL_INITIAL>\$~ {
 645         return emitNoData( TK_AllToState );
 646 }
 647 <RL_INITIAL>%~ {
 648         return emitNoData( TK_FinalToState );
 649 }
 650 <RL_INITIAL><~ {
 651         return emitNoData( TK_NotStartToState );
 652 }
 653 <RL_INITIAL>@~ {
 654         return emitNoData( TK_NotFinalToState );
 655 }
 656 <RL_INITIAL><>~ {
 657         return emitNoData( TK_MiddleToState );
 658 }
 659
 660         /* From State Actions. */
 661 <RL_INITIAL>\>\* {
 662         return emitNoData( TK_StartFromState );
 663 }
 664 <RL_INITIAL>\$\* {
 665         return emitNoData( TK_AllFromState );
 666 }
 667 <RL_INITIAL>%\* {
 668         return emitNoData( TK_FinalFromState );
 669 }
 670 <RL_INITIAL><\* {
 671         return emitNoData( TK_NotStartFromState );
 672 }
 673 <RL_INITIAL>@\* {
 674         return emitNoData( TK_NotFinalFromState );
 675 }
 676 <RL_INITIAL><>\* {
 677         return emitNoData( TK_MiddleFromState );
 678 }
 679
 680 <RL_INITIAL><> {
 681         return emitNoData( TK_Middle );
 682 }
 683
 684 <RL_INITIAL>\>\? {
 685         return emitNoData( TK_StartCond );
 686 }
 687 <RL_INITIAL>\$\? {
 688         return emitNoData( TK_AllCond );
 689 }
 690 <RL_INITIAL>%\? {
 691         return emitNoData( TK_LeavingCond );
 692 }
 693
 694         /* The Arrow operator. */
 695 <RL_INITIAL>-> {
 696         return emitNoData( TK_Arrow );
 697 }
 698
 699         /* The double arrow operator. */
 700 <RL_INITIAL>=> {
 701         return emitNoData( TK_DoubleArrow );
 702 }
 703
 704         /* Double star (longest match kleene star). */
 705 <RL_INITIAL>\*\* {
 706         return emitNoData( TK_StarStar );
 707 }
 708
 709         /* Name separator. */
 710 <RL_INITIAL>:: {
 711         return emitNoData( TK_NameSep );
 712 }
 713
 714         /* Opening of longest match. */
 715 <RL_INITIAL>\|\* {
 716         return emitNoData( TK_BarStar );
 717 }
 718
 719         /* Catch the repetition operator now to free up the parser. Once caught,
 720          * Send only the opening brace and rescan the rest so it can be broken
 721          * up for the parser. */
 722 <RL_INITIAL>\{([0-9]+(,[0-9]*)?|,[0-9]+)\} {
 723         yyless(1);
 724         return emitNoData( TK_RepOpOpen );
 725 }
 726
 727         /* Section delimiter: closes the machine specification. */
 728 <RL_INITIAL>"}%%" {
 729         BEGIN(INITIAL);
 730         return emitNoData( TK_Section );
 731 }
 732
 733         /* Whitespace. */
 734 <RL_INITIAL>[\t\v\f\r ]         garble();
 735 <RL_INITIAL>\n {
 736         if ( multiline )
 737                 garble();
 738         else {
 739                 BEGIN(INITIAL);
 740                 return emitNoData( TK_SectionNL );
 741         }
 742 }
 743
 744         /* Any other characters. */
 745 <RL_INITIAL>. {
 746         return emitNoData( *yytext );
 747 }
 748
 749         /* End of input in a literal is an error. */
 750 <RL_SLIT,RL_DLIT><<EOF>> {
 751         error(id->first_line, id->first_column) << "unterminated literal" << endl;
 752         exit(1);
 753 }
 754
 755         /* End of input in a comment is an error. */
 756 <RL_SHELL_COM><<EOF>> {
 757         error(id->first_line, id->first_column) << "unterminated comment" << endl;
 758         exit(1);
 759 }
 760
 761         /* Begin a C style comment. */
 762 <IL_INITIAL>"/*" {
 763         BEGIN(IL_C_COM);
 764         il_comm_lit_first_line = id->last_line;
 765         il_comm_lit_first_column = id->last_column+1;
 766         extendToken( yytext, yyleng );
 767 }
 768         /* Data in a C style comment. */
 769 <IL_C_COM>\n    extendToken( yytext, 1 );
 770 <IL_C_COM>.     extendToken( yytext, 1 );
 771
 772         /* Terminate a C style comment. */
 773 <IL_C_COM>"*/" {
 774         BEGIN(IL_INITIAL);
 775         return emitToken( IL_Comment, yytext, 2 );
 776 }
 777
 778         /* Begin a C++ style comment. */
 779 <IL_INITIAL>"//" {
 780         BEGIN(IL_CXX_COM);
 781         il_comm_lit_first_line = id->last_line;
 782         il_comm_lit_first_column = id->last_column+1;
 783         extendToken( yytext, yyleng );
 784 }
 785         /* Data in a C++ style comment. */
 786 <IL_CXX_COM>[^\n]+ {
 787         extendToken( yytext, yyleng );
 788 }
 789         /* Terminate a C++ style comment. */
 790 <IL_CXX_COM>\n {
 791         BEGIN(IL_INITIAL);
 792         return emitToken( IL_Comment, yytext, 1 );
 793 }
 794
 795
 796         /* Start literals. */
 797 <IL_INITIAL>' {
 798         BEGIN(IL_SGL_LIT);
 799         il_comm_lit_first_line = id->last_line;
 800         il_comm_lit_first_column = id->last_column+1;
 801         extendToken( yytext, 1 );
 802 }
 803 <IL_INITIAL>\" {
 804         BEGIN(IL_DBL_LIT);
 805         il_comm_lit_first_line = id->last_line;
 806         il_comm_lit_first_column = id->last_column+1;
 807         extendToken( yytext, 1 );
 808 }
 809         /* Various escape sequences in literals. We don't need to get them
 810          * all here. We just need to pick off the ones that could confuse us
 811          * about the literal we are matching */
 812 <IL_SGL_LIT,IL_DBL_LIT>\\'              extendToken( yytext, yyleng );
 813 <IL_SGL_LIT,IL_DBL_LIT>\\\"             extendToken( yytext, yyleng );
 814 <IL_SGL_LIT,IL_DBL_LIT>\\\\             extendToken( yytext, yyleng );
 815         /* Characters in literals. */
 816 <IL_DBL_LIT>[^\"]                               extendToken( yytext, 1 );
 817 <IL_SGL_LIT>[^']                                extendToken( yytext, 1 );
 818
 819         /* Terminate a double literal */
 820 <IL_DBL_LIT>\" {
 821         BEGIN(IL_INITIAL);
 822         return emitToken( IL_Literal, yytext, 1 );
 823 }
 824         /* Terminate a single literal. */
 825 <IL_SGL_LIT>' {
 826         BEGIN(IL_INITIAL);
 827         return emitToken( IL_Literal, yytext, 1 );
 828 }
 829
 830         /* Open Brace, increment count of open braces. */
 831 <IL_INITIAL>"{" {
 832         builtinBrace++;
 833         return emitToken( IL_Symbol, yytext, 1 );
 834 }
 835
 836         /* Close brace, decrement count of open braces. */
 837 <IL_INITIAL>"}" {
 838         builtinBrace--;
 839         if ( inlineBlockType == CurlyDelimited && builtinBrace == 0 ) {
 840                 /* Inline code block ends. */
 841                 BEGIN(RL_INITIAL);
 842                 inlineWhitespace = true;
 843                 return emitNoData( *yytext );
 844         }
 845         else {
 846                 /* Either a semi terminated inline block or only the closing brace of
 847                  * some inner scope, not the block's closing brace. */
 848                 return emitToken( IL_Symbol, yytext, 1 );
 849         }
 850 }
 851
 852         /* May need to terminate the inline block. */
 853 <IL_INITIAL>; {
 854         if ( inlineBlockType == SemiTerminated ) {
 855                 /* Inline code block ends. */
 856                 BEGIN(RL_INITIAL);
 857                 inlineWhitespace = true;
 858                 return emitNoData( TK_Semi );
 859         }
 860         else {
 861                 /* Not ending. The semi is sent as a token, not a generic symbol. */
 862                 return emitNoData( *yytext );
 863         }
 864 }
 865
 866         /* Catch some symbols so they can be
 867          * sent as tokens instead as generic symbols. */
 868 <IL_INITIAL>[*()] {
 869         return emitNoData( *yytext );
 870 }
 871 <IL_INITIAL>:: {
 872         return emitNoData( TK_NameSep );
 873 }
 874
 875         /* Whitespace. */
 876 <IL_INITIAL>{WSCHAR}+ {
 877         if ( inlineWhitespace )
 878                 return emitToken( IL_WhiteSpace, yytext, yyleng );
 879 }
 880
 881         /* Any other characters. */
 882 <IL_INITIAL>. {
 883         return emitToken( IL_Symbol, yytext, 1 );
 884 }
 885
 886 <INITIAL><<EOF>> {
 887         /* If we are not at the bottom of the include stack, then pop the current
 888          * file that we are scanning. Since we are always returning 0 to the parser
 889          * it will exit and return to the parser that called it. */
 890         if ( inc_stack_ptr > 0 )
 891                 popInclude();
 892         return 0;
 893 }
 894
 895         /* End of input in a literal is an error. */
 896 <IL_SGL_LIT,IL_DBL_LIT><<EOF>>          {
 897         error(il_comm_lit_first_line, il_comm_lit_first_column) <<
 898                         "unterminated literal" << endl;
 899         exit(1);
 900 }
 901
 902         /* End of input in a comment is an error. */
 903 <IL_C_COM,IL_CXX_COM><<EOF>>    {
 904         error(il_comm_lit_first_line, il_comm_lit_first_column) <<
 905                         "unterminated comment" << endl;
 906         exit(1);
 907 }
 908
 909         /* End of input in a code block. */
 910 <IL_INITIAL><<EOF>> {
 911         error(il_code_first_line, il_code_first_column) <<
 912                         "unterminated code block" << endl;
 913         exit(1);
 914 }
 915
 916 %%
 917
 918 /* Write out token data, escaping special charachters. */
 919 #ifdef WANT_TOKEN_WRITE
 920 void writeToken( int token, char *data )
 921 {
 922         cout << "token id " << token << " at " << id->fileName << ":" <<
 923                         yylloc->first_line << ":" << yylloc->first_column << "-" <<
 924                         yylloc->last_line << ":" << yylloc->last_column << " ";
 925
 926         if ( data != 0 ) {
 927                 while ( *data != 0 ) {
 928                         switch ( *data ) {
 929                         case '\n':      cout << "\\n"; break;
 930                         case '\t':      cout << "\\t"; break;
 931                         default:        cout << *data; break;
 932                         }
 933                         data += 1;
 934                 }
 935         }
 936         cout << endl;
 937 }
 938 #endif
 939
 940 /* Caclulate line info from yytext. Called on every pattern match. */
 941 void updateLineInfo()
 942 {
 943         /* yytext should always have at least one char. */
 944         assert( yytext[0] != 0 );
 945
 946         /* Scan through yytext up to the last character. */
 947         char *p = yytext;
 948         for ( ; p[1] != 0; p++ ) {
 949                 if ( p[0] == '\n' ) {
 950                         id->last_line += 1;
 951                         id->last_column = 0;
 952                 }
 953                 else {
 954                         id->last_column += 1;
 955                 }
 956         }
 957
 958         /* Always consider the last character as not a newline. Newlines at the
 959          * end of a token are as any old character at the end of the line. */
 960         id->last_column += 1;
 961
 962         /* The caller may be about to emit a token, be prepared to pass the line
 963          * info to the parser. */
 964         yylloc->first_line = id->first_line;
 965         yylloc->first_column = id->first_column;
 966         yylloc->last_line = id->last_line;
 967         yylloc->last_column = id->last_column;
 968
 969         /* If the last character was indeed a newline, then wrap ahead now. */
 970         if ( p[0] == '\n' ) {
 971                 id->last_line += 1;
 972                 id->last_column = 0;
 973         }
 974 }
 975
 976 /* Eat up a matched pattern that will not be part of a token. */
 977 void garble()
 978 {
 979         /* Update line information from yytext. */
 980         updateLineInfo();
 981
 982         /* The next token starts ahead of the last token. */
 983         id->first_line = id->last_line;
 984         id->first_column = id->last_column + 1;
 985 }
 986
 987 /* Append data to the end of the token. More token data expected. */
 988 void extendToken( char *data, int len )
 989 {
 990         if ( data != 0 && len > 0 )
 991                 tokbuf.append( data, len );
 992
 993         /* Update line information from yytext. */
 994         updateLineInfo();
 995 }
 996
 997 /* Extend, but with no data, more data to come. */
 998 void extendToken()
 999 {
1000         /* Update line information from yytext. */
1001         updateLineInfo();
1002 }
1003
1004
1005 /* Possibly process include data. */
1006 void processInclude( int token )
1007 {
1008         static char *incFileName = 0;
1009
1010         if ( handlingInclude ) {
1011                 if ( token == KW_Include )
1012                         incFileName = 0;
1013                 else if ( token == TK_Literal )
1014                         incFileName = yylval->data.data;
1015                 else if ( token == ';' ) {
1016                         /* Terminate the include statement. Start reading from included file. */
1017                         handlingInclude = false;
1018
1019                         if ( id->active && includeDepth < INCLUDE_STACK_SIZE ) {
1020                                 /* If there is no section name or input file, default to the curren values. */
1021                                 if ( incFileName == 0 )
1022                                         incFileName = id->fileName;
1023
1024                                 /* Make the new buffer and switch to it. */
1025                                 FILE *incFile = fopen( incFileName, "rt" );
1026                                 if ( incFile != 0 ) {
1027                                         buff_stack[inc_stack_ptr] = YY_CURRENT_BUFFER;
1028                                         multiline_stack[inc_stack_ptr] = multiline;
1029                                         inc_stack_ptr += 1;
1030                                         yy_switch_to_buffer( yy_create_buffer( incFile, YY_BUF_SIZE ) );
1031                                         BEGIN(INITIAL);
1032                                 }
1033                                 else {
1034                                         error(*yylloc) << "could not locate include file \"" << incFileName
1035                                                         << "\"" << endl;
1036                                 }
1037                         }
1038                 }
1039         }
1040 }
1041
1042 void popInclude()
1043 {
1044         /* Free the current buffer and move to the previous. */
1045         yy_delete_buffer( YY_CURRENT_BUFFER );
1046         inc_stack_ptr -= 1;
1047         yy_switch_to_buffer( buff_stack[inc_stack_ptr] );
1048         multiline = multiline_stack[inc_stack_ptr];
1049
1050         /* Includes get called only from RL_INITIAL. */
1051         BEGIN(RL_INITIAL);
1052 }
1053
1054
1055 /* Append data to the end of a token and emitToken it to the parser. */
1056 int emitToken( int token, char *data, int len )
1057 {
1058         /* Append any new data. */
1059         if ( data != 0 && len > 0 )
1060                 tokbuf.append( data, len );
1061
1062         /* Duplicate the buffer. */
1063         yylval->data.length = tokbuf.length;
1064         yylval->data.data = new char[tokbuf.length+1];
1065         memcpy( yylval->data.data, tokbuf.data, tokbuf.length );
1066         yylval->data.data[tokbuf.length] = 0;
1067
1068         /* Update line information from yytext. */
1069         updateLineInfo();
1070
1071         /* Write token info. */
1072 #ifdef WANT_TOKEN_WRITE
1073         writeToken( token, tokbuf.data );
1074 #endif
1075
1076         /* Clear out the buffer. */
1077         tokbuf.clear();
1078
1079         /* The next token starts ahead of the last token. */
1080         id->first_line = id->last_line;
1081         id->first_column = id->last_column + 1;
1082
1083         /* Maintain a record of two tokens back. */
1084         previous_tokens[1] = previous_tokens[0];
1085         previous_tokens[0] = token;
1086
1087         /* Possibly process the include statement; */
1088         processInclude( token );
1089
1090         return token;
1091 }
1092
1093 /* Emit a token with no data to the parser. */
1094 int emitNoData( int token )
1095 {
1096         /* Return null to the parser. */
1097         yylval->data.data = 0;
1098         yylval->data.length = 0;
1099
1100         /* Update line information from yytext. */
1101         updateLineInfo();
1102
1103         /* Write token info. */
1104 #ifdef WANT_TOKEN_WRITE
1105         writeToken( token, 0 );
1106 #endif
1107
1108         /* Clear out the buffer. */
1109         tokbuf.clear();
1110
1111         /* The next token starts ahead of the last token. */
1112         id->first_line = id->last_line;
1113         id->first_column = id->last_column + 1;
1114
1115         /* Maintain a record of two tokens back. */
1116         previous_tokens[1] = previous_tokens[0];
1117         previous_tokens[0] = token;
1118
1119         /* Possibly process the include statement; */
1120         processInclude( token );
1121
1122         return token;
1123 }
1124
1125 /* Pass tokens in outter code through to the output. */
1126 void passThrough( char *data )
1127 {
1128         /* If no errors and we are at the bottom of the include stack (the source
1129          * file listed on the command line) then write out the data. */
1130         if ( gblErrorCount == 0 && inc_stack_ptr == 0 &&
1131                         machineSpec == 0 && machineName == 0 )
1132         {
1133                 xmlEscapeHost( *outStream, data );
1134         }
1135 }
1136
1137 /* Init a buffer. */
1138 Buffer::Buffer()
1139 :
1140         data(0),
1141         length(0),
1142         allocated(0)
1143 {
1144 }
1145
1146 /* Empty out a buffer on destruction. */
1147 Buffer::~Buffer()
1148 {
1149         empty();
1150 }
1151
1152 /* Free the space allocated for the buffer. */
1153 void Buffer::empty()
1154 {
1155         if ( data != 0 ) {
1156                 free( data );
1157
1158                 data = 0;
1159                 length = 0;
1160                 allocated = 0;
1161         }
1162 }
1163
1164 /* Grow the buffer when to len allocation. */
1165 void Buffer::upAllocate( int len )
1166 {
1167         if ( data == 0 )
1168                 data = (char*) malloc( len );
1169         else
1170                 data = (char*) realloc( data, len );
1171         allocated = len;
1172 }
1173
int yywrap()
{
        /* Once processing of the input is done, signal that there is no more
         * to come by returning nonzero. */
        enum { NoMoreInput = 1 };
        return NoMoreInput;
}
1179
/* Here simply to suppress the unused yyunput warning: it exists only so that
 * yyunput is referenced somewhere. It is never meant to be called. */
void thisFuncIsNeverCalled()
{
        yyunput(0, 0);
}
1185
/* Put the scanner back into the outside code start state, INITIAL. */
void beginOutsideCode()
{
        BEGIN(INITIAL);
}
1191
1192 /* Determine if we are opening a machine specification block. */
1193 bool openMachineSpecBlock()
1194 {
1195         if ( previous_tokens[1] == TK_Section && previous_tokens[0] == TK_Word )
1196                 return true;
1197         else if ( previous_tokens[0] == TK_Section )
1198                 return true;
1199         return false;
1200 }
1201
1202 /* Wrapper for the lexer which stores the locations of the value and location
1203  * variables of the parser into globals. The parser is reentrant, however the scanner
1204  * does not need to be, so globals work fine. This saves us passing them around
1205  * all the helper functions. */
1206 int yylex( YYSTYPE *yylval, YYLTYPE *yylloc )
1207 {
1208         ::yylval = yylval;
1209         ::yylloc = yylloc;
1210         return ragel_lex();
1211 }
1212