libs/wave/samples/token_statistics/xlex/xlex_lexer.hpp

   1 /*=============================================================================
   2     Boost.Wave: A Standard compliant C++ preprocessor library
   3
   4     Xpressive based C++ lexer
   5
   6     http://www.boost.org/
   7
   8     Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
   9     Software License, Version 1.0. (See accompanying file
  10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  11 =============================================================================*/
  12
  13 #if !defined(XLEX_LEXER_HPP)
  14 #define XLEX_LEXER_HPP
  15
  16 #include <string>
  17 #include <cstdio>
  18 #include <cstdarg>
  19 #if defined(BOOST_SPIRIT_DEBUG)
  20 #include <iostream>
  21 #endif // defined(BOOST_SPIRIT_DEBUG)
  22
  23 #include <boost/concept_check.hpp>
  24 #include <boost/assert.hpp>
  25 #include <boost/spirit/include/classic_core.hpp>
  26
  27 #include <boost/wave/token_ids.hpp>
  28 #include <boost/wave/language_support.hpp>
  29 #include <boost/wave/util/file_position.hpp>
  30 #include <boost/wave/cpplexer/validate_universal_char.hpp>
  31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  32 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  33 #include <boost/wave/cpplexer/detect_include_guards.hpp>
  34 #endif
  35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
  36
  37 // reuse the default token type
  38 #include "../xlex_iterator.hpp"
  39
  40 // include the xpressive headers
  41 #include "xpressive_lexer.hpp"
  42
  43 ///////////////////////////////////////////////////////////////////////////////
  44 namespace boost {
  45 namespace wave {
  46 namespace cpplexer {
  47 namespace xlex {
  48 namespace lexer {
  49
  50 ///////////////////////////////////////////////////////////////////////////////
  51 //
  52 //  encapsulation of the xpressive based C++ lexer
     //
     //  Iterator is the type of the two iterators delimiting the input text,
     //  Position is the file position type carried by the generated tokens.
  53 //
  54 ///////////////////////////////////////////////////////////////////////////////
  55
  56 template <
  57     typename Iterator,
  58     typename Position = boost::wave::util::file_position_type
  59 >
  60 class lexer
  61 {
  62 public:
  63     typedef char                                        char_type;
  64     typedef boost::wave::cpplexer::lex_token<Position>  token_type;
  65     typedef typename token_type::string_type            string_type;
  66
         // registers the token regexes selected by 'language' and stores the
         // input range [first, last) and the file name taken from 'pos'
         // (defined further down in this file)
  67     lexer(Iterator const &first, Iterator const &last,
  68         Position const &pos, boost::wave::language_support language);
  69     ~lexer() {}
  70
         // stores the next token in 't' and returns a reference to 't'
         // (defined further down in this file)
  71     token_type& get(token_type& t);

         // overrides the position information attached to subsequent tokens
  72     void set_position(Position const &pos)
  73     {
  74         // set position has to change the file name and line number only,
             // the column of 'pos' is not used
  75         filename = pos.get_file();
  76         line = pos.get_line();
  77     }
  78
  79 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
         // forwards to the include guard detector, which get() feeds with every
         // token it returns; the result is whatever guards.detected() reports
  80     bool has_include_guards(std::string& guard_name) const
  81         { return guards.detected(guard_name); }
  82 #endif
  83
  84 private:
  85     typedef xpressive_lexer<Iterator, token_id> lexer_type;
  86     typedef typename lexer_type::callback_type callback_type;
  87
  88     lexer_type xlexer;      // matcher holding the registered token regexes
  89     Iterator first;         // input range, handed to xlexer.next_token()
  90     Iterator last;
  91
  92     string_type filename;   // file name used for the token positions
  93     int line;               // line number used for the token positions
  94     bool at_eof;            // set by get() once T_EOF has been returned
  95     boost::wave::language_support language;    // language mode in effect
  96
  97 // initialization data (regular expressions for the token definitions)
  98     struct lexer_data {
  99         token_id tokenid;                 // token data
 100         char_type const *tokenregex;      // regular expression matching the token
 101         callback_type tokencb;            // associated callback function
 102     };
 103
     // both tables end with an entry whose tokenid is 0; the constructor stops
     // registering at that entry
 104     static lexer_data const init_data[];        // common patterns
 105     static lexer_data const init_data_cpp[];    // C++ only patterns
 106
 107 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
         // include guard detection state, updated by get()
 108     boost::wave::cpplexer::include_guards<token_type> guards;
 109 #endif
 110 };
 111
 112 ///////////////////////////////////////////////////////////////////////////////
 113 //  helper for initializing token data
     //
     //  TOKEN_DATA expands to a lexer_data initializer without callback (the
     //  callback slot is 0), TOKEN_DATA_EX to one with the given callback.
 114 #define TOKEN_DATA(id, regex) \
 115     { id, regex, 0 }
 116
 117 #define TOKEN_DATA_EX(id, regex, callback) \
 118     { id, regex, callback }
 119
 120 ///////////////////////////////////////////////////////////////////////////////
 121 //  data required for initialization of the lexer (token definitions)
     //
     //  All macros below expand to string literals; adjacent literals are
     //  concatenated by the compiler into one regular expression.
     //    OR      regex alternation
     //    Q(c)    c preceded by one backslash, i.e. the first character of c is
     //            regex-escaped
     //    TRI(c)  two escaped question marks followed by c (trigraph spelling)
 122 #define OR                  "|"
 123 #define Q(c)                "\\" c
 124 #define TRI(c)              Q("?") Q("?") c
 125
 126 // definition of some subtoken regexps to simplify the regex definitions
 127 #define BLANK               "[ \t]"
     // a C comment, matched non-greedily up to the first closing delimiter
 128 #define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")
 129
     // any run of blanks and C comments (used between '#' and a directive name)
 130 #define PPSPACE             "(" BLANK OR CCOMMENT ")*"
 131
 132 #define OCTALDIGIT          "[0-7]"
 133 #define DIGIT               "[0-9]"
 134 #define HEXDIGIT            "[0-9a-fA-F]"
 135 #define SIGN                "[-+]?"
 136 #define EXPONENT            "(" "[eE]" SIGN "[0-9]+" ")"
 137
     // hexadecimal, octal (this alternative also covers a plain 0) or decimal
 138 #define INTEGER             "(" \
 139                                 "(0x|0X)" HEXDIGIT "+" OR \
 140                                 "0" OCTALDIGIT "*" OR \
 141                                 "[1-9]" DIGIT "*" \
 142                             ")"
 143
 144 #define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"
     // 'long long' suffixes; with MS extensions enabled "i64" is accepted as well
 145 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
 146 #define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "[lL][lL]" ")" OR \
 147                                 "(" "[lL][lL]" ")" "[uU]" "?" OR \
 148                                 "i64" \
 149                             ")"
 150 #else
 151 #define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "[lL][lL]" ")" OR \
 152                             "(" "[lL][lL]" ")" "[uU]" "?" ")"
 153 #endif
 154 #define FLOAT_SUFFIX        "(" "[fF][lL]?|[lL][fF]?" ")"
     // optional wide-literal prefix of character and string literals
 155 #define CHAR_SPEC           "L?"
 156
     // a backslash, written directly or as its trigraph
 157 #define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
     // simple, hexadecimal and octal (one to three digits) escape sequences
 158 #define ESCAPESEQ           BACKSLASH "(" \
 159                                 "[abfnrtv?'\"]" OR \
 160                                 BACKSLASH OR \
 161                                 "x" HEXDIGIT "+" OR \
 162                                 OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
 163                             ")"
 164 #define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
     // universal character names with four ('u') or eight ('U') hex digits
 165 #define UNIVERSALCHAR       BACKSLASH "(" \
 166                                 "u" HEXQUAD OR \
 167                                 "U" HEXQUAD HEXQUAD \
 168                             ")"
 169
     // the '#' introducing a directive: plain, as trigraph or as digraph
 170 #define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
 171 #define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"
 172
     // directive name(s) recognized for include directives
 173 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
 174 #define INCLUDEDEF          "(include_next|include)"
 175 #else
 176 #define INCLUDEDEF          "include"
 177 #endif
 178
 179 ///////////////////////////////////////////////////////////////////////////////
 180 // common C++/C99 token definitions
     //
     // Each entry pairs a token id with the regular expression matching it. The
     // constructor registers the entries with the xpressive lexer in array
     // order and stops at the terminating entry (tokenid 0).
     // Longer spellings are listed before their prefixes (e.g. "&&" and "&="
     // precede "&", keywords precede T_IDENTIFIER), so the order is presumably
     // significant for matching -- TODO confirm in xpressive_lexer.hpp before
     // reordering anything.
 181 template <typename Iterator, typename Position>
 182 typename lexer<Iterator, Position>::lexer_data const
 183 lexer<Iterator, Position>::init_data[] =
 184 {
         // comments, character and string literals
 185     TOKEN_DATA(T_CCOMMENT, CCOMMENT),
 186     TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
 187     TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
 188                 "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
 189     TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
 190                 "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
         // operators and punctuators (including digraph and trigraph spellings)
 191     TOKEN_DATA(T_ANDAND, "&&"),
 192     TOKEN_DATA(T_ANDASSIGN, "&="),
 193     TOKEN_DATA(T_AND, "&"),
 194     TOKEN_DATA(T_EQUAL, "=="),
 195     TOKEN_DATA(T_ASSIGN, "="),
 196     TOKEN_DATA(T_ORASSIGN, Q("|=")),
 197     TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
 198     TOKEN_DATA(T_OROR, Q("|") Q("|")),
 199     TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
 200     TOKEN_DATA(T_OR, Q("|")),
 201     TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
 202     TOKEN_DATA(T_XORASSIGN, Q("^=")),
 203     TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
 204     TOKEN_DATA(T_XOR, Q("^")),
 205     TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
 206     TOKEN_DATA(T_COMMA, ","),
 207     TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
 208     TOKEN_DATA(T_COLON, ":"),
 209     TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
 210     TOKEN_DATA(T_DIVIDE, Q("/")),
 211     TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
 212     TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
 213     TOKEN_DATA(T_SHIFTRIGHT, ">>"),
 214     TOKEN_DATA(T_GREATEREQUAL, ">="),
 215     TOKEN_DATA(T_GREATER, ">"),
 216     TOKEN_DATA(T_LEFTBRACE, Q("{")),
 217     TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
 218     TOKEN_DATA(T_SHIFTLEFT, "<<"),
 219     TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
 220     TOKEN_DATA(T_LESSEQUAL, "<="),
 221     TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
 222     TOKEN_DATA(T_LESS, "<"),
 223     TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
 224     TOKEN_DATA(T_LEFTPAREN, Q("(")),
 225     TOKEN_DATA(T_LEFTBRACKET, Q("[")),
 226     TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
 227     TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
 228     TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
 229     TOKEN_DATA(T_ARROW, Q("->")),
 230     TOKEN_DATA(T_MINUS, Q("-")),
 231     TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
 232     TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
 233     TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
 234     TOKEN_DATA(T_POUND_ALT, Q("%:")),
 235     TOKEN_DATA(T_PERCENT, Q("%")),
 236     TOKEN_DATA(T_NOTEQUAL, "!="),
 237     TOKEN_DATA(T_NOT, "!"),
 238     TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
 239     TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
 240     TOKEN_DATA(T_PLUS, Q("+")),
 241     TOKEN_DATA(T_RIGHTBRACE, Q("}")),
 242     TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
 243     TOKEN_DATA(T_RIGHTPAREN, Q(")")),
 244     TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
 245     TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
 246     TOKEN_DATA(T_SEMICOLON, ";"),
 247     TOKEN_DATA(T_STARASSIGN, Q("*=")),
 248     TOKEN_DATA(T_STAR, Q("*")),
 249     TOKEN_DATA(T_COMPL, Q("~")),
 250     TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
         // keywords
 251     TOKEN_DATA(T_ASM, "asm"),
 252     TOKEN_DATA(T_AUTO, "auto"),
 253     TOKEN_DATA(T_BOOL, "bool"),
 254     TOKEN_DATA(T_FALSE, "false"),
 255     TOKEN_DATA(T_TRUE, "true"),
 256     TOKEN_DATA(T_BREAK, "break"),
 257     TOKEN_DATA(T_CASE, "case"),
 258     TOKEN_DATA(T_CATCH, "catch"),
 259     TOKEN_DATA(T_CHAR, "char"),
 260     TOKEN_DATA(T_CLASS, "class"),
 261     TOKEN_DATA(T_CONSTCAST, "const_cast"),
 262     TOKEN_DATA(T_CONST, "const"),
 263     TOKEN_DATA(T_CONTINUE, "continue"),
 264     TOKEN_DATA(T_DEFAULT, "default"),
 265     TOKEN_DATA(T_DELETE, "delete"),
 266     TOKEN_DATA(T_DOUBLE, "double"),
 267     TOKEN_DATA(T_DO, "do"),
 268     TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
 269     TOKEN_DATA(T_ELSE, "else"),
 270     TOKEN_DATA(T_ENUM, "enum"),
 271     TOKEN_DATA(T_EXPLICIT, "explicit"),
 272     TOKEN_DATA(T_EXPORT, "export"),
 273     TOKEN_DATA(T_EXTERN, "extern"),
 274     TOKEN_DATA(T_FLOAT, "float"),
 275     TOKEN_DATA(T_FOR, "for"),
 276     TOKEN_DATA(T_FRIEND, "friend"),
 277     TOKEN_DATA(T_GOTO, "goto"),
 278     TOKEN_DATA(T_IF, "if"),
 279     TOKEN_DATA(T_INLINE, "inline"),
 280     TOKEN_DATA(T_INT, "int"),
 281     TOKEN_DATA(T_LONG, "long"),
 282     TOKEN_DATA(T_MUTABLE, "mutable"),
 283     TOKEN_DATA(T_NAMESPACE, "namespace"),
 284     TOKEN_DATA(T_NEW, "new"),
 285     TOKEN_DATA(T_OPERATOR, "operator"),
 286     TOKEN_DATA(T_PRIVATE, "private"),
 287     TOKEN_DATA(T_PROTECTED, "protected"),
 288     TOKEN_DATA(T_PUBLIC, "public"),
 289     TOKEN_DATA(T_REGISTER, "register"),
 290     TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
 291     TOKEN_DATA(T_RETURN, "return"),
 292     TOKEN_DATA(T_SHORT, "short"),
 293     TOKEN_DATA(T_SIGNED, "signed"),
 294     TOKEN_DATA(T_SIZEOF, "sizeof"),
 295     TOKEN_DATA(T_STATICCAST, "static_cast"),
 296     TOKEN_DATA(T_STATIC, "static"),
 297     TOKEN_DATA(T_STRUCT, "struct"),
 298     TOKEN_DATA(T_SWITCH, "switch"),
 299     TOKEN_DATA(T_TEMPLATE, "template"),
 300     TOKEN_DATA(T_THIS, "this"),
 301     TOKEN_DATA(T_THROW, "throw"),
 302     TOKEN_DATA(T_TRY, "try"),
 303     TOKEN_DATA(T_TYPEDEF, "typedef"),
 304     TOKEN_DATA(T_TYPEID, "typeid"),
 305     TOKEN_DATA(T_TYPENAME, "typename"),
 306     TOKEN_DATA(T_UNION, "union"),
 307     TOKEN_DATA(T_UNSIGNED, "unsigned"),
 308     TOKEN_DATA(T_USING, "using"),
 309     TOKEN_DATA(T_VIRTUAL, "virtual"),
 310     TOKEN_DATA(T_VOID, "void"),
 311     TOKEN_DATA(T_VOLATILE, "volatile"),
 312     TOKEN_DATA(T_WCHART, "wchar_t"),
 313     TOKEN_DATA(T_WHILE, "while"),
         // Microsoft specific keywords and directives
 314 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
 315     TOKEN_DATA(T_MSEXT_INT8, "__int8"),
 316     TOKEN_DATA(T_MSEXT_INT16, "__int16"),
 317     TOKEN_DATA(T_MSEXT_INT32, "__int32"),
 318     TOKEN_DATA(T_MSEXT_INT64, "__int64"),
 319     TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
 320     TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
 321     TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
 322     TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
 323     TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
 324     TOKEN_DATA(T_MSEXT_TRY , "__try"),
 325     TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
 326     TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
 327     TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
 328     TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
 329     TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
 330     TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
 331     TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
 332 #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
         // preprocessor directives: '#' (in any spelling), optional blanks and
         // C comments, then the directive name
 333     TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
 334     TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
 335     TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
 336     TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
 337     TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
 338     TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
 339     TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
 340     TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
 341     TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
 342         INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
 343     TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
 344         INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
 345     TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
 346         INCLUDEDEF PPSPACE),
 347     TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
 348     TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
 349     TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
 350     TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
         // numeric literals
 351     TOKEN_DATA(T_FLOATLIT,
 352         "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
 353         EXPONENT "?" FLOAT_SUFFIX "?" OR
 354         DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
 355     TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
 356     TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
         // identifiers; the non-strict lexer additionally accepts '$'
 357 #if BOOST_WAVE_USE_STRICT_LEXER != 0
 358     TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
 359 #else
 360     TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
 361 #endif
         // whitespace, line handling and remaining single characters
 362     TOKEN_DATA(T_SPACE, BLANK "+"),
 363     TOKEN_DATA(T_SPACE2, "[\v\f]+"),
 364     TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
 365     TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
 366     TOKEN_DATA(T_POUND_POUND, "##"),
 367     TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
 368     TOKEN_DATA(T_POUND, "#"),
 369     TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
 370     TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
 371     TOKEN_DATA(T_QUESTION_MARK, Q("?")),
 372     TOKEN_DATA(T_DOT, Q(".")),
         // catch-all: any single character not matched by an entry above
 373     TOKEN_DATA(T_ANY, "."),
         // terminator (tokenid 0), the registration loop stops here
 374     { token_id(0) }       // this should be the last entry
 375 };
 376
 377 ///////////////////////////////////////////////////////////////////////////////
 378 // C++ only token definitions
     //
     // Alternative operator spellings and the C++-only operators "->*", ".*"
     // and "::". The constructor registers these entries, ahead of the common
     // ones, only when the language mode is not C99; registration stops at the
     // terminating entry (tokenid 0).
 379 template <typename Iterator, typename Position>
 380 typename lexer<Iterator, Position>::lexer_data const
 381 lexer<Iterator, Position>::init_data_cpp[] =
 382 {
         // the longer alternative spellings precede their prefixes
         // ("and_eq" before "and", "or_eq" before "or", ...)
 383     TOKEN_DATA(T_AND_ALT, "bitand"),
 384     TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
 385     TOKEN_DATA(T_ANDAND_ALT, "and"),
 386     TOKEN_DATA(T_OR_ALT, "bitor"),
 387     TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
 388     TOKEN_DATA(T_OROR_ALT, "or"),
 389     TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
 390     TOKEN_DATA(T_XOR_ALT, "xor"),
 391     TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
 392     TOKEN_DATA(T_NOT_ALT, "not"),
 393     TOKEN_DATA(T_COMPL_ALT, "compl"),
 394     TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
 395     TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
 396     TOKEN_DATA(T_COLON_COLON, "::"),
         // terminator (tokenid 0), the registration loop stops here
 397     { token_id(0) }       // this should be the last entry
 398 };
 399
 400 ///////////////////////////////////////////////////////////////////////////////
 401 //  undefine macros, required for regular expression definitions
     //
     //  Every helper macro defined for the token tables is removed again, so
     //  that none of these short, generic names leaks into code including this
     //  header. BLANK and NEWLINEDEF are part of that set as well.
 402 #undef INCLUDEDEF
 403 #undef POUNDDEF
     #undef NEWLINEDEF
 404 #undef CCOMMENT
 405 #undef PPSPACE
     #undef BLANK
 406 #undef DIGIT
 407 #undef OCTALDIGIT
 408 #undef HEXDIGIT
 409 #undef SIGN
 410 #undef EXPONENT
 411 #undef LONGINTEGER_SUFFIX
 412 #undef INTEGER_SUFFIX
 413 #undef INTEGER
 414 #undef FLOAT_SUFFIX
 415 #undef CHAR_SPEC
 416 #undef BACKSLASH
 417 #undef ESCAPESEQ
 418 #undef HEXQUAD
 419 #undef UNIVERSALCHAR
 420
 421 #undef Q
 422 #undef TRI
 423 #undef OR
 424
 425 #undef TOKEN_DATA
 426 #undef TOKEN_DATA_EX
 427
 428 ///////////////////////////////////////////////////////////////////////////////
 429 // initialize cpp lexer
 430 template <typename Iterator, typename Position>
 431 inline
 432 lexer<Iterator, Position>::lexer(Iterator const &first,
 433         Iterator const &last, Position const &pos,
 434         boost::wave::language_support language)
 435 :   first(first), last(last),
 436     filename(pos.get_file()), line(0), at_eof(false), language(language)
 437 {
 438 // if in C99 mode, some of the keywords/operators are not valid
 439     if (!boost::wave::need_c99(language)) {
 440         for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
 441             xlexer.register_regex(init_data_cpp[j].tokenregex,
 442                 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
 443         }
 444     }
 445
 446 // tokens valid for C++ and C99
 447     for (int i = 0; 0 != init_data[i].tokenid; ++i) {
 448         xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
 449             init_data[i].tokencb);
 450     }
 451 }
 452
 453 ///////////////////////////////////////////////////////////////////////////////
 454 //  get the next token from the input stream
 455 template <typename Iterator, typename Position>
 456 inline boost::wave::cpplexer::lex_token<Position>&
 457 lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
 458 {
 459     using namespace boost::wave;    // to import token ids to this scope
 460
 461     if (at_eof)
 462         return t = cpplexer::lex_token<Position>();  // return T_EOI
 463
 464     std::string tokval;
 465     token_id id = xlexer.next_token(first, last, tokval);
 466     string_type value = tokval.c_str();
 467
 468     if ((token_id)(-1) == id)
 469         id = T_EOF;     // end of input reached
 470
 471     if (T_IDENTIFIER == id) {
 472     // test identifier characters for validity (throws if invalid chars found)
 473         if (!boost::wave::need_no_character_validation(language)) {
 474             cpplexer::impl::validate_identifier_name(value, line, -1, filename);
 475         }
 476     }
 477     else if (T_STRINGLIT == id || T_CHARLIT == id) {
 478     // test literal characters for validity (throws if invalid chars found)
 479         if (!boost::wave::need_no_character_validation(language)) {
 480             cpplexer::impl::validate_literal(value, line, -1, filename);
 481         }
 482     }
 483     else if (T_EOF == id) {
 484     // T_EOF is returned as a valid token, the next call will return T_EOI,
 485     // i.e. the actual end of input
 486         at_eof = true;
 487         value.clear();
 488     }
 489
 490 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
 491     cpplexer::lex_token<Position> tok(id, value, Position(filename, line, -1));
 492     return t = guards.detect_guard(tok);
 493 #else
 494     return t = cpplexer::lex_token<Position>(id, value,
 495         Position(filename, line, -1));
 496 #endif
 497 }
 498
 499 ///////////////////////////////////////////////////////////////////////////////
 500 //
 501 //  lex_functor
     //
     //  xlex_functor derives from xlex_input_interface<token_type> and forwards
     //  get(), set_position() and has_include_guards() to a
     //  lexer<Iterator, Position> object it holds by value.
 502 //
 503 ///////////////////////////////////////////////////////////////////////////////
 504 template <
 505     typename Iterator,
 506     typename Position = boost::wave::util::file_position_type
 507 >
 508 class xlex_functor
 509 :   public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
 510 {
 511 public:
 512
 513     typedef typename lexer<Iterator, Position>::token_type   token_type;
 514
         // all arguments are passed on to the constructor of the wrapped lexer
 515     xlex_functor(Iterator const &first, Iterator const &last,
 516             Position const &pos, boost::wave::language_support language)
 517     :   lexer_(first, last, pos, language)
 518     {}
 519     virtual ~xlex_functor() {}
 520
 521 // get the next token from the input stream
 522     token_type& get(token_type& t) { return lexer_.get(t); }
     // change the file name and line number reported for the following tokens
 523     void set_position(Position const &pos) { lexer_.set_position(pos); }
 524
 525 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
     // report the include guard detection result of the wrapped lexer
 526     bool has_include_guards(std::string& guard_name) const
 527         { return lexer_.has_include_guards(guard_name); }
 528 #endif
 529
 530 private:
 531     lexer<Iterator, Position> lexer_;   // the lexer doing the actual work
 532 };
 533
 534 }   // namespace lexer
 535
 536 ///////////////////////////////////////////////////////////////////////////////
 537 //
 538 //  The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
 539 //  should be defined inline, if the lex_functor shouldn't be instantiated
 540 //  separately from the lex_iterator.
 541 //
 542 //  Separate (explicit) instantiation helps to reduce compilation time.
     //
     //  BOOST_WAVE_XLEX_NEW_LEXER_INLINE therefore expands to 'inline', unless
     //  BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION is non-zero, in which case it
     //  expands to nothing.
     //
     //  NOTE(review): a later comment in this file names xlex_interface.hpp as
     //  the header declaring new_lexer; the cpp_slex_token.hpp reference above
     //  may be stale -- confirm.
 543 //
 544 ///////////////////////////////////////////////////////////////////////////////
 545
 546 #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
 547 #define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
 548 #else
 549 #define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
 550 #endif
 551
 552 ///////////////////////////////////////////////////////////////////////////////
 553 //
 554 //  The 'new_lexer' function allows the opaque generation of a new lexer object.
 555 //  It is coupled to the iterator type to allow to decouple the lexer/iterator
 556 //  configurations at compile time.
 557 //
 558 //  This function is declared inside the xlex_interface.hpp file, which is
 559 //  referenced by the source file calling the lexer and the source file, which
 560 //  instantiates the lex_functor. But it is defined here, so it will be
 561 //  instantiated only while compiling the source file, which instantiates the
 562 //  lex_functor. While the xlex_interface.hpp file may be included everywhere,
 563 //  this file (xlex_lexer.hpp) should be included only once. This allows
 564 //  to decouple the lexer interface from the lexer implementation and reduces
 565 //  compilation time.
     //
     //  The function returns a pointer to an xlex_functor<Iterator, Position>
     //  created with 'new' from the given arguments. Who deletes the object is
     //  not visible in this file -- presumably the caller (the lex iterator)
     //  takes ownership; verify against xlex_interface.hpp.
 566 //
 567 ///////////////////////////////////////////////////////////////////////////////
 568
 569 template <typename Iterator, typename Position>
 570 BOOST_WAVE_XLEX_NEW_LEXER_INLINE
 571 lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
 572 new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
 573     Iterator const &last, Position const &pos,
 574     wave::language_support language)
 575 {
         // NOTE(review): the implicit conversion to the returned pointer type
         // assumes xlex_input_interface<> derives from lex_input_interface<>
         // -- confirm in xlex_interface.hpp
 576     return new lexer::xlex_functor<Iterator, Position>(
 577         first, last, pos, language);
 578 }
 579
     // the helper macro is needed for the definition above only
 580 #undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
 581
 582 ///////////////////////////////////////////////////////////////////////////////
 583 }   // namespace xlex
 584 }   // namespace cpplexer
 585 }   // namespace wave
 586 }   // namespace boost
 587
 588 #endif // !defined(XLEX_LEXER_HPP)