libs/spirit/example/lex/static_lexer/word_count_static.cpp

   1 //  Copyright (c) 2001-2010 Hartmut Kaiser
   2 //
   3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
   4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   5
   6 //  The purpose of this example is to show how a single lexer token
   7 //  definition can serve two purposes:
   8 //
   9 //    . To generate C++ code implementing a static lexical analyzer that
  10 //      recognizes all defined tokens
  11 //    . To integrate the generated C++ lexer into the /Spirit/ framework.
  12 //
  13
  14 // #define BOOST_SPIRIT_LEXERTL_DEBUG
  15 #define BOOST_VARIANT_MINIMIZE_SIZE
  16
  17 #include <boost/config/warning_disable.hpp>
  18 #include <boost/spirit/include/qi.hpp>
  19 //[wc_static_include
  20 #include <boost/spirit/include/lex_static_lexertl.hpp>
  21 //]
  22 #include <boost/spirit/include/phoenix_operator.hpp>
  23 #include <boost/spirit/include/phoenix_statement.hpp>
  24 #include <boost/spirit/include/phoenix_container.hpp>
  25
  26 #include <iostream>
  27 #include <string>
  28
  29 #include "../example.hpp"
  30 #include "word_count_tokens.hpp"          // token definition
  31 #include "word_count_static.hpp"          // generated tokenizer
  32
  33 using namespace boost::spirit;
  34 using namespace boost::spirit::ascii;
  35
  36 ///////////////////////////////////////////////////////////////////////////////
  37 //  Grammar definition
  38 ///////////////////////////////////////////////////////////////////////////////
  39 //[wc_static_grammar
  40 //  This is an ordinary grammar definition following the rules defined by
  41 //  Spirit.Qi. There is nothing specific about it, except it gets the token
  42 //  definition class instance passed to the constructor to allow accessing the
  43 //  embedded token_def<> instances.
  44 template <typename Iterator>
  45 struct word_count_grammar : qi::grammar<Iterator>
  46 {
  47     template <typename TokenDef>
  48     word_count_grammar(TokenDef const& tok)
  49       : word_count_grammar::base_type(start)
  50       , c(0), w(0), l(0)
  51     {
  52         using boost::phoenix::ref;
  53         using boost::phoenix::size;
  54
  55         //  associate the defined tokens with the lexer, at the same time
  56         //  defining the actions to be executed
  57         start =  *(   tok.word          [ ++ref(w), ref(c) += size(_1) ]
  58                   |   lit('\n')         [ ++ref(l), ++ref(c) ]
  59                   |   qi::token(IDANY)  [ ++ref(c) ]
  60                   )
  61               ;
  62     }
  63
  64     std::size_t c, w, l;      // counter for characters, words, and lines
  65     qi::rule<Iterator> start;
  66 };
  67 //]
  68
  69 ///////////////////////////////////////////////////////////////////////////////
  70 //[wc_static_main
  71 int main(int argc, char* argv[])
  72 {
  73     // Define the token type to be used: 'std::string' is available as the type
  74     // of the token value.
  75     typedef lex::lexertl::token<
  76         char const*, boost::mpl::vector<std::string>
  77     > token_type;
  78
  79     // Define the lexer type to be used as the base class for our token
  80     // definition.
  81     //
  82     // This is the only place where the code is different from an equivalent
  83     // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of
  84     // the `lexertl::lexer<>` as the base class for our token defintion type.
  85     //
  86     // As we specified the suffix "wc" while generating the static tables we
  87     // need to pass the type lexertl::static_::lexer_wc as the second template
  88     // parameter below (see word_count_generate.cpp).
  89     typedef lex::lexertl::static_lexer<
  90         token_type, lex::lexertl::static_::lexer_wc
  91     > lexer_type;
  92
  93     // Define the iterator type exposed by the lexer.
  94     typedef word_count_tokens<lexer_type>::iterator_type iterator_type;
  95
  96     // Now we use the types defined above to create the lexer and grammar
  97     // object instances needed to invoke the parsing process.
  98     word_count_tokens<lexer_type> word_count;           // Our lexer
  99     word_count_grammar<iterator_type> g (word_count);   // Our parser
 100
 101     // Read in the file into memory.
 102     std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
 103     char const* first = str.c_str();
 104     char const* last = &first[str.size()];
 105
 106     // Parsing is done based on the token stream, not the character stream.
 107     bool r = lex::tokenize_and_parse(first, last, word_count, g);
 108
 109     if (r) {    // success
 110         std::cout << "lines: " << g.l << ", words: " << g.w
 111                   << ", characters: " << g.c << "\n";
 112     }
 113     else {
 114         std::string rest(first, last);
 115         std::cerr << "Parsing failed\n" << "stopped at: \""
 116                   << rest << "\"\n";
 117     }
 118     return 0;
 119 }
 120 //]