libs/spirit/example/lex/strip_comments_lexer.cpp

   1 //  Copyright (c) 2001-2010 Hartmut Kaiser
   2 //
   3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
   4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   5
   6 //  This example is the equivalent to the following lex program:
   7 //
   8 //       %{
   9 //       /* INITIAL is the default start state.  COMMENT is our new  */
  10 //       /* state where we remove comments.                          */
  11 //       %}
  12 //
  13 //       %s COMMENT
  14 //       %%
  15 //       <INITIAL>"//".*    ;
  16 //       <INITIAL>"/*"      BEGIN COMMENT;
  17 //       <INITIAL>.         ECHO;
  18 //       <INITIAL>[\n]      ECHO;
  19 //       <COMMENT>"*/"      BEGIN INITIAL;
  20 //       <COMMENT>.         ;
  21 //       <COMMENT>[\n]      ;
  22 //       %%
  23 //
  24 //       main()
  25 //       {
  26 //         yylex();
  27 //       }
  28 //
  29 //  Its purpose is to strip comments out of C code.
  30 //
  31 //  Additionally this example demonstrates the use of lexer states to structure
  32 //  the lexer definition.
  33
  34 // #define BOOST_SPIRIT_LEXERTL_DEBUG
  35
  36 #include <boost/config/warning_disable.hpp>
  37 #include <boost/spirit/include/lex_lexertl.hpp>
  38 #include <boost/spirit/include/phoenix_operator.hpp>
  39 #include <boost/spirit/include/phoenix_statement.hpp>
  40 #include <boost/spirit/include/phoenix_core.hpp>
  41
  42 #include <iostream>
  43 #include <string>
  44
  45 #include "example.hpp"
  46
  47 using namespace boost::spirit;
  48
  49 ///////////////////////////////////////////////////////////////////////////////
  50 //  Token definition: We use the lexertl based lexer engine as the underlying
  51 //                    lexer type.
  52 ///////////////////////////////////////////////////////////////////////////////
  53 enum tokenids
  54 {
  55     IDANY = lex::min_token_id + 10,
  56     IDEOL = lex::min_token_id + 11
  57 };
  58
  59 ///////////////////////////////////////////////////////////////////////////////
  60 // Simple custom semantic action function object used to print the matched
  61 // input sequence for a particular token
  62 template <typename Char, typename Traits>
  63 struct echo_input_functor
  64 {
  65     echo_input_functor (std::basic_ostream<Char, Traits>& os_)
  66       : os(os_) {}
  67
  68     // This is called by the semantic action handling code during the lexing
  69     template <typename Iterator, typename Context>
  70     void operator()(Iterator const& b, Iterator const& e
  71       , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
  72       , std::size_t&, Context&) const
  73     {
  74         os << std::string(b, e);
  75     }
  76
  77     std::basic_ostream<Char, Traits>& os;
  78 };
  79
  80 template <typename Char, typename Traits>
  81 inline echo_input_functor<Char, Traits>
  82 echo_input(std::basic_ostream<Char, Traits>& os)
  83 {
  84     return echo_input_functor<Char, Traits>(os);
  85 }
  86
  87 ///////////////////////////////////////////////////////////////////////////////
  88 // Another simple custom semantic action function object used to switch the
  89 // state of the lexer
  90 struct set_lexer_state
  91 {
  92     set_lexer_state(char const* state_)
  93       : state(state_) {}
  94
  95     // This is called by the semantic action handling code during the lexing
  96     template <typename Iterator, typename Context>
  97     void operator()(Iterator const&, Iterator const&
  98       , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
  99       , std::size_t&, Context& ctx) const
 100     {
 101         ctx.set_state_name(state.c_str());
 102     }
 103
 104     std::string state;
 105 };
 106
 107 ///////////////////////////////////////////////////////////////////////////////
 108 template <typename Lexer>
 109 struct strip_comments_tokens : lex::lexer<Lexer>
 110 {
 111     strip_comments_tokens()
 112       : strip_comments_tokens::base_type(lex::match_flags::match_default)
 113     {
 114         // define tokens and associate them with the lexer
 115         cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
 116         ccomment = "\"/*\"";            // '/*'
 117         endcomment = "\"*/\"";          // '*/'
 118         any = std::string(".");
 119         eol = "\n";
 120
 121         // The following tokens are associated with the default lexer state
 122         // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
 123         // strictly optional.
 124         this->self
 125             =   cppcomment
 126             |   ccomment    [ set_lexer_state("COMMENT") ]
 127             |   eol         [ echo_input(std::cout) ]
 128             |   any         [ echo_input(std::cout) ]
 129             ;
 130
 131         // The following tokens are associated with the lexer state 'COMMENT'.
 132         this->self("COMMENT")
 133             =   endcomment  [ set_lexer_state("INITIAL") ]
 134             |   "\n"
 135             |   std::string(".")
 136             ;
 137     }
 138
 139     lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
 140 };
 141
 142   ///////////////////////////////////////////////////////////////////////////////
 143 int main(int argc, char* argv[])
 144 {
 145     // iterator type used to expose the underlying input stream
 146     typedef std::string::iterator base_iterator_type;
 147
 148     // lexer type
 149     typedef
 150         lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
 151     lexer_type;
 152
 153     // now we use the types defined above to create the lexer and grammar
 154     // object instances needed to invoke the parsing process
 155     strip_comments_tokens<lexer_type> strip_comments;             // Our lexer
 156
 157     // No parsing is done alltogether, everything happens in the lexer semantic
 158     // actions.
 159     std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
 160     base_iterator_type first = str.begin();
 161     bool r = lex::tokenize(first, str.end(), strip_comments);
 162
 163     if (!r) {
 164         std::string rest(first, str.end());
 165         std::cerr << "Lexical analysis failed\n" << "stopped at: \""
 166                   << rest << "\"\n";
 167     }
 168     return 0;
 169 }
 170
 171
 172