2 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_INPUT
7 #define BOOST_LEXER_INPUT
9 #include "char_traits.hpp"
10 #include <boost/detail/iterator.hpp>
12 #include "state_machine.hpp"
// Template parameters: FwdIter is the type passed to
// boost::detail::iterator_traits; Traits defaults to char_traits instantiated
// on FwdIter's value_type.
// NOTE(review): the declaration these parameters introduce is not part of
// this excerpt.
18 template<typename FwdIter, typename Traits =
19 char_traits<typename boost::detail::iterator_traits<FwdIter>::value_type> >
// Compiler-conditional friend declaration; the condition tests for MSVC with
// _MSC_VER <= 1200.
// NOTE(review): lines between the #if and the friend declaration (and the
// closing #endif) are missing from this excerpt, so which branch the
// declaration below belongs to cannot be determined here.
26 #if defined _MSC_VER && _MSC_VER <= 1200
29 friend class basic_input;
// One of the fields compared by operator == below (together with id, start,
// end, bol and state, whose declarations are not in this excerpt).
35 std::size_t unique_id;
41 // Construct in end() state.
// Member-wise equality: true only when id, unique_id, start, end, bol and
// state all compare equal between *this and rhs_.
50 bool operator == (const data &rhs_) const
52 return id == rhs_.id && unique_id == rhs_.unique_id &&
53 start == rhs_.start && end == rhs_.end &&
54 bol == rhs_.bol && state == rhs_.state;
// Two iterators compare equal exactly when their _data members compare equal.
63 bool operator == (const iterator &rhs_) const
65 return _data == rhs_._data;
// Inequality is defined as the negation of operator ==, so the two operators
// cannot disagree.
68 bool operator != (const iterator &rhs_) const
70 return !(*this == rhs_);
83 // Let compiler generate operator = ().
// Pre-increment: returns a reference to an iterator.
// NOTE(review): the body is not part of this excerpt.
86 iterator &operator ++ ()
// Post-increment: returns an iterator by value. The visible part of the body
// starts by copying the current iterator into iter_.
// NOTE(review): the rest of the body is not part of this excerpt.
93 iterator operator ++ (int)
95 iterator iter_ = *this;
102 // Not owner (obviously!)
// Pointer to the basic_input; the code below reads _state_machine and _end
// through it.
103 const basic_input *_input;
// Fragment that advances _data by one token by calling one of the next ()
// overloads.
// NOTE(review): the enclosing function's header, its braces and the else
// branches are missing from this excerpt; the structure described here is
// inferred from the visible conditions and calls.
// Fetch the state machine's internals through the basic_input pointer.
108 const detail::internals &internals_ =
109 _input->_state_machine->data ();
// The new token starts where the previous one ended.
111 _data.start = _data.end;
// When _dfa holds exactly one entry, the calls below pass the front ()
// elements of _lookup, _dfa_alphabet and _dfa directly and do not pass
// _data.state.
113 if (internals_._dfa->size () == 1)
// If either a BOL or an EOL assertion was seen, the call that follows also
// passes _data.bol.
115 if (internals_._seen_BOL_assertion ||
116 internals_._seen_EOL_assertion)
119 (&internals_._lookup->front ()->front (),
120 internals_._dfa_alphabet.front (),
121 &internals_._dfa->front ()->front (),
122 _data.bol, _data.end, _input->_end, _data.unique_id);
// Call without a bol argument.
126 _data.id = next (&internals_._lookup->front ()->front (),
127 internals_._dfa_alphabet.front (), &internals_.
128 _dfa->front ()->front (), _data.end, _input->_end,
// Same BOL/EOL test, now selecting between the overloads that take
// internals_ and _data.state.
134 if (internals_._seen_BOL_assertion ||
135 internals_._seen_EOL_assertion)
137 _data.id = next (internals_, _data.state,
138 _data.bol, _data.end, _input->_end, _data.unique_id);
142 _data.id = next (internals_, _data.state,
143 _data.end, _input->_end, _data.unique_id);
// True when the end of input has been reached and the token just produced
// is empty.
147 if (_data.end == _input->_end && _data.start == _data.end)
149 // Ensure current state matches that returned by end().
// next () overload taking the internals, a start state and a
// beginning-of-line flag. It walks the input from start_token_ towards end_
// using the tables selected by start_state_. On a match it writes
// start_state_ and start_token_ back to the caller (both are non-const
// references) and stores a beginning-of-line flag in _data.bol.
// id_index, unique_id_index, state_index, bol_index and eol_index are offsets
// into a table row; their definitions are outside this excerpt.
// NOTE(review): many lines of this body (braces, break/return statements,
// end-state guards) are missing from the excerpt; comments below describe
// only what the visible lines do.
154 std::size_t next (const detail::internals &internals_,
155 std::size_t &start_state_, bool bol_,
156 FwdIter &start_token_, const FwdIter &end_,
157 std::size_t &unique_id_)
// Special-case an empty input range (the handling itself is not visible).
159 if (start_token_ == end_)
// Select the lookup table, row width and DFA table for start_state_.
166 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
168 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
169 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
// Start one row (dfa_alphabet_ entries) past the beginning of the table,
// i.e. the same address as &dfa_[1 * dfa_alphabet_].
170 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
171 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks it as an end state.
172 bool end_state_ = *ptr_ != 0;
173 std::size_t id_ = *(ptr_ + id_index);
174 std::size_t uid_ = *(ptr_ + unique_id_index);
// Values remembered for the match found so far; initialised from the inputs.
175 std::size_t end_start_state_ = start_state_;
176 bool end_bol_ = bol_;
177 FwdIter end_token_ = start_token_;
179 while (curr_ != end_)
// Row entries giving the state to enter on a BOL / EOL assertion;
// zero is treated as "none" by the tests below.
181 const std::size_t BOL_state_ = ptr_[bol_index];
182 const std::size_t EOL_state_ = ptr_[eol_index];
// At beginning of line with a BOL entry present: move to that row.
184 if (BOL_state_ && bol_)
186 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Next character is a newline with an EOL entry present: move to that row.
// The character is only inspected here, not advanced past.
188 else if (EOL_state_ && *curr_ == '\n')
190 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Consume one character and remember whether it was a newline.
194 typename Traits::char_type prev_char_ = *curr_++;
196 bol_ = prev_char_ == '\n';
// Look up the next state for the consumed character via lookup_.
198 const std::size_t state_ =
199 ptr_[lookup_[static_cast<typename Traits::index_type>
207 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Record id, unique id and follow-on start state from the current row.
// NOTE(review): the guard around these assignments is not in the excerpt;
// presumably they run only when the row is an end state - confirm.
213 id_ = *(ptr_ + id_index);
214 uid_ = *(ptr_ + unique_id_index);
215 end_start_state_ = *(ptr_ + state_index);
// After the loop: an EOL entry is also followed when the input is exhausted.
221 const std::size_t EOL_state_ = ptr_[eol_index];
223 if (EOL_state_ && curr_ == end_)
225 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
230 id_ = *(ptr_ + id_index);
231 uid_ = *(ptr_ + unique_id_index);
232 end_start_state_ = *(ptr_ + state_index);
240 // return longest match
// Write the remembered state and token end back through the reference
// parameters.
241 start_state_ = end_start_state_;
242 start_token_ = end_token_;
251 _data.bol = end_bol_;
256 // No match causes char to be skipped
// bol is set from the character start_token_ currently refers to.
257 _data.bol = *start_token_ == '\n';
// next () overload taking the internals and a start state, without a
// beginning-of-line flag: no BOL/EOL entries are consulted in the visible
// lines. On a match it writes start_state_ and start_token_ back to the
// caller through the reference parameters.
// id_index, unique_id_index and state_index are offsets into a table row;
// their definitions are outside this excerpt.
// NOTE(review): many lines of this body (braces, the "again" label,
// break/return statements, end-state guards) are missing from the excerpt.
267 std::size_t next (const detail::internals &internals_,
268 std::size_t &start_state_, FwdIter &start_token_,
269 FwdIter const &end_, std::size_t &unique_id_)
// Special-case an empty input range (the handling itself is not visible).
271 if (start_token_ == end_)
// Select the lookup table, row width and DFA table for start_state_.
278 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
280 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
281 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
// Start one row (dfa_alphabet_ entries) past the beginning of the table.
282 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
283 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks it as an end state.
284 bool end_state_ = *ptr_ != 0;
285 std::size_t id_ = *(ptr_ + id_index);
286 std::size_t uid_ = *(ptr_ + unique_id_index);
// Values remembered for the match found so far; initialised from the inputs.
287 std::size_t end_start_state_ = start_state_;
288 FwdIter end_token_ = start_token_;
290 while (curr_ != end_)
// Consume one character and look up the next state for it via lookup_.
292 const std::size_t state_ = ptr_[lookup_[static_cast
293 <typename Traits::index_type>(*curr_++)]];
300 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Record id, unique id and follow-on start state from the current row.
// NOTE(review): the guard around these assignments is not in the excerpt.
305 id_ = *(ptr_ + id_index);
306 uid_ = *(ptr_ + unique_id_index);
307 end_start_state_ = *(ptr_ + state_index);
314 // return longest match
315 start_state_ = end_start_state_;
316 start_token_ = end_token_;
// An id of 0 jumps back to the "again" label (label not in this excerpt).
318 if (id_ == 0) goto again;
322 // No match causes char to be skipped
// next () overload taking the lookup table, row width and DFA table directly,
// plus a beginning-of-line flag. There is no start-state parameter. On a
// match it writes start_token_ back to the caller and stores a
// beginning-of-line flag in _data.bol.
// id_index, unique_id_index, bol_index and eol_index are offsets into a table
// row; their definitions are outside this excerpt.
// NOTE(review): many lines of this body (braces, break/return statements,
// end-state guards) are missing from the excerpt.
332 std::size_t next (const std::size_t * const lookup_,
333 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
334 bool bol_, FwdIter &start_token_, FwdIter const &end_,
335 std::size_t &unique_id_)
// Special-case an empty input range (the handling itself is not visible).
337 if (start_token_ == end_)
// Start one row (dfa_alphabet_ entries) past the beginning of the table.
343 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
344 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks it as an end state.
345 bool end_state_ = *ptr_ != 0;
346 std::size_t id_ = *(ptr_ + id_index);
347 std::size_t uid_ = *(ptr_ + unique_id_index);
// Values remembered for the match found so far; initialised from the inputs.
348 bool end_bol_ = bol_;
349 FwdIter end_token_ = start_token_;
351 while (curr_ != end_)
// Row entries giving the state to enter on a BOL / EOL assertion;
// zero is treated as "none" by the tests below.
353 const std::size_t BOL_state_ = ptr_[bol_index];
354 const std::size_t EOL_state_ = ptr_[eol_index];
// At beginning of line with a BOL entry present: move to that row.
356 if (BOL_state_ && bol_)
358 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Next character is a newline with an EOL entry present: move to that row.
// The character is only inspected here, not advanced past.
360 else if (EOL_state_ && *curr_ == '\n')
362 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Consume one character and remember whether it was a newline.
366 typename Traits::char_type prev_char_ = *curr_++;
368 bol_ = prev_char_ == '\n';
// Look up the next state for the consumed character via lookup_.
370 const std::size_t state_ =
371 ptr_[lookup_[static_cast<typename Traits::index_type>
379 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Record id and unique id from the current row.
// NOTE(review): the guard around these assignments is not in the excerpt.
385 id_ = *(ptr_ + id_index);
386 uid_ = *(ptr_ + unique_id_index);
// After the loop: an EOL entry is also followed when the input is exhausted.
392 const std::size_t EOL_state_ = ptr_[eol_index];
394 if (EOL_state_ && curr_ == end_)
396 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
401 id_ = *(ptr_ + id_index);
402 uid_ = *(ptr_ + unique_id_index);
410 // return longest match
411 _data.bol = end_bol_;
412 start_token_ = end_token_;
416 // No match causes char to be skipped
// bol is set from the character start_token_ currently refers to.
417 _data.bol = *start_token_ == '\n';
// next () overload taking the lookup table, row width and DFA table directly,
// with neither a start-state parameter nor a beginning-of-line flag. On a
// match it writes start_token_ back to the caller through the reference
// parameter.
// id_index and unique_id_index are offsets into a table row; their
// definitions are outside this excerpt.
// NOTE(review): many lines of this body (braces, break/return statements,
// end-state guards) are missing from the excerpt.
427 std::size_t next (const std::size_t * const lookup_,
428 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
429 FwdIter &start_token_, FwdIter const &end_,
430 std::size_t &unique_id_)
// Special-case an empty input range (the handling itself is not visible).
432 if (start_token_ == end_)
// Start one row (dfa_alphabet_ entries) past the beginning of the table.
438 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
439 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks it as an end state.
440 bool end_state_ = *ptr_ != 0;
441 std::size_t id_ = *(ptr_ + id_index);
442 std::size_t uid_ = *(ptr_ + unique_id_index);
// End position remembered for the match found so far.
443 FwdIter end_token_ = start_token_;
445 while (curr_ != end_)
// Consume one character and look up the next state for it via lookup_.
447 const std::size_t state_ = ptr_[lookup_[static_cast
448 <typename Traits::index_type>(*curr_++)]];
455 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Record id and unique id from the current row.
// NOTE(review): the guard around these assignments is not in the excerpt.
460 id_ = *(ptr_ + id_index);
461 uid_ = *(ptr_ + unique_id_index);
468 // return longest match
469 start_token_ = end_token_;
473 // No match causes char to be skipped
// Compiler-conditional friend declaration for iterator; the condition tests
// for MSVC with _MSC_VER <= 1200.
// NOTE(review): lines between the #if and the friend declaration (and the
// closing #endif) are missing from this excerpt, so which branch the
// declaration below belongs to cannot be determined here.
484 #if defined _MSC_VER && _MSC_VER <= 1200
487 friend class iterator;
490 // Make it explicit that we are NOT taking a copy of state_machine_!
// Constructor: receives the state machine by pointer and stores that pointer
// in _state_machine; begin_ and end_ are taken by const reference.
// NOTE(review): the remaining member initialisers and the body are not part
// of this excerpt.
491 basic_input (const basic_state_machine<typename Traits::char_type>
492 *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
493 _state_machine (state_machine_),
// Builds the iterator for the start of the input: id is set to npos, start
// and end both point at _begin, bol is taken from the state machine's
// _seen_BOL_assertion flag, and state is 0.
// NOTE(review): the creation of iter_ and the return statement are not part
// of this excerpt.
499 iterator begin () const
504 // Over-ride default of 0 (EOI)
505 iter_._data.id = npos;
506 iter_._data.start = _begin;
507 iter_._data.end = _begin;
508 iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
509 iter_._data.state = 0;
// Builds the iterator for the end of the input: start and end both point at
// _end.
// NOTE(review): the creation of iter_ and the return statement are not part
// of this excerpt.
514 iterator end () const
519 iter_._data.start = _end;
520 iter_._data.end = _end;
// Pointer to the state machine handed to the constructor; only the pointer
// is stored, and the pointee is const-qualified.
525 const basic_state_machine<typename Traits::char_type> *_state_machine;
// Convenience typedefs: basic_input instantiated with the default Traits for
// std::string and std::basic_string<wchar_t> iterators, and for const char *
// and const wchar_t * pointers.
530 typedef basic_input<std::string::iterator> iter_input;
531 typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
532 typedef basic_input<const char *> ptr_input;
533 typedef basic_input<const wchar_t *> ptr_winput;