Imported Upstream version 1.49.0
[platform/upstream/boost.git] / boost / spirit / home / support / detail / lexer / input.hpp
1 // input.hpp
2 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_INPUT
7 #define BOOST_LEXER_INPUT
8
9 #include "char_traits.hpp"
10 #include <boost/detail/iterator.hpp>
11 #include "size_t.hpp"
12 #include "state_machine.hpp"
13
14 namespace boost
15 {
16 namespace lexer
17 {
18 template<typename FwdIter, typename Traits =
19     char_traits<typename boost::detail::iterator_traits<FwdIter>::value_type> >
20 class basic_input
21 {
22 public:
23     class iterator
24     {
25     public:
26 #if defined _MSC_VER && _MSC_VER <= 1200
27         friend basic_input;
28 #else
29         friend class basic_input;
30 #endif
31
32         struct data
33         {
34             std::size_t id;
35             std::size_t unique_id;
36             FwdIter start;
37             FwdIter end;
38             bool bol;
39             std::size_t state;
40
41             // Construct in end() state.
42             data () :
43                 id (0),
44                 unique_id (npos),
45                 bol (false),
46                 state (npos)
47             {
48             }
49
50             bool operator == (const data &rhs_) const
51             {
52                 return id == rhs_.id && unique_id == rhs_.unique_id &&
53                     start == rhs_.start && end == rhs_.end &&
54                     bol == rhs_.bol && state == rhs_.state;
55             }
56         };
57
58         iterator () :
59             _input (0)
60         {
61         }
62
63         bool operator == (const iterator &rhs_) const
64         {
65             return _data == rhs_._data;
66         }
67
68         bool operator != (const iterator &rhs_) const
69         {
70             return !(*this == rhs_);
71         }
72
73         data &operator * ()
74         {
75             return _data;
76         }
77
78         data *operator -> ()
79         {
80             return &_data;
81         }
82
83         // Let compiler generate operator = ().
84
85         // prefix version
86         iterator &operator ++ ()
87         {
88             next_token ();
89             return *this;
90         }
91
92         // postfix version
93         iterator operator ++ (int)
94         {
95             iterator iter_ = *this;
96
97             next_token ();
98             return iter_;
99         }
100
101     private:
102         // Not owner (obviously!)
103         const basic_input *_input;
104         data _data;
105
106         void next_token ()
107         {
108             const detail::internals &internals_ =
109                 _input->_state_machine->data ();
110
111             _data.start = _data.end;
112
113             if (internals_._dfa->size () == 1)
114             {
115                 if (internals_._seen_BOL_assertion ||
116                     internals_._seen_EOL_assertion)
117                 {
118                     _data.id = next
119                         (&internals_._lookup->front ()->front (),
120                         internals_._dfa_alphabet.front (),
121                         &internals_._dfa->front ()->front (),
122                         _data.bol, _data.end, _input->_end, _data.unique_id);
123                 }
124                 else
125                 {
126                     _data.id = next (&internals_._lookup->front ()->front (),
127                         internals_._dfa_alphabet.front (), &internals_.
128                         _dfa->front ()->front (), _data.end, _input->_end,
129                         _data.unique_id);
130                 }
131             }
132             else
133             {
134                 if (internals_._seen_BOL_assertion ||
135                     internals_._seen_EOL_assertion)
136                 {
137                     _data.id = next (internals_, _data.state,
138                         _data.bol, _data.end, _input->_end, _data.unique_id);
139                 }
140                 else
141                 {
142                     _data.id = next (internals_, _data.state,
143                         _data.end, _input->_end, _data.unique_id);
144                 }
145             }
146
147             if (_data.end == _input->_end && _data.start == _data.end)
148             {
149                 // Ensure current state matches that returned by end().
150                 _data.state = npos;
151             }
152         }
153
154         std::size_t next (const detail::internals &internals_,
155             std::size_t &start_state_, bool bol_,
156             FwdIter &start_token_, const FwdIter &end_,
157             std::size_t &unique_id_)
158         {
159             if (start_token_ == end_)
160             {
161                 unique_id_ = npos;
162                 return 0;
163             }
164
165         again:
166             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
167                 front ();
168             std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
169             const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
170             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
171             FwdIter curr_ = start_token_;
172             bool end_state_ = *ptr_ != 0;
173             std::size_t id_ = *(ptr_ + id_index);
174             std::size_t uid_ = *(ptr_ + unique_id_index);
175             std::size_t end_start_state_ = start_state_;
176             bool end_bol_ = bol_;
177             FwdIter end_token_ = start_token_;
178
179             while (curr_ != end_)
180             {
181                 const std::size_t BOL_state_ = ptr_[bol_index];
182                 const std::size_t EOL_state_ = ptr_[eol_index];
183
184                 if (BOL_state_ && bol_)
185                 {
186                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
187                 }
188                 else if (EOL_state_ && *curr_ == '\n')
189                 {
190                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
191                 }
192                 else
193                 {
194                     typename Traits::char_type prev_char_ = *curr_++;
195
196                     bol_ = prev_char_ == '\n';
197
198                     const std::size_t state_ =
199                         ptr_[lookup_[static_cast<typename Traits::index_type>
200                         (prev_char_)]];
201
202                     if (state_ == 0)
203                     {
204                         break;
205                     }
206
207                     ptr_ = &dfa_[state_ * dfa_alphabet_];
208                 }
209
210                 if (*ptr_)
211                 {
212                     end_state_ = true;
213                     id_ = *(ptr_ + id_index);
214                     uid_ = *(ptr_ + unique_id_index);
215                     end_start_state_ = *(ptr_ + state_index);
216                     end_bol_ = bol_;
217                     end_token_ = curr_;
218                 }
219             }
220
221             const std::size_t EOL_state_ = ptr_[eol_index];
222
223             if (EOL_state_ && curr_ == end_)
224             {
225                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
226
227                 if (*ptr_)
228                 {
229                     end_state_ = true;
230                     id_ = *(ptr_ + id_index);
231                     uid_ = *(ptr_ + unique_id_index);
232                     end_start_state_ = *(ptr_ + state_index);
233                     end_bol_ = bol_;
234                     end_token_ = curr_;
235                 }
236             }
237
238             if (end_state_)
239             {
240                 // return longest match
241                 start_state_ = end_start_state_;
242                 start_token_ = end_token_;
243
244                 if (id_ == 0)
245                 {
246                     bol_ = end_bol_;
247                     goto again;
248                 }
249                 else
250                 {
251                     _data.bol = end_bol_;
252                 }
253             }
254             else
255             {
256                 // No match causes char to be skipped
257                 _data.bol = *start_token_ == '\n';
258                 ++start_token_;
259                 id_ = npos;
260                 uid_ = npos;
261             }
262
263             unique_id_ = uid_;
264             return id_;
265         }
266
267         std::size_t next (const detail::internals &internals_,
268             std::size_t &start_state_, FwdIter &start_token_,
269             FwdIter const &end_, std::size_t &unique_id_)
270         {
271             if (start_token_ == end_)
272             {
273                 unique_id_ = npos;
274                 return 0;
275             }
276
277         again:
278             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
279                 front ();
280             std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
281             const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
282             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
283             FwdIter curr_ = start_token_;
284             bool end_state_ = *ptr_ != 0;
285             std::size_t id_ = *(ptr_ + id_index);
286             std::size_t uid_ = *(ptr_ + unique_id_index);
287             std::size_t end_start_state_ = start_state_;
288             FwdIter end_token_ = start_token_;
289
290             while (curr_ != end_)
291             {
292                 const std::size_t state_ = ptr_[lookup_[static_cast
293                     <typename Traits::index_type>(*curr_++)]];
294
295                 if (state_ == 0)
296                 {
297                     break;
298                 }
299
300                 ptr_ = &dfa_[state_ * dfa_alphabet_];
301
302                 if (*ptr_)
303                 {
304                     end_state_ = true;
305                     id_ = *(ptr_ + id_index);
306                     uid_ = *(ptr_ + unique_id_index);
307                     end_start_state_ = *(ptr_ + state_index);
308                     end_token_ = curr_;
309                 }
310             }
311
312             if (end_state_)
313             {
314                 // return longest match
315                 start_state_ = end_start_state_;
316                 start_token_ = end_token_;
317
318                 if (id_ == 0) goto again;
319             }
320             else
321             {
322                 // No match causes char to be skipped
323                 ++start_token_;
324                 id_ = npos;
325                 uid_ = npos;
326             }
327
328             unique_id_ = uid_;
329             return id_;
330         }
331
332         std::size_t next (const std::size_t * const lookup_,
333             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
334             bool bol_, FwdIter &start_token_, FwdIter const &end_,
335             std::size_t &unique_id_)
336         {
337             if (start_token_ == end_)
338             {
339                 unique_id_ = npos;
340                 return 0;
341             }
342
343             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
344             FwdIter curr_ = start_token_;
345             bool end_state_ = *ptr_ != 0;
346             std::size_t id_ = *(ptr_ + id_index);
347             std::size_t uid_ = *(ptr_ + unique_id_index);
348             bool end_bol_ = bol_;
349             FwdIter end_token_ = start_token_;
350
351             while (curr_ != end_)
352             {
353                 const std::size_t BOL_state_ = ptr_[bol_index];
354                 const std::size_t EOL_state_ = ptr_[eol_index];
355
356                 if (BOL_state_ && bol_)
357                 {
358                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
359                 }
360                 else if (EOL_state_ && *curr_ == '\n')
361                 {
362                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
363                 }
364                 else
365                 {
366                     typename Traits::char_type prev_char_ = *curr_++;
367
368                     bol_ = prev_char_ == '\n';
369
370                     const std::size_t state_ =
371                         ptr_[lookup_[static_cast<typename Traits::index_type>
372                         (prev_char_)]];
373
374                     if (state_ == 0)
375                     {
376                         break;
377                     }
378
379                     ptr_ = &dfa_[state_ * dfa_alphabet_];
380                 }
381
382                 if (*ptr_)
383                 {
384                     end_state_ = true;
385                     id_ = *(ptr_ + id_index);
386                     uid_ = *(ptr_ + unique_id_index);
387                     end_bol_ = bol_;
388                     end_token_ = curr_;
389                 }
390             }
391
392             const std::size_t EOL_state_ = ptr_[eol_index];
393
394             if (EOL_state_ && curr_ == end_)
395             {
396                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
397
398                 if (*ptr_)
399                 {
400                     end_state_ = true;
401                     id_ = *(ptr_ + id_index);
402                     uid_ = *(ptr_ + unique_id_index);
403                     end_bol_ = bol_;
404                     end_token_ = curr_;
405                 }
406             }
407
408             if (end_state_)
409             {
410                 // return longest match
411                 _data.bol = end_bol_;
412                 start_token_ = end_token_;
413             }
414             else
415             {
416                 // No match causes char to be skipped
417                 _data.bol = *start_token_ == '\n';
418                 ++start_token_;
419                 id_ = npos;
420                 uid_ = npos;
421             }
422
423             unique_id_ = uid_;
424             return id_;
425         }
426
427         std::size_t next (const std::size_t * const lookup_,
428             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
429             FwdIter &start_token_, FwdIter const &end_,
430             std::size_t &unique_id_)
431         {
432             if (start_token_ == end_)
433             {
434                 unique_id_ = npos;
435                 return 0;
436             }
437
438             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
439             FwdIter curr_ = start_token_;
440             bool end_state_ = *ptr_ != 0;
441             std::size_t id_ = *(ptr_ + id_index);
442             std::size_t uid_ = *(ptr_ + unique_id_index);
443             FwdIter end_token_ = start_token_;
444
445             while (curr_ != end_)
446             {
447                 const std::size_t state_ = ptr_[lookup_[static_cast
448                     <typename Traits::index_type>(*curr_++)]];
449
450                 if (state_ == 0)
451                 {
452                     break;
453                 }
454
455                 ptr_ = &dfa_[state_ * dfa_alphabet_];
456
457                 if (*ptr_)
458                 {
459                     end_state_ = true;
460                     id_ = *(ptr_ + id_index);
461                     uid_ = *(ptr_ + unique_id_index);
462                     end_token_ = curr_;
463                 }
464             }
465
466             if (end_state_)
467             {
468                 // return longest match
469                 start_token_ = end_token_;
470             }
471             else
472             {
473                 // No match causes char to be skipped
474                 ++start_token_;
475                 id_ = npos;
476                 uid_ = npos;
477             }
478
479             unique_id_ = uid_;
480             return id_;
481         }
482     };
483
484 #if defined _MSC_VER && _MSC_VER <= 1200
485     friend iterator;
486 #else
487     friend class iterator;
488 #endif
489
490     // Make it explict that we are NOT taking a copy of state_machine_!
491     basic_input (const basic_state_machine<typename Traits::char_type>
492         *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
493         _state_machine (state_machine_),
494         _begin (begin_),
495         _end (end_)
496     {
497     }
498
499     iterator begin () const
500     {
501         iterator iter_;
502
503         iter_._input = this;
504         // Over-ride default of 0 (EOI)
505         iter_._data.id = npos;
506         iter_._data.start = _begin;
507         iter_._data.end = _begin;
508         iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
509         iter_._data.state = 0;
510         ++iter_;
511         return iter_;
512     }
513
514     iterator end () const
515     {
516         iterator iter_;
517
518         iter_._input = this;
519         iter_._data.start = _end;
520         iter_._data.end = _end;
521         return iter_;
522     }
523
524 private:
525     const basic_state_machine<typename Traits::char_type> *_state_machine;
526     FwdIter _begin;
527     FwdIter _end;
528 };
529
530 typedef basic_input<std::string::iterator> iter_input;
531 typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
532 typedef basic_input<const char *> ptr_input;
533 typedef basic_input<const wchar_t *> ptr_winput;
534 }
535 }
536
537 #endif