fe5412e005876f6e3af25fcfb9b269800e6455c0
[platform/upstream/boost.git] / boost / spirit / home / support / detail / lexer / file_input.hpp
1 // file_input.hpp
2 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_FILE_INPUT
7 #define BOOST_LEXER_FILE_INPUT
8
9 #include "char_traits.hpp"
10 // memcpy
11 #include <cstring>
12 #include <fstream>
13 #include "size_t.hpp"
14 #include "state_machine.hpp"
15
16 namespace boost
17 {
18 namespace lexer
19 {
20 template<typename CharT, typename Traits = char_traits<CharT> >
21 class basic_file_input
22 {
23 public:
24     class iterator
25     {
26     public:
27         friend class basic_file_input;
28
29         struct data
30         {
31             std::size_t id;
32             std::size_t unique_id;
33             const CharT *start;
34             const CharT *end;
35             std::size_t state;
36
37             // Construct in end() state.
38             data () :
39                 id (0),
40                 unique_id (npos),
41                 state (npos)
42             {
43             }
44
45             bool operator == (const data &rhs_) const
46             {
47                 return id == rhs_.id && unique_id == rhs_.unique_id &&
48                     start == rhs_.start && end == rhs_.end &&
49                     state == rhs_.state;
50             }
51         };
52
53         iterator () :
54             _input (0)
55         {
56         }
57
58         bool operator == (const iterator &rhs_) const
59         {
60             return _data == rhs_._data;
61         }
62
63         bool operator != (const iterator &rhs_) const
64         {
65             return !(*this == rhs_);
66         }
67
68         data &operator * ()
69         {
70             return _data;
71         }
72
73         data *operator -> ()
74         {
75             return &_data;
76         }
77
78         // Let compiler generate operator = ().
79
80         // prefix version
81         iterator &operator ++ ()
82         {
83             next_token ();
84             return *this;
85         }
86
87         // postfix version
88         iterator operator ++ (int)
89         {
90             iterator iter_ = *this;
91
92             next_token ();
93             return iter_;
94         }
95
96         void next_token ()
97         {
98             const detail::internals &internals_ =
99                 _input->_state_machine->data ();
100
101             _data.start = _data.end;
102
103             if (internals_._dfa->size () == 1)
104             {
105                 _data.id = _input->next (&internals_._lookup->front ()->
106                     front (), internals_._dfa_alphabet.front (),
107                     &internals_._dfa->front ()->front (), _data.start,
108                     _data.end, _data.unique_id);
109             }
110             else
111             {
112                 _data.id = _input->next (internals_, _data.state, _data.start,
113                     _data.end, _data.unique_id);
114             }
115
116             if (_data.id == 0)
117             {
118                 _data.start = 0;
119                 _data.end = 0;
120                 // Ensure current state matches that returned by end().
121                 _data.state = npos;
122             }
123         }
124
125     private:
126         // Not owner (obviously!)
127         basic_file_input *_input;
128         data _data;
129     };
130
131     friend class iterator;
132
133     // Make it explict that we are NOT taking a copy of state_machine_!
134     basic_file_input (const basic_state_machine<CharT> *state_machine_,
135         std::basic_ifstream<CharT> *is_,
136         const std::streamsize buffer_size_ = 4096,
137         const std::streamsize buffer_increment_ = 1024) :
138         _state_machine (state_machine_),
139         _stream (is_),
140         _buffer_size (buffer_size_),
141         _buffer_increment (buffer_increment_),
142         _buffer (_buffer_size, '!')
143     {
144         _start_buffer = &_buffer.front ();
145         _end_buffer = _start_buffer + _buffer.size ();
146         _start_token = _end_buffer;
147         _end_token = _end_buffer;
148     }
149
150     iterator begin ()
151     {
152         iterator iter_;
153
154         iter_._input = this;
155         // Over-ride default of 0 (EOF)
156         iter_._data.id = npos;
157         iter_._data.start = 0;
158         iter_._data.end = 0;
159         iter_._data.state = 0;
160         ++iter_;
161         return iter_;
162     }
163
164     iterator end ()
165     {
166         iterator iter_;
167
168         iter_._input = this;
169         iter_._data.start = 0;
170         iter_._data.end = 0;
171         return iter_;
172     }
173
174     void flush ()
175     {
176         // This temporary is mandatory, otherwise the
177         // pointer calculations won't work!
178         const CharT *temp_ = _end_buffer;
179
180         _start_token = _end_token = _end_buffer;
181         reload_buffer (temp_, true, _end_token);
182     }
183
184 private:
185     typedef std::basic_istream<CharT> istream;
186     typedef std::vector<CharT> buffer;
187
188     const basic_state_machine<CharT> *_state_machine;
189     const std::streamsize _buffer_size;
190     const std::streamsize _buffer_increment;
191
192     buffer _buffer;
193     CharT *_start_buffer;
194     istream *_stream;
195     const CharT *_start_token;
196     const CharT *_end_token;
197     CharT *_end_buffer;
198
199     std::size_t next (const detail::internals &internals_,
200         std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
201         std::size_t &unique_id_)
202     {
203         _start_token = _end_token;
204
205 again:
206         const std::size_t * lookup_ = &internals_._lookup[start_state_]->
207             front ();
208         std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
209         const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
210         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
211         const CharT *curr_ = _start_token;
212         bool end_state_ = *ptr_ != 0;
213         std::size_t id_ = *(ptr_ + id_index);
214         std::size_t uid_ = *(ptr_ + unique_id_index);
215         const CharT *end_token_ = curr_;
216
217         for (;;)
218         {
219             if (curr_ >= _end_buffer)
220             {
221                 if (!reload_buffer (curr_, end_state_, end_token_))
222                 {
223                     // EOF
224                     break;
225                 }
226             }
227
228             const std::size_t BOL_state_ = ptr_[bol_index];
229             const std::size_t EOL_state_ = ptr_[eol_index];
230
231             if (BOL_state_ && (_start_token == _start_buffer ||
232                 *(_start_token - 1) == '\n'))
233             {
234                 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
235             }
236             else if (EOL_state_ && *curr_ == '\n')
237             {
238                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
239             }
240             else
241             {
242                 const std::size_t state_ =
243                     ptr_[lookup_[static_cast<typename Traits::index_type>
244                         (*curr_++)]];
245
246                 if (state_ == 0)
247                 {
248                     break;
249                 }
250
251                 ptr_ = &dfa_[state_ * dfa_alphabet_];
252             }
253
254             if (*ptr_)
255             {
256                 end_state_ = true;
257                 id_ = *(ptr_ + id_index);
258                 uid_ = *(ptr_ + unique_id_index);
259                 start_state_ = *(ptr_ + state_index);
260                 end_token_ = curr_;
261             }
262         }
263
264         if (_start_token >= _end_buffer)
265         {
266             // No more tokens...
267             unique_id_ = npos;
268             return 0;
269         }
270
271         const std::size_t EOL_state_ = ptr_[eol_index];
272
273         if (EOL_state_ && curr_ == end_)
274         {
275             ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
276
277             if (*ptr_)
278             {
279                 end_state_ = true;
280                 id_ = *(ptr_ + id_index);
281                 uid_ = *(ptr_ + unique_id_index);
282                 start_state_ = *(ptr_ + state_index);
283                 end_token_ = curr_;
284             }
285         }
286
287         if (end_state_)
288         {
289             // return longest match
290             _end_token = end_token_;
291
292             if (id_ == 0) goto again;
293         }
294         else
295         {
296             // No match causes char to be skipped
297             _end_token = _start_token + 1;
298             id_ = npos;
299             uid_ = npos;
300         }
301
302         start_ = _start_token;
303         end_ = _end_token;
304         unique_id_ = uid_;
305         return id_;
306     }
307
308     std::size_t next (const std::size_t * const lookup_,
309         const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
310         const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
311     {
312         _start_token = _end_token;
313
314         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
315         const CharT *curr_ = _start_token;
316         bool end_state_ = *ptr_ != 0;
317         std::size_t id_ = *(ptr_ + id_index);
318         std::size_t uid_ = *(ptr_ + unique_id_index);
319         const CharT *end_token_ = curr_;
320
321         for (;;)
322         {
323             if (curr_ >= _end_buffer)
324             {
325                 if (!reload_buffer (curr_, end_state_, end_token_))
326                 {
327                     // EOF
328                     break;
329                 }
330             }
331
332             const std::size_t BOL_state_ = ptr_[bol_index];
333             const std::size_t EOL_state_ = ptr_[eol_index];
334
335             if (BOL_state_ && (_start_token == _start_buffer ||
336                 *(_start_token - 1) == '\n'))
337             {
338                 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
339             }
340             else if (EOL_state_ && *curr_ == '\n')
341             {
342                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
343             }
344             else
345             {
346                 const std::size_t state_ =
347                     ptr_[lookup_[static_cast<typename Traits::index_type>
348                         (*curr_++)]];
349
350                 if (state_ == 0)
351                 {
352                     break;
353                 }
354
355                 ptr_ = &dfa_[state_ * dfa_alphabet_];
356             }
357
358             if (*ptr_)
359             {
360                 end_state_ = true;
361                 id_ = *(ptr_ + id_index);
362                 uid_ = *(ptr_ + unique_id_index);
363                 end_token_ = curr_;
364             }
365         }
366
367         if (_start_token >= _end_buffer)
368         {
369             // No more tokens...
370             unique_id_ = npos;
371             return 0;
372         }
373
374         const std::size_t EOL_state_ = ptr_[eol_index];
375
376         if (EOL_state_ && curr_ == end_)
377         {
378             ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
379
380             if (*ptr_)
381             {
382                 end_state_ = true;
383                 id_ = *(ptr_ + id_index);
384                 uid_ = *(ptr_ + unique_id_index);
385                 end_token_ = curr_;
386             }
387         }
388
389         if (end_state_)
390         {
391             // return longest match
392             _end_token = end_token_;
393         }
394         else
395         {
396             // No match causes char to be skipped
397             _end_token = _start_token + 1;
398             id_ = npos;
399             uid_ = npos;
400         }
401
402         start_ = _start_token;
403         end_ = _end_token;
404         unique_id_ = uid_;
405         return id_;
406     }
407
408     bool reload_buffer (const CharT * &curr_, const bool end_state_,
409         const CharT * &end_token_)
410     {
411         bool success_ = !_stream->eof ();
412
413         if (success_)
414         {
415             const CharT *old_start_token_ = _start_token;
416             std::size_t old_size_ = _buffer.size ();
417             std::size_t count_ = 0;
418
419             if (_start_token - 1 == _start_buffer)
420             {
421                 // Run out of buffer space, so increase.
422                 _buffer.resize (old_size_ + _buffer_increment, '!');
423                 _start_buffer = &_buffer.front ();
424                 _start_token = _start_buffer + 1;
425                 _stream->read (_start_buffer + old_size_,
426                     _buffer_increment);
427                 count_ = _stream->gcount ();
428                 _end_buffer = _start_buffer + old_size_ + count_;
429             }
430             else if (_start_token < _end_buffer)
431             {
432                 const std::size_t len_ = _end_buffer - _start_token;
433                 // Some systems have memcpy in namespace std.
434                 using namespace std;
435
436                 memcpy (_start_buffer, _start_token - 1, (len_ + 1) *
437                     sizeof (CharT));
438                 _stream->read (_start_buffer + len_ + 1,
439                     static_cast<std::streamsize> (_buffer.size () - len_ - 1));
440                 count_ = _stream->gcount ();
441                 _start_token = _start_buffer + 1;
442                 _end_buffer = _start_buffer + len_ + 1 + count_;
443             }
444             else
445             {
446                 _stream->read (_start_buffer, static_cast<std::streamsize>
447                     (_buffer.size ()));
448                 count_ = _stream->gcount ();
449                 _start_token = _start_buffer;
450                 _end_buffer = _start_buffer + count_;
451             }
452
453             if (end_state_)
454             {
455                 end_token_ = _start_token +
456                     (end_token_ - old_start_token_);
457             }
458
459             curr_ = _start_token + (curr_ - old_start_token_);
460         }
461
462         return success_;
463     }
464
465     // Disallow copying of buffer
466     basic_file_input (const basic_file_input &);
467     const basic_file_input &operator = (const basic_file_input &);
468 };
469
470 typedef basic_file_input<char> file_input;
471 typedef basic_file_input<wchar_t> wfile_input;
472 }
473 }
474
475 #endif