Imported Upstream version 1.72.0
[platform/upstream/boost.git] / boost / spirit / home / support / char_encoding / ascii.hpp
1 /*=============================================================================
2     Copyright (c) 2001-2011 Hartmut Kaiser
3     Copyright (c) 2001-2011 Joel de Guzman
4
5     Distributed under the Boost Software License, Version 1.0. (See accompanying
6     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
10
11 #if defined(_MSC_VER)
12 #pragma once
13 #endif
14
15 #include <climits>
16 #include <boost/assert.hpp>
17 #include <boost/cstdint.hpp>
18
19 ///////////////////////////////////////////////////////////////////////////////
20 // constants used to classify the single characters
21 ///////////////////////////////////////////////////////////////////////////////
// One bit per character class; the eight masks together occupy exactly one
// byte, matching the unsigned char element type of the classification table.
#define BOOST_CC_DIGIT    0x01    // decimal digit
#define BOOST_CC_XDIGIT   0x02    // hexadecimal digit
#define BOOST_CC_ALPHA    0x04    // letter
#define BOOST_CC_CTRL     0x08    // control character
#define BOOST_CC_LOWER    0x10    // lower-case letter
#define BOOST_CC_UPPER    0x20    // upper-case letter
#define BOOST_CC_SPACE    0x40    // white space
#define BOOST_CC_PUNCT    0x80    // punctuation
30
31 namespace boost { namespace spirit { namespace char_encoding
32 {
33     // The detection of isgraph(), isprint() and isblank() is done programmatically
34     // to keep the character type table small. Additionally, these functions are
35     // rather seldom used and the programmatic detection is very simple.
36
37     ///////////////////////////////////////////////////////////////////////////
38     // ASCII character classification table
39     ///////////////////////////////////////////////////////////////////////////
40     const unsigned char ascii_char_types[] =
41     {
42         /* NUL   0   0 */   BOOST_CC_CTRL,
43         /* SOH   1   1 */   BOOST_CC_CTRL,
44         /* STX   2   2 */   BOOST_CC_CTRL,
45         /* ETX   3   3 */   BOOST_CC_CTRL,
46         /* EOT   4   4 */   BOOST_CC_CTRL,
47         /* ENQ   5   5 */   BOOST_CC_CTRL,
48         /* ACK   6   6 */   BOOST_CC_CTRL,
49         /* BEL   7   7 */   BOOST_CC_CTRL,
50         /* BS    8   8 */   BOOST_CC_CTRL,
51         /* HT    9   9 */   BOOST_CC_CTRL|BOOST_CC_SPACE,
52         /* NL   10   a */   BOOST_CC_CTRL|BOOST_CC_SPACE,
53         /* VT   11   b */   BOOST_CC_CTRL|BOOST_CC_SPACE,
54         /* NP   12   c */   BOOST_CC_CTRL|BOOST_CC_SPACE,
55         /* CR   13   d */   BOOST_CC_CTRL|BOOST_CC_SPACE,
56         /* SO   14   e */   BOOST_CC_CTRL,
57         /* SI   15   f */   BOOST_CC_CTRL,
58         /* DLE  16  10 */   BOOST_CC_CTRL,
59         /* DC1  17  11 */   BOOST_CC_CTRL,
60         /* DC2  18  12 */   BOOST_CC_CTRL,
61         /* DC3  19  13 */   BOOST_CC_CTRL,
62         /* DC4  20  14 */   BOOST_CC_CTRL,
63         /* NAK  21  15 */   BOOST_CC_CTRL,
64         /* SYN  22  16 */   BOOST_CC_CTRL,
65         /* ETB  23  17 */   BOOST_CC_CTRL,
66         /* CAN  24  18 */   BOOST_CC_CTRL,
67         /* EM   25  19 */   BOOST_CC_CTRL,
68         /* SUB  26  1a */   BOOST_CC_CTRL,
69         /* ESC  27  1b */   BOOST_CC_CTRL,
70         /* FS   28  1c */   BOOST_CC_CTRL,
71         /* GS   29  1d */   BOOST_CC_CTRL,
72         /* RS   30  1e */   BOOST_CC_CTRL,
73         /* US   31  1f */   BOOST_CC_CTRL,
74         /* SP   32  20 */   BOOST_CC_SPACE,
75         /*  !   33  21 */   BOOST_CC_PUNCT,
76         /*  "   34  22 */   BOOST_CC_PUNCT,
77         /*  #   35  23 */   BOOST_CC_PUNCT,
78         /*  $   36  24 */   BOOST_CC_PUNCT,
79         /*  %   37  25 */   BOOST_CC_PUNCT,
80         /*  &   38  26 */   BOOST_CC_PUNCT,
81         /*  '   39  27 */   BOOST_CC_PUNCT,
82         /*  (   40  28 */   BOOST_CC_PUNCT,
83         /*  )   41  29 */   BOOST_CC_PUNCT,
84         /*  *   42  2a */   BOOST_CC_PUNCT,
85         /*  +   43  2b */   BOOST_CC_PUNCT,
86         /*  ,   44  2c */   BOOST_CC_PUNCT,
87         /*  -   45  2d */   BOOST_CC_PUNCT,
88         /*  .   46  2e */   BOOST_CC_PUNCT,
89         /*  /   47  2f */   BOOST_CC_PUNCT,
90         /*  0   48  30 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
91         /*  1   49  31 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
92         /*  2   50  32 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
93         /*  3   51  33 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
94         /*  4   52  34 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
95         /*  5   53  35 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
96         /*  6   54  36 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
97         /*  7   55  37 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
98         /*  8   56  38 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
99         /*  9   57  39 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
100         /*  :   58  3a */   BOOST_CC_PUNCT,
101         /*  ;   59  3b */   BOOST_CC_PUNCT,
102         /*  <   60  3c */   BOOST_CC_PUNCT,
103         /*  =   61  3d */   BOOST_CC_PUNCT,
104         /*  >   62  3e */   BOOST_CC_PUNCT,
105         /*  ?   63  3f */   BOOST_CC_PUNCT,
106         /*  @   64  40 */   BOOST_CC_PUNCT,
107         /*  A   65  41 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
108         /*  B   66  42 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
109         /*  C   67  43 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
110         /*  D   68  44 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
111         /*  E   69  45 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
112         /*  F   70  46 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
113         /*  G   71  47 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
114         /*  H   72  48 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
115         /*  I   73  49 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
116         /*  J   74  4a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
117         /*  K   75  4b */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
118         /*  L   76  4c */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
119         /*  M   77  4d */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
120         /*  N   78  4e */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
121         /*  O   79  4f */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
122         /*  P   80  50 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
123         /*  Q   81  51 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
124         /*  R   82  52 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
125         /*  S   83  53 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
126         /*  T   84  54 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
127         /*  U   85  55 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
128         /*  V   86  56 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
129         /*  W   87  57 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
130         /*  X   88  58 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
131         /*  Y   89  59 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
132         /*  Z   90  5a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
133         /*  [   91  5b */   BOOST_CC_PUNCT,
134         /*  \   92  5c */   BOOST_CC_PUNCT,
135         /*  ]   93  5d */   BOOST_CC_PUNCT,
136         /*  ^   94  5e */   BOOST_CC_PUNCT,
137         /*  _   95  5f */   BOOST_CC_PUNCT,
138         /*  `   96  60 */   BOOST_CC_PUNCT,
139         /*  a   97  61 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
140         /*  b   98  62 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
141         /*  c   99  63 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
142         /*  d  100  64 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
143         /*  e  101  65 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
144         /*  f  102  66 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
145         /*  g  103  67 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
146         /*  h  104  68 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
147         /*  i  105  69 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
148         /*  j  106  6a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
149         /*  k  107  6b */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
150         /*  l  108  6c */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
151         /*  m  109  6d */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
152         /*  n  110  6e */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
153         /*  o  111  6f */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
154         /*  p  112  70 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
155         /*  q  113  71 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
156         /*  r  114  72 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
157         /*  s  115  73 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
158         /*  t  116  74 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
159         /*  u  117  75 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
160         /*  v  118  76 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
161         /*  w  119  77 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
162         /*  x  120  78 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
163         /*  y  121  79 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
164         /*  z  122  7a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
165         /*  {  123  7b */   BOOST_CC_PUNCT,
166         /*  |  124  7c */   BOOST_CC_PUNCT,
167         /*  }  125  7d */   BOOST_CC_PUNCT,
168         /*  ~  126  7e */   BOOST_CC_PUNCT,
169         /* DEL 127  7f */   BOOST_CC_CTRL,
170         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178     };
179
180     ///////////////////////////////////////////////////////////////////////////
181     //  Test characters for specified conditions (using ASCII)
182     ///////////////////////////////////////////////////////////////////////////
183     struct ascii
184     {
185         typedef char char_type;
186         typedef unsigned char classify_type;
187
188         static bool
189         isascii_(int ch)
190         {
191             return 0 == (ch & ~0x7f);
192         }
193
194         static bool
195         ischar(int ch)
196         {
197             return isascii_(ch);
198         }
199
200         // *** Note on assertions: The precondition is that the calls to
201         // these functions do not violate the required range of ch (type int)
202         // which is that strict_ischar(ch) should be true. It is the
203         // responsibility of the caller to make sure this precondition is not
204         // violated.
205
206         static bool
207         strict_ischar(int ch)
208         {
209             return ch >= 0 && ch <= 127;
210         }
211
212         static bool
213         isalnum(int ch)
214         {
215             BOOST_ASSERT(strict_ischar(ch));
216             return (ascii_char_types[ch] & BOOST_CC_ALPHA)
217                 || (ascii_char_types[ch] & BOOST_CC_DIGIT);
218         }
219
220         static bool
221         isalpha(int ch)
222         {
223             BOOST_ASSERT(strict_ischar(ch));
224             return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
225         }
226
227         static bool
228         isdigit(int ch)
229         {
230             BOOST_ASSERT(strict_ischar(ch));
231             return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
232         }
233
234         static bool
235         isxdigit(int ch)
236         {
237             BOOST_ASSERT(strict_ischar(ch));
238             return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
239         }
240
241         static bool
242         iscntrl(int ch)
243         {
244             BOOST_ASSERT(strict_ischar(ch));
245             return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
246         }
247
248         static bool
249         isgraph(int ch)
250         {
251             BOOST_ASSERT(strict_ischar(ch));
252             return ('\x21' <= ch && ch <= '\x7e');
253         }
254
255         static bool
256         islower(int ch)
257         {
258             BOOST_ASSERT(strict_ischar(ch));
259             return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
260         }
261
262         static bool
263         isprint(int ch)
264         {
265             BOOST_ASSERT(strict_ischar(ch));
266             return ('\x20' <= ch && ch <= '\x7e');
267         }
268
269         static bool
270         ispunct(int ch)
271         {
272             BOOST_ASSERT(strict_ischar(ch));
273             return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
274         }
275
276         static bool
277         isspace(int ch)
278         {
279             BOOST_ASSERT(strict_ischar(ch));
280             return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
281         }
282
283         static bool
284         isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
285         {
286             BOOST_ASSERT(strict_ischar(ch));
287             return ('\x09' == ch || '\x20' == ch);
288         }
289
290         static bool
291         isupper(int ch)
292         {
293             BOOST_ASSERT(strict_ischar(ch));
294             return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
295         }
296
297         ///////////////////////////////////////////////////////////////////////
298         //  Simple character conversions
299         ///////////////////////////////////////////////////////////////////////
300
301         static int
302         tolower(int ch)
303         {
304             BOOST_ASSERT(strict_ischar(ch));
305             return isupper(ch) ? (ch - 'A' + 'a') : ch;
306         }
307
308         static int
309         toupper(int ch)
310         {
311             BOOST_ASSERT(strict_ischar(ch));
312             return islower(ch) ? (ch - 'a' + 'A') : ch;
313         }
314
315         static ::boost::uint32_t
316         toucs4(int ch)
317         {
318             BOOST_ASSERT(strict_ischar(ch));
319             return ch;
320         }
321     };
322
323 }}}
324
325 ///////////////////////////////////////////////////////////////////////////////
326 // undefine macros
327 ///////////////////////////////////////////////////////////////////////////////
// Remove the classification flags again, in the order they were defined, so
// they do not leak out of this header.
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
336
337 #endif