/* Token ids handed to token() by the scanner rules for multi-character tokens.
 * Single punctuation characters are reported with their own character code
 * instead (the rule for them calls token( ts[0] )). */
13 #define TK_NameSep 197 /* '::' */
15 #define TK_PlusPlus 212 /* '++' */
16 #define TK_MinusMinus 213 /* '--' */
17 #define TK_ArrowStar 214 /* '->*' */
18 #define TK_DotStar 215 /* '.*' */
19 #define TK_ShiftLeft 216 /* NOTE(review): no rule emitting this is visible in this excerpt */
20 #define TK_ShiftRight 217 /* NOTE(review): no rule emitting this is visible in this excerpt */
21 #define TK_IntegerDecimal 218 /* decimal integer literal with optional u/l suffix characters */
22 #define TK_IntegerOctal 219 /* integer literal with a leading '0' */
23 #define TK_IntegerHex 220 /* '0x' followed by hex digits */
24 #define TK_EqualsEquals 223 /* '==' */
25 #define TK_NotEquals 224 /* '!=' */
28 #define TK_MultAssign 227 /* '*=' */
29 #define TK_DivAssign 228 /* '/=' */
30 #define TK_PercentAssign 229 /* '%=' */
31 #define TK_PlusAssign 230 /* '+=' */
32 #define TK_MinusAssign 231 /* '-=' */
33 #define TK_AmpAssign 232 /* '&=' */
34 #define TK_CaretAssign 233 /* '^=' */
35 #define TK_BarAssign 234 /* '|=' */
36 #define TK_DotDotDot 240 /* '...' */
37 #define TK_Whitespace 241 /* run of characters outside the 33..126 range */
38 #define TK_Comment 242 /* NOTE(review): presumably emitted by the comment machine; not visible here */
46 void token( int tok ); /* Prints one scanned token; tok is a TK_* id or a punctuation character code. */
47 void run( const char *buf ); /* Scans buf, whose length is taken with strlen(), so it must be NUL-terminated. */
54 # Process all comments, relies on isCxx being set.
60 cout << "comm char: " << ts[0] << endl;
61 cout << "comm char: " << ts[1] << endl;
69 cout << "comm char: " << ts[0] << endl;
73 cout << "comm char: " << ts[0] << endl;
79 # Single-quoted and double-quoted literals, each with an optional 'L' prefix.
80 ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit );};
81 ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit );};
84 ( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id ); };
87 fract_const = digit* '.' digit+ | digit+ '.';
88 exponent = [eE] [+\-]? digit+;
89 float_suffix = [flFL];
91 ( fract_const exponent? float_suffix? |
92 digit+ exponent float_suffix? ) { token( TK_Float );};
94 # Integer decimal. Leading part buffered by float.
95 ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal );};
97 # Integer octal. Leading part buffered by float.
98 ( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal );};
100 # Integer hex. Leading 0 buffered by float.
101 ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex );};
103 # Only buffer the second item, first buffered by symbol.
104 '::' {token( TK_NameSep );};
105 '==' {token( TK_EqualsEquals );};
106 '!=' {token( TK_NotEquals );};
107 '&&' {token( TK_AndAnd );};
108 '||' {token( TK_OrOr );};
109 '*=' {token( TK_MultAssign );};
110 '/=' {token( TK_DivAssign );};
111 '%=' {token( TK_PercentAssign );};
112 '+=' {token( TK_PlusAssign );};
113 '-=' {token( TK_MinusAssign );};
114 '&=' {token( TK_AmpAssign );};
115 '^=' {token( TK_CaretAssign );};
116 '|=' {token( TK_BarAssign );};
117 '++' {token( TK_PlusPlus );};
118 '--' {token( TK_MinusMinus );};
119 '->' {token( TK_Arrow );};
120 '->*' {token( TK_ArrowStar );};
121 '.*' {token( TK_DotStar );};
123 # Three char compounds, first item already buffered.
124 '...' { token( TK_DotDotDot );};
126 # Single char symbols.
127 ( punct - [_"'] ) { token( ts[0] );};
129 # Comments and whitespace. Handle these outside of the machine so that we
130 # don't end up buffering the comments.
131 '/*' { isCxx = false; fgoto comment; };
132 '//' { isCxx = true; fgoto comment; };
134 ( any - 33..126 )+ { token( TK_Whitespace );};
139 %% write data nofinal;
141 void Scanner::token( int tok )
143 const char *data = ts;
145 cout << "<" << tok << "> ";
147 for ( int i = 0; i < len; i++ )
153 void Scanner::run( const char *buf )
155 int len = strlen( buf );
158 const char *pe = buf + len;
159 const char *eof = pe;
162 if ( cs == Scanner_error ) {
163 /* Machine failed before finding a token. */
164 cout << "PARSE ERROR" << endl;
173 "/*hi there*/ hello 0x88"
178 #ifdef _____OUTPUT_____