2 * A mini C-like language scanner.
12 newline = '\n' @{curline += 1;};
13 any_count_line = any | newline;
15 # Consume a C comment.
16 c_comment := any_count_line* :>> '*/' @{fgoto main;};
20 # Alphanumeric characters or underscore.
21 alnum_u = alnum | '_';
23 # Alphabetic characters or underscore.
24 alpha_u = alpha | '_';
26 # Symbols. Upon entering clear the buffer. On all transitions
27 # buffer a character. Upon leaving dump the symbol.
29 printf( "symbol(%i): %c\n", curline, tokstart[0] );
32 # Identifier. Upon entering clear the buffer. On all transitions
33 # buffer a character. Upon leaving, dump the identifier.
35 printf( "ident(%i): ", curline );
36 fwrite( tokstart, 1, tokend-tokstart, stdout );
41 sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
42 '\'' . sliteralChar* . '\'' {
43 printf( "single_lit(%i): ", curline );
44 fwrite( tokstart, 1, tokend-tokstart, stdout );
49 dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
50 '"' . dliteralChar* . '"' {
51 printf( "double_lit(%i): ", curline );
52 fwrite( tokstart, 1, tokend-tokstart, stdout );
56 # Whitespace is standard ws, newlines and control codes.
57 any_count_line - 0x21..0x7e;
59 # Describe both c style comments and c++ style comments. The
60 # priority bump on the terminator of the comments brings us
61 # out of the extend* which matches everything.
64 '/*' { fgoto c_comment; };
66 # Match an integer. We don't bother clearing the buf or filling it.
67 # The float machine overlaps with int and it will do it.
69 printf( "int(%i): ", curline );
70 fwrite( tokstart, 1, tokend-tokstart, stdout );
74 # Match a float. Upon entering the machine clear the buf, buffer
75 # characters on every trans and dump the float upon leaving.
77 printf( "float(%i): ", curline );
78 fwrite( tokstart, 1, tokend-tokstart, stdout );
82 # Match a hex. Upon entering the hex part, clear the buf, buffer characters
83 # on every trans and dump the hex on leaving transitions.
85 printf( "hex(%i): ", curline );
86 fwrite( tokstart, 1, tokend-tokstart, stdout );
93 %% write data nofinal;
99 static char buf[BUFSIZE];
100 int cs, act, have = 0, curline = 1;
101 char *tokstart, *tokend = 0;
107 char *p = buf + have, *pe;
108 int len, space = BUFSIZE - have;
111 /* We've used up the entire buffer storing an already-parsed token
112 * prefix that must be preserved. */
113 fprintf(stderr, "OUT OF BUFFER SPACE\n" );
117 len = fread( p, 1, space, stdin );
119 /* If this is the last buffer, tack on an EOF. */
128 if ( cs == clang_error ) {
129 fprintf(stderr, "PARSE ERROR\n" );
136 /* There is a prefix to preserve, shift it over. */
137 have = pe - tokstart;
138 memmove( buf, tokstart, have );
139 tokend = buf + (tokend-tokstart);