# priority bump on the terminator of the comments brings us
# out of the extend* which matches everything.
ccComment = '//' . extend* $0 . '\n' @1;
- cComment = '/\*' . extend* $0 . '\*/' @1;
+ cComment = '/!' . extend* $0 . '!/' @1;
# Match an integer. We don't bother clearing the buf or filling it.
# The float machine overlaps with int and it will do it.
main := clang_main & newline;
}%%
/* _____INPUT_____
-"999 0xaAFF99 99.99 *\n 'lksdj' //\n\"\n\nliteral\n\n\"\n0x00aba foobardd.ddsf 0x0.9\n"
+"999 0xaAFF99 99.99 /!\n!/ 'lksdj' //\n\"\n\nliteral\n\n\n\"0x00aba foobardd.ddsf 0x0.9\n"
+"wordwithnum00asdf\n000wordfollowsnum,makes new symbol\n\nfinishing early /! unfinished ...\n"
_____INPUT_____ */
/* _____OUTPUT_____
int(1,3): 999
hex(1,6): aAFF99
float(1,5): 99.99
-symbol(1,1): *
literal(2,5): lksdj
-literal(7,11):
+literal(8,12):
literal
+
hex(8,5): 00aba
ident(8,8): foobardd
symbol(8,1): .
symbol(8,1): .
int(8,1): 9
ACCEPT
+ident(1,17): wordwithnum00asdf
+int(2,3): 000
+ident(2,14): wordfollowsnum
+symbol(2,1): ,
+ident(2,5): makes
+ident(2,3): new
+ident(2,6): symbol
+ident(4,9): finishing
+ident(4,5): early
+FAIL
_____OUTPUT_____ */
--- /dev/null
+/*
+ * @LANG: indep
+ *
+ * const char *data = tokstart;
+ * int len = tokend - tokstart;
+ * cout << "<" << tok << "> ";
+ * for ( int i = 0; i < len; i++ )
+ * cout << data[i];
+ * cout << '\n';
+ */
+ptr tokstart;
+ptr tokend;
+int act;
+int token;
+%%
+%%{
+ machine scanner;
+
+ # Shared action for both comment patterns in the scanner: sets token to
+ # 242, then prints "<242> ", the matched text (print_token) and a newline.
+ action comment {
+ token = 242;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ }
+
+
+ # Scanner entry point, written as a |* ... *| pattern list. Every
+ # pattern's action stores a numeric id in `token` and then prints
+ # "<id> ", the matched text (print_token) and a newline.
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" )
+ => {
+ token = 193;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ ( 'L'? '"' ( [^"\\\n] | '\\' any )* '"' )
+ => {
+ token = 192;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ =>{
+ token = 195;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? )
+ => {
+ token = 194;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]? )
+ => {
+ token = 218;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]? )
+ => {
+ token = 219;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]? ) )
+ => {
+ token = 220;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' => {
+ token = 197;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '==' => {
+ token = 223;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '!=' => {
+ token = 224;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '&&' => {
+ token = 225;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '||' => {
+ token = 226;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '*=' => {
+ token = 227;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '/=' => {
+ token = 228;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '%=' => {
+ token = 229;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '+=' => {
+ token = 230;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '-=' => {
+ token = 231;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '&=' => {
+ token = 232;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '^=' => {
+ token = 233;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '|=' => {
+ token = 234;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '++' => {
+ token = 212;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '--' => {
+ token = 213;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '->' => {
+ token = 211;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '->*' => {
+ token = 214;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ '.*' => {
+ token = 215;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Three char compounds, first item already buffered.
+ '...' => {
+ token = 240;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Single char symbols.
+ # The token id is taken from first_token_char, i.e. the first character
+ # of the match, so a lone '.' is reported as <46>.
+ ( punct - [_"'] ) => {
+ token = first_token_char;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+
+ # Comments and whitespace.
+ # The body any* runs at priority 0 and the last character of the
+ # terminator is bumped to priority 1, so the terminator ends the comment
+ # instead of being absorbed by any*.
+ '/!' ( any* $0 '!/' @1 ) => comment;
+ '//' ( any* $0 '\n' @1 ) => comment;
+ # Runs of characters outside the 33..126 range are reported as token 241.
+ ( any - 33..126 )+ => {
+ token = 241;
+ prints "<";
+ printi token;
+ prints "> ";
+ print_token;
+ prints "\n";
+ };
+ *|;
+}%%
+/* _____INPUT_____
+"\"\\\"hi\" /!\n!/\n44 .44\n44. 44\n44 . 44\n44.44\n_hithere22\n\n"
+"'\\''\"\\n\\d'\\\"\"\nhi\n99\n.99\n99e-4\n->*\n||\n0x98\n0x\n//\n/! * !/\n"
+"'\n'\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+<192> "\"hi"
+<241>
+<242> /!
+!/
+<241>
+
+<218> 44
+<241>
+<194> .44
+<241>
+
+<194> 44.
+<241>
+<218> 44
+<241>
+
+<218> 44
+<241>
+<46> .
+<241>
+<218> 44
+<241>
+
+<194> 44.44
+<241>
+
+<195> _hithere22
+ACCEPT
+<193> '\''
+<192> "\n\d'\""
+<241>
+
+<195> hi
+<241>
+
+<218> 99
+<241>
+
+<194> .99
+<241>
+
+<194> 99e-4
+<241>
+
+<214> ->*
+<241>
+
+<226> ||
+<241>
+
+<220> 0x98
+<241>
+
+<218> 0
+<195> x
+<241>
+
+<242> //
+
+<242> /! * !/
+ACCEPT
+FAIL
+_____OUTPUT_____ */
Result
end function
+% Rewrites the action-language term 'first_token_char into the C
+% expression tokstart[0]. Applies only when the [al_term] in scope is
+% exactly that keyword.
+function alTermToC
+ replace [al_term]
+ 'first_token_char
+ by
+ 'tokstart '[0]
+end function
+
function alExprExtendToC AlExprExtend [repeat al_expr_extend]
deconstruct AlExprExtend
Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
_ [alExprExtendToC Rest]
replace [repeat c_expr_extend]
by
- Op Term RestC
+ Op Term [alTermToC] RestC
end function
function alExprToC AlExpr [al_expr]
construct CExprExtend [repeat c_expr_extend]
_ [alExprExtendToC AlExprExtend]
construct Result [opt c_expr]
- ALTerm CExprExtend
+ ALTerm [alTermToC] CExprExtend
replace [opt c_expr]
by
Result [boolVals1] [boolVals2]
'fwrite '( Id ', '1 ', 'pos ', 'stdout ');
end function
+% Translates the action-language statement "print_token;" to C.
+% AlStmt must deconstruct to exactly 'print_token '; the empty
+% [repeat c_lang_stmt] scope is then replaced by an fwrite call that
+% writes (tokend - tokstart) items of size 1 from tokstart to stdout.
+function alStmtToC4d AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'print_token ';
+ replace [repeat c_lang_stmt]
+ by
+ 'fwrite '( 'tokstart ', '1 ', 'tokend '- 'tokstart ', 'stdout ');
+end function
+
function alStmtToC5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToC4a FirstStmt]
[alStmtToC4b FirstStmt]
[alStmtToC4c FirstStmt]
+ [alStmtToC4d FirstStmt]
[alStmtToC5 FirstStmt]
[alStmtToC6 FirstStmt]
construct RestC [repeat c_lang_stmt]
Result
end function
+% Rewrites the action-language term 'first_token_char into the D
+% expression tokstart[0]. Applies only when the [al_term] in scope is
+% exactly that keyword.
+function alTermToD
+ replace [al_term]
+ 'first_token_char
+ by
+ 'tokstart '[0]
+end function
+
function alExprExtendToD AlExprExtend [repeat al_expr_extend]
deconstruct AlExprExtend
Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
_ [alExprExtendToD Rest]
replace [repeat d_expr_extend]
by
- Op Term DRest
+ Op Term [alTermToD] DRest
end function
function alExprToD AlExpr [al_expr]
construct DExprExtend [repeat d_expr_extend]
_ [alExprExtendToD AlExprExtend]
construct Result [opt d_expr]
- ALTerm DExprExtend
+ ALTerm [alTermToD] DExprExtend
replace [opt d_expr]
by
Result
'writef '( '"%s" ', '_s ') ';
end function
+% Translates the action-language statement "print_token;" to D.
+% AlStmt must deconstruct to exactly 'print_token '; the emitted code
+% assigns the slice tokstart[0..(tokend-tokstart)] to _s and prints it
+% with writef("%s", _s).
+function alStmtToD4d AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'print_token ';
+ replace [repeat d_lang_stmt]
+ by
+ '_s '= tokstart '[0..(tokend-tokstart)] ';
+ 'writef '( '"%s" ', '_s ') ';
+end function
+
function alStmtToD5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToD4a FirstStmt]
[alStmtToD4b FirstStmt]
[alStmtToD4c FirstStmt]
+ [alStmtToD4d FirstStmt]
[alStmtToD5 FirstStmt]
[alStmtToD6 FirstStmt]
construct DRest [repeat d_lang_stmt]
Result
end function
+% Rewrites the action-language term 'first_token_char into the Java
+% expression data[tokstart]. Applies only when the [al_term] in scope is
+% exactly that keyword.
+function alTermToJava
+ replace [al_term]
+ 'first_token_char
+ by
+ 'data '[tokstart]
+end function
+
function alExprExtendToJava AlExprExtend [repeat al_expr_extend]
deconstruct AlExprExtend
Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
_ [alExprExtendToJava Rest]
replace [repeat java_expr_extend]
by
- Op Term JavaRest
+ Op Term [alTermToJava] JavaRest
end function
function alExprToJava AlExpr [al_expr]
construct JavaExprExtend [repeat java_expr_extend]
_ [alExprExtendToJava AlExprExtend]
construct Result [opt java_expr]
- ALTerm JavaExprExtend
+ ALTerm [alTermToJava] JavaExprExtend
replace [opt java_expr]
by
Result
'System '. 'out '. 'print '( '_s ');
end function
+% Translates the action-language statement "print_token;" to Java.
+% AlStmt must deconstruct to exactly 'print_token '; the emitted code
+% builds _s as new String(data, tokstart, tokend - tokstart) and prints
+% it with System.out.print.
+function alStmtToJava4d AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'print_token ';
+ replace [repeat java_lang_stmt]
+ by
+ '_s '= 'new 'String '( 'data ', 'tokstart ', 'tokend '- 'tokstart ') ';
+ 'System '. 'out '. 'print '( '_s ');
+end function
+
function alStmtToJava5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToJava4a FirstStmt]
[alStmtToJava4b FirstStmt]
[alStmtToJava4c FirstStmt]
+ [alStmtToJava4d FirstStmt]
[alStmtToJava5 FirstStmt]
[alStmtToJava6 FirstStmt]
construct JavaRest [repeat java_lang_stmt]
[initDecl5 VarDecl]
end function
+% Rewrites the action-language term 'first_token_char into the Ruby
+% expression data[tokstart]. Applies only when the [al_term] in scope is
+% exactly that keyword.
+function alTermToRuby
+ replace [al_term]
+ 'first_token_char
+ by
+ 'data '[tokstart]
+end function
+
function alExprExtendToRuby AlExprExtend [repeat al_expr_extend]
deconstruct AlExprExtend
Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
_ [alExprExtendToRuby Rest]
replace [repeat ruby_expr_extend]
by
- Op Term RubyRest
+ Op Term [alTermToRuby] RubyRest
end function
% Note: this doesn't go into the ( al_expr ) form of al_term.
construct RubyExprExtend [repeat ruby_expr_extend]
_ [alExprExtendToRuby AlExprExtend]
construct Result [opt ruby_expr]
- ALTerm RubyExprExtend
+ ALTerm [alTermToRuby] RubyExprExtend
replace [opt ruby_expr]
by
Result
'print '( '_a '. 'pack '( '"c*" ') ') ';
end function
+% Translates the action-language statement "print_token;" to Ruby.
+% AlStmt must deconstruct to exactly 'print_token '; the emitted code
+% prints data[tokstart..tokend-1], an inclusive range ending one
+% position before tokend.
+function alStmtToRuby4d AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'print_token ';
+ replace [repeat ruby_lang_stmt]
+ by
+ 'print '( 'data '[tokstart..tokend-1] ') ';
+end function
+
function alStmtToRuby5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToRuby4a FirstStmt]
[alStmtToRuby4b FirstStmt]
[alStmtToRuby4c FirstStmt]
+ [alStmtToRuby4d FirstStmt]
[alStmtToRuby5 FirstStmt]
[alStmtToRuby6 FirstStmt]
[fixCharLit]
keys
'int 'bool 'true 'false 'char 'ptr
- 'if 'else 'printi 'prints
+ 'if 'else 'printi 'prints 'printb 'print_token
'fc 'fpc 'fbreak 'fgoto 'fcall 'fret 'fhold 'fexec
'machine 'alphtype 'action
+ 'first_token_char
end keys
define lang_indep
define al_print_stmt
[print_cmd] [al_expr] '; [NL]
+ | 'print_token '; [NL]
end define
define print_cmd
| 'true
| 'false
| '( [al_expr] ')
+ | 'first_token_char
end define
define al_sign