--- /dev/null
+/*
+ * @LANG: indep
+ */
+
+ptr tokstart;
+ptr tokend;
+char array[32];
+int pos;
+int line;
+%%
+pos = 0;
+line = 1;
+%%{
+ machine clang;
+
+ # Action to buffer a character: store the current character (fc) at
+ # array[pos], then advance pos by one.
+ action bufChar { array[pos] = fc; pos = pos + 1; }
+
+ # Action to clear the buffer: reset pos to zero. The contents of
+ # array are left as they are; only the write position is rewound.
+ action clearBuf { pos = 0; }
+
+ # Actions to dump tokens as they are matched. Every action below has
+ # the same shape: it prints "<label>(<line>,<pos>): ", then the buffer
+ # (printb array), then a newline. Only the label differs; note that
+ # the integer action prints the label "int".
+ action ident {
+ prints "ident(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action literal {
+ prints "literal(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action float {
+ prints "float(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action integer {
+ prints "int(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action hex {
+ prints "hex(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action symbol {
+ prints "symbol(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+
+ # Alphanumeric characters or underscore.
+ alnumu = alnum | '_';
+
+ # Alpha characters or underscore.
+ alphau = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol. The underscore
+ # and both quote characters are removed from punct here: underscore
+ # belongs to identifiers and the quotes open literals.
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;
+
+ # Identifier: one alphau followed by any number of alnumu. Upon
+ # entering clear the buffer. On all transitions buffer a character.
+ # Upon leaving, dump the identifier.
+ ident = (alphau . alnumu*) >clearBuf $bufChar %ident;
+
+ # Match single characters inside literal strings. Or match
+ # an escape sequence. Buffers the character matched. In the escape
+ # alternative the action is attached to the character following the
+ # backslash, so the backslash itself is not buffered. The plain
+ # alternative excludes the closing quote and the backslash.
+ sliteralChar =
+ ( extend - ['\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+ dliteralChar =
+ ( extend - ["\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+
+ # Single quote and double quote literals. At the start clear
+ # the buffer. Upon leaving dump the literal.
+ sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
+ dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
+ literal = sliteral | dliteral;
+
+ # Whitespace is standard ws, newlines and control codes. It is
+ # defined as every character outside the range 33 .. 126.
+ whitespace = any - 33 .. 126;
+
+ # Describe both c style comments and c++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ ccComment = '//' . extend* $0 . '\n' @1;
+ cComment = '/\*' . extend* $0 . '\*/' @1;
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ integer = digit+ %integer;
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions. The actions
+ # are attached to xdigit+ only, so the '0x' prefix is not buffered.
+ hex = '0x' . xdigit+ >clearBuf $bufChar %hex;
+
+ # Or together all the language elements.
+ fin = ( ccComment |
+ cComment |
+ symbol |
+ ident |
+ literal |
+ whitespace |
+ integer |
+ float |
+ hex );
+
+ # Star the language elements. It is critical in this type of application
+ # that we decrease the priority of out transitions before doing so. This
+ # is so that when we see 'aa' we stay in the fin machine to match an ident
+ # of length two and not wrap around to the front to match two idents of
+ # length one.
+ clang_main = ( fin $1 %0 )*;
+
+ # This machine matches everything, taking note of newlines: each
+ # '\n' increments the line counter.
+ newline = ( any | '\n' @{ line = line + 1; } )*;
+
+ # The final fsm is the lexer intersected with the newline machine which
+ # will count lines for us. Since the newline machine accepts everything,
+ # the set of strings accepted is governed by the clang_main machine, onto
+ # which the newline machine overlays line counting.
+ main := clang_main & newline;
+}%%
+/* _____INPUT_____
+"999 0xaAFF99 99.99 *\n 'lksdj' //\n\"\n\nliteral\n\n\"\n0x00aba foobardd.ddsf 0x0.9\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+int(1,3): 999
+hex(1,6): aAFF99
+float(1,5): 99.99
+symbol(1,1): *
+literal(2,5): lksdj
+literal(7,11):
+
+literal
+
+
+hex(8,5): 00aba
+ident(8,8): foobardd
+symbol(8,1): .
+ident(8,4): ddsf
+hex(8,1): 0
+symbol(8,1): .
+int(8,1): 9
+ACCEPT
+_____OUTPUT_____ */
+
'fputs '( String , 'stdout ');
end function
+% Translate the action-language statement "printb Id;" into C: an
+% fwrite call that writes from Id to stdout with element size 1.
+% The count argument is the literal identifier pos; it is emitted
+% as-is and is not taken from the matched statement.
+function alStmtToC4c AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printb Id [id] ';
+ replace [repeat c_lang_stmt]
+ by
+ 'fwrite '( Id ', '1 ', 'pos ', 'stdout ');
+end function
+
function alStmtToC5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToC3 FirstStmt]
[alStmtToC4a FirstStmt]
[alStmtToC4b FirstStmt]
+ [alStmtToC4c FirstStmt]
[alStmtToC5 FirstStmt]
[alStmtToC6 FirstStmt]
construct RestC [repeat c_lang_stmt]
{
char *p = data;
char *pe = data + len;
+ char _s[];
+
%% write exec;
}
'printi Id [id] ';
replace [repeat d_lang_stmt]
by
- 'writef '( '"%d" ', Id ');
+ 'writef '( '"%d" ', Id ') ';
end function
function alStmtToD4b AlStmt [action_lang_stmt]
'prints String [stringlit] ';
replace [repeat d_lang_stmt]
by
- 'writef '( '"%s" ', String ');
+ 'writef '( '"%s" ', String ') ';
+end function
+
+% Translate the action-language statement "printb Id;" into D: assign
+% the slice Id[0..pos] to _s, then print _s with writef and "%s".
+% Both _s and pos are emitted as literal names; _s is presumably
+% declared by the generated host code -- TODO confirm.
+function alStmtToD4c AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printb Id [id] ';
+ replace [repeat d_lang_stmt]
+ by
+ '_s '= Id '[0..pos] ';
+ 'writef '( '"%s" ', '_s ') ';
end function
function alStmtToD5 AlStmt [action_lang_stmt]
[alStmtToD3 FirstStmt]
[alStmtToD4a FirstStmt]
[alStmtToD4b FirstStmt]
+ [alStmtToD4c FirstStmt]
[alStmtToD5 FirstStmt]
[alStmtToD6 FirstStmt]
construct DRest [repeat d_lang_stmt]
{
int p = 0;
int pe = len;
+ String _s;
%% write exec;
}
define java_type_decl
[al_type_decl]
| 'boolean
+ | 'String
end define
define java_expr_stmt
| [id] [repeat java_dot_id]
| [id] [repeat java_dot_id] '( [java_args] ')
| 'new [java_type_decl] [union]
+ | 'new [java_type_decl] '( [java_args] ')
end define
define java_dot_id
'System '. 'out '. 'print '( String ');
end function
+% Translate the action-language statement "printb Id;" into Java:
+% build a String from Id with the arguments (Id, 0, pos), store it in
+% _s, and print _s with System.out.print. Both _s and pos are emitted
+% as literal names; _s is presumably declared by the generated host
+% code -- TODO confirm.
+function alStmtToJava4c AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printb Id [id] ';
+ replace [repeat java_lang_stmt]
+ by
+ '_s '= 'new 'String '( Id ', '0 ', 'pos ') ';
+ 'System '. 'out '. 'print '( '_s ');
+end function
+
function alStmtToJava5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToJava3 FirstStmt]
[alStmtToJava4a FirstStmt]
[alStmtToJava4b FirstStmt]
+ [alStmtToJava4c FirstStmt]
[alStmtToJava5 FirstStmt]
[alStmtToJava6 FirstStmt]
construct JavaRest [repeat java_lang_stmt]
+% Turn an action-language variable declaration that carries a [union]
+% part into a Ruby assignment of a literal array of zeros. The new
+% pattern accepts any [al_type_decl] instead of only 'int.
+% NOTE(review): the initialiser length is fixed in this rule and is not
+% derived from the declaration -- confirm it suits every test's buffers.
function initDecl5 VarDecl [al_variable_decl]
deconstruct VarDecl
- 'int Id [id] Union [union] ';
+ Type [al_type_decl] Id [id] Union [union] ';
replace [repeat ruby_lang_stmt]
by
- Id '= '[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ';
+ Id '= '[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ';
end function
'print '( String ') ';
end function
+% Translate the action-language statement "printb Id;" into Ruby: take
+% the range Id[0..pos-1] into _a, pack it with the "c*" template and
+% print the result. Both _a and pos are emitted as literal names.
+% NOTE(review): in Ruby the range 0..pos-1 becomes 0..-1 when pos is 0,
+% which selects the whole array rather than nothing -- confirm that an
+% empty buffer never reaches printb, or switch to an exclusive range.
+function alStmtToRuby4c AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printb Id [id] ';
+ replace [repeat ruby_lang_stmt]
+ by
+ '_a = Id '[0..pos-1] ';
+ 'print '( '_a '. 'pack '( '"c*" ') ') ';
+end function
+
function alStmtToRuby5 AlStmt [action_lang_stmt]
deconstruct AlStmt
'{ AlSubStmts [repeat action_lang_stmt] '}
[alStmtToRuby3 FirstStmt]
[alStmtToRuby4a FirstStmt]
[alStmtToRuby4b FirstStmt]
+ [alStmtToRuby4c FirstStmt]
[alStmtToRuby5 FirstStmt]
[alStmtToRuby6 FirstStmt]
[fixCharLit]
fi
echo "$allow_genflags" | grep -e $grep_gen_opt >/dev/null || continue
- if [ $genflags == noflags ]; then
+ if [ "$genflags" == noflags ]; then
be_gen_opt=""
else
be_gen_opt=$gen_opt
end define
+% Keywords that introduce a print statement in the action language.
define print_cmd
- 'printi | 'prints
+ 'printi | 'prints | 'printb
end define
define al_variable_decl
end define
+% A term is a base term optionally followed by a [union] part.
define al_term
+ [al_term_base] [opt union]
+end define
+
+define al_term_base
[id]
| [opt al_sign] [number]
- | [stringlit] [opt union]
+ | [stringlit]
| [charlit]
| 'fc
| 'true