Added a language-independent test based on cppscan3.rl. Added the necessary
transformations as well.

author thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Sat, 7 Apr 2007 05:06:14 +0000 (05:06 +0000)
committer thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Sat, 7 Apr 2007 05:06:14 +0000 (05:06 +0000)

git-svn-id: http://svn.complang.org/ragel/trunk@176 052ea7fc-9027-0410-9066-f65837a77df0

test/clang4.rl
test/cppscan6.rl [new file with mode: 0644]
test/langtrans_c.txl
test/langtrans_d.txl
test/langtrans_java.txl
test/langtrans_ruby.txl
test/testcase.txl

index bce769b..cdd0c44 100644 (file)
@@ -111,7 +111,7 @@ line = 1;
        # priority bump on tne terminator of the comments brings us
        # out of the extend* which matches everything.
        ccComment = '//' . extend* $0 . '\n' @1;
-       cComment = '/\*' . extend* $0 . '\*/' @1;
+       cComment = '/!' . extend* $0 . '!/' @1;
 
        # Match an integer. We don't bother clearing the buf or filling it.
        # The float machine overlaps with int and it will do it.
@@ -153,19 +153,20 @@ line = 1;
        main := clang_main & newline;
 }%%
 /* _____INPUT_____
-"999 0xaAFF99 99.99 *\n 'lksdj' //\n\"\n\nliteral\n\n\"\n0x00aba foobardd.ddsf 0x0.9\n"
+"999 0xaAFF99 99.99 /!\n!/ 'lksdj' //\n\"\n\nliteral\n\n\n\"0x00aba foobardd.ddsf 0x0.9\n"
+"wordwithnum00asdf\n000wordfollowsnum,makes new symbol\n\nfinishing early /! unfinished ...\n"
 _____INPUT_____ */
 /* _____OUTPUT_____
 int(1,3): 999
 hex(1,6): aAFF99
 float(1,5): 99.99
-symbol(1,1): *
 literal(2,5): lksdj
-literal(7,11): 
+literal(8,12): 
 
 literal
 
 
+
 hex(8,5): 00aba
 ident(8,8): foobardd
 symbol(8,1): .
@@ -174,5 +175,15 @@ hex(8,1): 0
 symbol(8,1): .
 int(8,1): 9
 ACCEPT
+ident(1,17): wordwithnum00asdf
+int(2,3): 000
+ident(2,14): wordfollowsnum
+symbol(2,1): ,
+ident(2,5): makes
+ident(2,3): new
+ident(2,6): symbol
+ident(4,9): finishing
+ident(4,5): early
+FAIL
 _____OUTPUT_____ */
 
diff --git a/test/cppscan6.rl b/test/cppscan6.rl
new file mode 100644 (file)
index 0000000..c0fde39
--- /dev/null
@@ -0,0 +1,358 @@
+/*
+ * @LANG: indep
+ *
+ * const char *data = tokstart;
+ * int len = tokend - tokstart;
+ * cout << "<" << tok << "> ";
+ * for ( int i = 0; i < len; i++ )
+ *     cout << data[i];
+ * cout << '\n';
+ */
+ptr tokstart;
+ptr tokend;
+int act;
+int token;
+%%
+%%{
+       machine scanner;
+
+       action comment {
+               token = 242;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       }
+
+
+       main := |*
+
+       # Single and double literals.
+       ( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" ) 
+               => { 
+                       token = 193;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+       ( 'L'? '"' ( [^"\\\n] | '\\' any )* '"' ) 
+               => { 
+                       token = 192;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+
+       # Identifiers
+       ( [a-zA-Z_] [a-zA-Z0-9_]* ) 
+               =>{
+                       token = 195;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+
+       # Floating literals.
+       fract_const = digit* '.' digit+ | digit+ '.';
+       exponent = [eE] [+\-]? digit+;
+       float_suffix = [flFL];
+
+       ( fract_const exponent? float_suffix? |
+               digit+ exponent float_suffix? ) 
+               => {
+                       token = 194;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+       
+       # Integer decimal. Leading part buffered by float.
+       ( ( '0' | [1-9] [0-9]* ) [ulUL]? ) 
+               => {
+                       token = 218;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+
+       # Integer octal. Leading part buffered by float.
+       ( '0' [0-9]+ [ulUL]? ) 
+               => {
+                       token = 219;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+
+       # Integer hex. Leading 0 buffered by float.
+       ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]? ) ) 
+               => {
+                       token = 220;
+                       prints "<";
+                       printi token;
+                       prints "> ";
+                       print_token;
+                       prints "\n";
+               };
+
+       # Only buffer the second item, first buffered by symbol.
+       '::' => {
+               token = 197;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '==' => {
+               token = 223;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '!=' => {
+               token = 224;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '&&' => {
+               token = 225;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '||' => {
+               token = 226;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '*=' => {
+               token = 227;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '/=' => {
+               token = 228;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '%=' => {
+               token = 229;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '+=' => {
+               token = 230;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '-=' => {
+               token = 231;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '&=' => {
+               token = 232;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '^=' => {
+               token = 233;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '|=' => {
+               token = 234;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '++' => {
+               token = 212;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '--' => {
+               token = 213;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '->' => {
+               token = 211;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '->*' => {
+               token = 214;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       '.*' => {
+               token = 215;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+
+       # Three char compounds, first item already buffered.
+       '...' => {
+               token = 240;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+
+       # Single char symbols.
+       ( punct - [_"'] ) => {
+               token = first_token_char;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+
+       # Comments and whitespace.
+       '/!' ( any* $0 '!/' @1 ) => comment;
+       '//' ( any* $0 '\n' @1 ) => comment;
+       ( any - 33..126 )+ => { 
+               token = 241;
+               prints "<";
+               printi token;
+               prints "> ";
+               print_token;
+               prints "\n";
+       };
+       *|;
+}%%
+/* _____INPUT_____
+"\"\\\"hi\" /!\n!/\n44 .44\n44. 44\n44 . 44\n44.44\n_hithere22\n\n"
+"'\\''\"\\n\\d'\\\"\"\nhi\n99\n.99\n99e-4\n->*\n||\n0x98\n0x\n//\n/! * !/\n"
+"'\n'\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+<192> "\"hi"
+<241>  
+<242> /!
+!/
+<241> 
+
+<218> 44
+<241>  
+<194> .44
+<241> 
+
+<194> 44.
+<241>  
+<218> 44
+<241> 
+
+<218> 44
+<241>  
+<46> .
+<241>  
+<218> 44
+<241> 
+
+<194> 44.44
+<241> 
+
+<195> _hithere22
+ACCEPT
+<193> '\''
+<192> "\n\d'\""
+<241> 
+
+<195> hi
+<241> 
+
+<218> 99
+<241> 
+
+<194> .99
+<241> 
+
+<194> 99e-4
+<241> 
+
+<214> ->*
+<241> 
+
+<226> ||
+<241> 
+
+<220> 0x98
+<241> 
+
+<218> 0
+<195> x
+<241> 
+
+<242> //
+
+<242> /! * !/
+ACCEPT
+FAIL
+_____OUTPUT_____ */
index b6abf93..681baa2 100644 (file)
@@ -116,6 +116,13 @@ function alStmtToC1 AlStmt [action_lang_stmt]
                Result
 end function
 
+function alTermToC
+       replace [al_term]
+               'first_token_char
+       by
+               'tokstart '[0]
+end function
+
 function alExprExtendToC AlExprExtend [repeat al_expr_extend]
        deconstruct AlExprExtend
                Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
@@ -123,7 +130,7 @@ function alExprExtendToC AlExprExtend [repeat al_expr_extend]
                _ [alExprExtendToC Rest]
        replace [repeat c_expr_extend]
        by
-               Op Term RestC
+               Op Term [alTermToC] RestC
 end function
 
 function alExprToC AlExpr [al_expr]
@@ -132,7 +139,7 @@ function alExprToC AlExpr [al_expr]
        construct CExprExtend [repeat c_expr_extend]
                _ [alExprExtendToC AlExprExtend]
        construct Result [opt c_expr]
-               ALTerm CExprExtend
+               ALTerm [alTermToC] CExprExtend
        replace [opt c_expr]
        by
                Result [boolVals1] [boolVals2]
@@ -214,6 +221,14 @@ function alStmtToC4c AlStmt [action_lang_stmt]
                'fwrite '( Id ', '1 ', 'pos ', 'stdout ');
 end function
 
+function alStmtToC4d AlStmt [action_lang_stmt]
+       deconstruct AlStmt
+               'print_token ';
+       replace [repeat c_lang_stmt]
+       by
+               'fwrite '( 'tokstart ', '1 ', 'tokend '- 'tokstart ', 'stdout ');
+end function
+
 function alStmtToC5 AlStmt [action_lang_stmt]
        deconstruct AlStmt
                '{ AlSubStmts [repeat action_lang_stmt] '}
@@ -243,6 +258,7 @@ function alToC AlStmts [repeat action_lang_stmt]
                        [alStmtToC4a FirstStmt]
                        [alStmtToC4b FirstStmt]
                        [alStmtToC4c FirstStmt]
+                       [alStmtToC4d FirstStmt]
                        [alStmtToC5 FirstStmt]
                        [alStmtToC6 FirstStmt]
        construct RestC [repeat c_lang_stmt]
index b645bb5..bff4a08 100644 (file)
@@ -95,6 +95,13 @@ function alStmtToD1 AlStmt [action_lang_stmt]
                Result
 end function
 
+function alTermToD
+       replace [al_term]
+               'first_token_char
+       by
+               'tokstart '[0]
+end function
+
 function alExprExtendToD AlExprExtend [repeat al_expr_extend]
        deconstruct AlExprExtend
                Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
@@ -102,7 +109,7 @@ function alExprExtendToD AlExprExtend [repeat al_expr_extend]
                _ [alExprExtendToD Rest]
        replace [repeat d_expr_extend]
        by
-               Op Term DRest
+               Op Term [alTermToD] DRest
 end function
 
 function alExprToD AlExpr [al_expr]
@@ -111,7 +118,7 @@ function alExprToD AlExpr [al_expr]
        construct DExprExtend [repeat d_expr_extend]
                _ [alExprExtendToD AlExprExtend]
        construct Result [opt d_expr]
-               ALTerm DExprExtend
+               ALTerm [alTermToD] DExprExtend
        replace [opt d_expr]
        by
                Result
@@ -194,6 +201,15 @@ function alStmtToD4c AlStmt [action_lang_stmt]
                'writef '( '"%s" ', '_s ') ';
 end function
 
+function alStmtToD4d AlStmt [action_lang_stmt]
+       deconstruct AlStmt
+               'print_token ';
+       replace [repeat d_lang_stmt]
+       by
+               '_s '= tokstart '[0..(tokend-tokstart)] ';
+               'writef '( '"%s" ', '_s ') ';
+end function
+
 function alStmtToD5 AlStmt [action_lang_stmt]
        deconstruct AlStmt
                '{ AlSubStmts [repeat action_lang_stmt] '}
@@ -223,6 +239,7 @@ function alToD AlStmts [repeat action_lang_stmt]
                        [alStmtToD4a FirstStmt]
                        [alStmtToD4b FirstStmt]
                        [alStmtToD4c FirstStmt]
+                       [alStmtToD4d FirstStmt]
                        [alStmtToD5 FirstStmt]
                        [alStmtToD6 FirstStmt]
        construct DRest [repeat d_lang_stmt]
index 6a988aa..6fcf9c3 100644 (file)
@@ -126,6 +126,13 @@ function alStmtToJava1 AlStmt [action_lang_stmt]
                Result
 end function
 
+function alTermToJava
+       replace [al_term]
+               'first_token_char
+       by
+               'data '[tokstart]
+end function
+
 function alExprExtendToJava AlExprExtend [repeat al_expr_extend]
        deconstruct AlExprExtend
                Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
@@ -133,7 +140,7 @@ function alExprExtendToJava AlExprExtend [repeat al_expr_extend]
                _ [alExprExtendToJava Rest]
        replace [repeat java_expr_extend]
        by
-               Op Term JavaRest
+               Op Term [alTermToJava] JavaRest
 end function
 
 function alExprToJava AlExpr [al_expr]
@@ -142,7 +149,7 @@ function alExprToJava AlExpr [al_expr]
        construct JavaExprExtend [repeat java_expr_extend]
                _ [alExprExtendToJava AlExprExtend]
        construct Result [opt java_expr]
-               ALTerm JavaExprExtend
+               ALTerm [alTermToJava] JavaExprExtend
        replace [opt java_expr]
        by
                Result 
@@ -225,6 +232,15 @@ function alStmtToJava4c AlStmt [action_lang_stmt]
                'System '. 'out '. 'print '( '_s ');
 end function
 
+function alStmtToJava4d AlStmt [action_lang_stmt]
+       deconstruct AlStmt
+               'print_token ';
+       replace [repeat java_lang_stmt]
+       by
+               '_s '= 'new 'String '( 'data ', 'tokstart ', 'tokend '- 'tokstart ') ';
+               'System '. 'out '. 'print '( '_s ');
+end function
+
 function alStmtToJava5 AlStmt [action_lang_stmt]
        deconstruct AlStmt
                '{ AlSubStmts [repeat action_lang_stmt] '}
@@ -255,6 +271,7 @@ function alToJava AlStmts [repeat action_lang_stmt]
                        [alStmtToJava4a FirstStmt]
                        [alStmtToJava4b FirstStmt]
                        [alStmtToJava4c FirstStmt]
+                       [alStmtToJava4d FirstStmt]
                        [alStmtToJava5 FirstStmt]
                        [alStmtToJava6 FirstStmt]
        construct JavaRest [repeat java_lang_stmt]
index 3604a2c..009baae 100644 (file)
@@ -135,6 +135,13 @@ function alStmtToRuby1 AlStmt [action_lang_stmt]
                        [initDecl5 VarDecl]
 end function
 
+function alTermToRuby
+       replace [al_term]
+               'first_token_char
+       by
+               'data '[tokstart]
+end function
+
 function alExprExtendToRuby AlExprExtend [repeat al_expr_extend]
        deconstruct AlExprExtend
                Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
@@ -142,7 +149,7 @@ function alExprExtendToRuby AlExprExtend [repeat al_expr_extend]
                _ [alExprExtendToRuby Rest]
        replace [repeat ruby_expr_extend]
        by
-               Op Term RubyRest
+               Op Term [alTermToRuby] RubyRest
 end function
 
 % Note: this doesn't go into the ( al_expr ) form of al_term.
@@ -152,7 +159,7 @@ function alExprToRuby AlExpr [al_expr]
        construct RubyExprExtend [repeat ruby_expr_extend]
                _ [alExprExtendToRuby AlExprExtend]
        construct Result [opt ruby_expr]
-               ALTerm RubyExprExtend
+               ALTerm [alTermToRuby] RubyExprExtend
        replace [opt ruby_expr]
        by
                Result 
@@ -241,6 +248,14 @@ function alStmtToRuby4c AlStmt [action_lang_stmt]
                'print '( '_a '. 'pack '( '"c*" ')  ') ';
 end function
 
+function alStmtToRuby4d AlStmt [action_lang_stmt]
+       deconstruct AlStmt
+               'print_token ';
+       replace [repeat ruby_lang_stmt]
+       by
+               'print '( 'data '[tokstart..tokend-1] ') ';
+end function
+
 function alStmtToRuby5 AlStmt [action_lang_stmt]
        deconstruct AlStmt
                '{ AlSubStmts [repeat action_lang_stmt] '}
@@ -286,6 +301,7 @@ function alToRuby AlStmts [repeat action_lang_stmt]
                        [alStmtToRuby4a FirstStmt]
                        [alStmtToRuby4b FirstStmt]
                        [alStmtToRuby4c FirstStmt]
+                       [alStmtToRuby4d FirstStmt]
                        [alStmtToRuby5 FirstStmt]
                        [alStmtToRuby6 FirstStmt]
                        [fixCharLit]
index 9b19e83..ff9bf43 100644 (file)
@@ -15,9 +15,10 @@ end compounds
 
 keys
        'int 'bool 'true 'false 'char 'ptr
-       'if 'else 'printi 'prints 
+       'if 'else 'printi 'prints 'printb 'print_token
        'fc 'fpc 'fbreak 'fgoto 'fcall 'fret 'fhold 'fexec
        'machine 'alphtype 'action
+       'first_token_char
 end keys
 
 define lang_indep
@@ -77,6 +78,7 @@ end define
 
 define al_print_stmt
                [print_cmd] [al_expr] '; [NL]
+       |       'print_token '; [NL]
 end define
 
 define print_cmd
@@ -124,6 +126,7 @@ define al_term_base
        |       'true
        |       'false
        |       '( [al_expr] ')
+       |       'first_token_char
 end define
 
 define al_sign