}
-void Scanner::ScanEscape() {
+bool Scanner::ScanEscape() {
uc32 c = c0_;
Advance();
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
// Allow LF+CR newlines in multiline string literals.
if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
- return;
+ return true;
}
switch (c) {
case 't' : c = '\t'; break;
case 'u' : {
c = ScanHexNumber(4);
- if (c < 0) c = 'u';
+ if (c < 0) return false;
break;
}
case 'v' : c = '\v'; break;
case 'x' : {
c = ScanHexNumber(2);
- if (c < 0) c = 'x';
+ if (c < 0) return false;
break;
}
case '0' : // fall through
case '7' : c = ScanOctalEscape(c, 2); break;
}
- // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
- // should be illegal, but they are commonly handled
- // as non-escaped characters by JS VMs.
+ // According to ECMA-262, section 7.8.4, characters not covered by the
+ // above cases should be illegal, but they are commonly handled as
+ // non-escaped characters by JS VMs.
AddLiteralChar(c);
+ return true;
}
uc32 c = c0_;
Advance();
if (c == '\\') {
- if (c0_ < 0) return Token::ILLEGAL;
- ScanEscape();
+ if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
} else {
AddLiteralChar(c);
}
Token::Value ScanIdentifierOrKeyword();
Token::Value ScanIdentifierSuffix(LiteralScope* literal);
- void ScanEscape();
Token::Value ScanString();
- // Decodes a unicode escape-sequence which is part of an identifier.
+ // Scans an escape-sequence which is part of a string and adds the
+ // decoded character to the current literal. Returns true if a pattern
+ // is scanned.
+ bool ScanEscape();
+ // Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
- // Recognizes a uniocde escape-sequence and adds its characters,
+ // Scans a Unicode escape-sequence and adds its characters,
// uninterpreted, to the current literal. Used for parsing RegExp
// flags.
bool ScanLiteralUnicodeEscape();
// Characters just outside the ranges of hex-escapes.
// "/" comes just before "0".
-assertEquals("x1/", "\x1/");
-assertEquals("u111/", "\u111/");
+assertThrows('"\\x1/"');
+assertThrows('"\\u111/"');
assertEquals("\\x1/", RegExp("\\x1/").source);
assertEquals("\\u111/", RegExp("\\u111/").source);
// ":" comes just after "9".
-assertEquals("x1:", "\x1:");
-assertEquals("u111:", "\u111:");
+assertThrows('"\\x1:"');
+assertThrows('"\\u111:"');
assertEquals("\\x1:", /\x1:/.source);
assertEquals("\\u111:", /\u111:/.source);
// "`" comes just before "a".
-assertEquals("x1`", "\x1`");
-assertEquals("u111`", "\u111`");
+assertThrows('"\\x1`"');
+assertThrows('"\\u111`"');
assertEquals("\\x1`", /\x1`/.source);
assertEquals("\\u111`", /\u111`/.source);
// "g" comes just before "f".
-assertEquals("x1g", "\x1g");
-assertEquals("u111g", "\u111g");
+assertThrows('"\\x1g"');
+assertThrows('"\\u111g"');
assertEquals("\\x1g", /\x1g/.source);
assertEquals("\\u111g", /\u111g/.source);
// "@" comes just before "A".
-assertEquals("x1@", "\x1@");
-assertEquals("u111@", "\u111@");
+assertThrows('"\\x1@"');
+assertThrows('"\\u111@"');
assertEquals("\\x1@", /\x1@/.source);
assertEquals("\\u111@", /\u111@/.source);
// "G" comes just after "F".
-assertEquals("x1G", "\x1G");
-assertEquals("u111G", "\u111G");
+assertThrows('"\\x1G"');
+assertThrows('"\\u111G"');
assertEquals("\\x1G", /\x1G/.source);
assertEquals("\\u111G", /\u111G/.source);
js1_5/Regress/regress-416737-02: FAIL_OK
+# Illegal escape-sequences in string literals. Has already been fixed
+# by most engines (i.e. V8, JSC, Opera and FF).
+ecma/Array/15.4.5.1-1: FAIL_OK
+ecma/LexicalConventions/7.7.4: FAIL_OK
+ecma_2/RegExp/hex-001: FAIL_OK
+js1_2/regexp/hexadecimal: FAIL_OK
+
+
##################### FAILING TESTS #####################
# This section is for tests that fail in V8 and pass in JSC.
# We are more lenient in which string character escapes we allow than
# the spec (7.8.4 p. 19) wants us to be. This is for compatibility.
-S7.8.4_A4.3_T2: FAIL_OK
-S7.8.4_A4.3_T2: FAIL_OK
-S7.8.4_A6.2_T2: FAIL_OK
-S7.8.4_A6.1_T4: FAIL_OK
-S7.8.4_A4.3_T4: FAIL_OK
-S7.8.4_A7.2_T2: FAIL_OK
-S7.8.4_A7.1_T4: FAIL_OK
-S7.8.4_A6.4_T2: FAIL_OK
-S7.8.4_A7.4_T2: FAIL_OK
-S7.8.4_A7.2_T4: FAIL_OK
-S7.8.4_A4.3_T6: FAIL_OK
-S7.8.4_A7.2_T6: FAIL_OK
S7.8.4_A4.3_T1: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
-S7.8.4_A4.3_T3: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
-S7.8.4_A6.4_T1: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
-S7.8.4_A7.4_T1: FAIL_OK
-S7.8.4_A4.3_T5: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
-S7.8.4_A4.3_T1: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
+S7.8.4_A4.3_T2: FAIL_OK
S7.8.4_A4.3_T3: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
+S7.8.4_A4.3_T4: FAIL_OK
S7.8.4_A6.4_T1: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
+S7.8.4_A6.4_T2: FAIL_OK
S7.8.4_A7.4_T1: FAIL_OK
-S7.8.4_A4.3_T5: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
+S7.8.4_A7.4_T2: FAIL_OK
# Sputnik expects unicode escape sequences in RegExp flags to be interpreted.
# The specification requires them to be passed uninterpreted to the RegExp
S15.8.2.16_A7: PASS || FAIL_OK
S15.8.2.18_A7: PASS || FAIL_OK
-# We are more lenient in which string character escapes we allow than
-# the spec (7.8.4 p. 19) wants us to be. This is for compatibility.
-S7.8.4_A6.1_T4: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
-S7.8.4_A6.2_T2: FAIL_OK
-S7.8.4_A7.1_T4: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
-S7.8.4_A7.2_T2: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
-S7.8.4_A7.2_T4: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
-S7.8.4_A7.2_T6: FAIL_OK
-Sbp_7.8.4_A6.1_T4: FAIL_OK
-Sbp_7.8.4_A6.2_T1: FAIL_OK
-Sbp_7.8.4_A6.2_T2: FAIL_OK
-
# Linux for ia32 (and therefore simulators) default to extended 80 bit floating
# point formats, so these tests checking 64-bit FP precision fail. The other
# platforms/arch's pass these tests.