Fix illegal escape-sequences to throw syntax errors.

author mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)

committer mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)
author mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)
committer mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)
diff --git a/src/scanner.cc b/src/scanner.cc

index 7901b5d..f24af2e 100755 (executable)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -611,7 +611,7 @@ void Scanner::SeekForward(int pos) {
  }
  
  
-void Scanner::ScanEscape() {
+bool Scanner::ScanEscape() {
    uc32 c = c0_;
    Advance();
  
@@ -621,7 +621,7 @@ void Scanner::ScanEscape() {
      if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
      // Allow LF+CR newlines in multiline string literals.
      if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
-    return;
+    return true;
    }
  
    switch (c) {
@@ -635,13 +635,13 @@ void Scanner::ScanEscape() {
      case 't' : c = '\t'; break;
      case 'u' : {
        c = ScanHexNumber(4);
-      if (c < 0) c = 'u';
+      if (c < 0) return false;
        break;
      }
      case 'v' : c = '\v'; break;
      case 'x' : {
        c = ScanHexNumber(2);
-      if (c < 0) c = 'x';
+      if (c < 0) return false;
        break;
      }
      case '0' :  // fall through
@@ -654,10 +654,11 @@ void Scanner::ScanEscape() {
      case '7' : c = ScanOctalEscape(c, 2); break;
    }
  
-  // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
-  // should be illegal, but they are commonly handled
-  // as non-escaped characters by JS VMs.
+  // According to ECMA-262, section 7.8.4, characters not covered by the
+  // above cases should be illegal, but they are commonly handled as
+  // non-escaped characters by JS VMs.
    AddLiteralChar(c);
+  return true;
  }
  
  
@@ -696,8 +697,7 @@ Token::Value Scanner::ScanString() {
      uc32 c = c0_;
      Advance();
      if (c == '\\') {
-      if (c0_ < 0) return Token::ILLEGAL;
-      ScanEscape();
+      if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
      } else {
        AddLiteralChar(c);
      }
diff --git a/src/scanner.h b/src/scanner.h

index 045e7d2..4de413b 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -520,13 +520,16 @@ class Scanner {
    Token::Value ScanIdentifierOrKeyword();
    Token::Value ScanIdentifierSuffix(LiteralScope* literal);
  
-  void ScanEscape();
    Token::Value ScanString();
  
-  // Decodes a unicode escape-sequence which is part of an identifier.
+  // Scans an escape-sequence which is part of a string and adds the
+  // decoded character to the current literal. Returns false if the
+  // escape-sequence is malformed (illegal \x or \u), true otherwise.
+  bool ScanEscape();
+  // Decodes a Unicode escape-sequence which is part of an identifier.
    // If the escape sequence cannot be decoded the result is kBadChar.
    uc32 ScanIdentifierUnicodeEscape();
-  // Recognizes a uniocde escape-sequence and adds its characters,
+  // Scans a Unicode escape-sequence and adds its characters,
    // uninterpreted, to the current literal. Used for parsing RegExp
    // flags.
    bool ScanLiteralUnicodeEscape();
diff --git a/test/mjsunit/compiler/literals.js b/test/mjsunit/compiler/literals.js

index e910bb3..8607cd9 100644 (file)
--- a/test/mjsunit/compiler/literals.js
+++ b/test/mjsunit/compiler/literals.js
@@ -36,38 +36,38 @@ assertEquals(8, eval("6;'abc';8"));
  
  // Characters just outside the ranges of hex-escapes.
  // "/" comes just before "0".
-assertEquals("x1/", "\x1/");
-assertEquals("u111/", "\u111/");
+assertThrows('"\\x1/"');
+assertThrows('"\\u111/"');
  assertEquals("\\x1/", RegExp("\\x1/").source);
  assertEquals("\\u111/", RegExp("\\u111/").source);
  
  // ":" comes just after "9".
-assertEquals("x1:", "\x1:");
-assertEquals("u111:", "\u111:");
+assertThrows('"\\x1:"');
+assertThrows('"\\u111:"');
  assertEquals("\\x1:", /\x1:/.source);
  assertEquals("\\u111:", /\u111:/.source);
  
  // "`" comes just before "a".
-assertEquals("x1`", "\x1`");
-assertEquals("u111`", "\u111`");
+assertThrows('"\\x1`"');
+assertThrows('"\\u111`"');
  assertEquals("\\x1`", /\x1`/.source);
  assertEquals("\\u111`", /\u111`/.source);
  
  // "g" comes just after "f".
-assertEquals("x1g", "\x1g");
-assertEquals("u111g", "\u111g");
+assertThrows('"\\x1g"');
+assertThrows('"\\u111g"');
  assertEquals("\\x1g", /\x1g/.source);
  assertEquals("\\u111g", /\u111g/.source);
  
  // "@" comes just before "A".
-assertEquals("x1@", "\x1@");
-assertEquals("u111@", "\u111@");
+assertThrows('"\\x1@"');
+assertThrows('"\\u111@"');
  assertEquals("\\x1@", /\x1@/.source);
  assertEquals("\\u111@", /\u111@/.source);
  
  // "G" comes just after "F".
-assertEquals("x1G", "\x1G");
-assertEquals("u111G", "\u111G");
+assertThrows('"\\x1G"');
+assertThrows('"\\u111G"');
  assertEquals("\\x1G", /\x1G/.source);
  assertEquals("\\u111G", /\u111G/.source);
  
diff --git a/test/mozilla/mozilla.status b/test/mozilla/mozilla.status

index e64959a..9eafb4b 100644 (file)
--- a/test/mozilla/mozilla.status
+++ b/test/mozilla/mozilla.status
@@ -592,6 +592,14 @@ js1_5/Regress/regress-416737-01: FAIL_OK
  js1_5/Regress/regress-416737-02: FAIL_OK
  
  
+# Illegal escape-sequences in string literals. Has already been fixed
+# by most engines (i.e. V8, JSC, Opera and FF).
+ecma/Array/15.4.5.1-1: FAIL_OK
+ecma/LexicalConventions/7.7.4: FAIL_OK
+ecma_2/RegExp/hex-001: FAIL_OK
+js1_2/regexp/hexadecimal: FAIL_OK
+
+
  ##################### FAILING TESTS #####################
  
  # This section is for tests that fail in V8 and pass in JSC.
diff --git a/test/sputnik/sputnik.status b/test/sputnik/sputnik.status

index a4c7d57..5cda6fd 100644 (file)
--- a/test/sputnik/sputnik.status
+++ b/test/sputnik/sputnik.status
@@ -52,36 +52,14 @@ S15.10.2.11_A1_T3: FAIL
  
  # We are more lenient in which string character escapes we allow than
  # the spec (7.8.4 p. 19) wants us to be.  This is for compatibility.
-S7.8.4_A4.3_T2: FAIL_OK
-S7.8.4_A4.3_T2: FAIL_OK
-S7.8.4_A6.2_T2: FAIL_OK
-S7.8.4_A6.1_T4: FAIL_OK
-S7.8.4_A4.3_T4: FAIL_OK
-S7.8.4_A7.2_T2: FAIL_OK
-S7.8.4_A7.1_T4: FAIL_OK
-S7.8.4_A6.4_T2: FAIL_OK
-S7.8.4_A7.4_T2: FAIL_OK
-S7.8.4_A7.2_T4: FAIL_OK
-S7.8.4_A4.3_T6: FAIL_OK
-S7.8.4_A7.2_T6: FAIL_OK
  S7.8.4_A4.3_T1: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
-S7.8.4_A4.3_T3: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
-S7.8.4_A6.4_T1: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
-S7.8.4_A7.4_T1: FAIL_OK
-S7.8.4_A4.3_T5: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
-S7.8.4_A4.3_T1: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
+S7.8.4_A4.3_T2: FAIL_OK
  S7.8.4_A4.3_T3: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
+S7.8.4_A4.3_T4: FAIL_OK
  S7.8.4_A6.4_T1: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
+S7.8.4_A6.4_T2: FAIL_OK
  S7.8.4_A7.4_T1: FAIL_OK
-S7.8.4_A4.3_T5: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
+S7.8.4_A7.4_T2: FAIL_OK
  
  # Sputnik expects unicode escape sequences in RegExp flags to be interpreted.
  # The specification requires them to be passed uninterpreted to the RegExp
diff --git a/test/test262/test262.status b/test/test262/test262.status

index d405d21..aacb16c 100644 (file)
--- a/test/test262/test262.status
+++ b/test/test262/test262.status
@@ -60,22 +60,6 @@ S15.1.2.2_A5.1_T1: FAIL_OK
  S15.8.2.16_A7: PASS || FAIL_OK
  S15.8.2.18_A7: PASS || FAIL_OK
  
-# We are more lenient in which string character escapes we allow than
-# the spec (7.8.4 p. 19) wants us to be.  This is for compatibility.
-S7.8.4_A6.1_T4: FAIL_OK
-S7.8.4_A6.2_T1: FAIL_OK
-S7.8.4_A6.2_T2: FAIL_OK
-S7.8.4_A7.1_T4: FAIL_OK
-S7.8.4_A7.2_T1: FAIL_OK
-S7.8.4_A7.2_T2: FAIL_OK
-S7.8.4_A7.2_T3: FAIL_OK
-S7.8.4_A7.2_T4: FAIL_OK
-S7.8.4_A7.2_T5: FAIL_OK
-S7.8.4_A7.2_T6: FAIL_OK
-Sbp_7.8.4_A6.1_T4: FAIL_OK
-Sbp_7.8.4_A6.2_T1: FAIL_OK
-Sbp_7.8.4_A6.2_T2: FAIL_OK
-
  # Linux for ia32 (and therefore simulators) default to extended 80 bit floating
  # point formats, so these tests checking 64-bit FP precision fail. The other
  # platforms/arch's pass these tests.
author	mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)
committer	mstarzinger@chromium.org <mstarzinger@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 16 Apr 2012 15:54:02 +0000 (15:54 +0000)
src/scanner.cc		patch \| blob \| history
src/scanner.h		patch \| blob \| history
test/mjsunit/compiler/literals.js		patch \| blob \| history
test/mozilla/mozilla.status		patch \| blob \| history
test/sputnik/sputnik.status		patch \| blob \| history
test/test262/test262.status		patch \| blob \| history