ES6 unicode extensions, part 1.

author marja <marja@chromium.org>

Tue, 2 Dec 2014 10:58:11 +0000 (02:58 -0800)

committer Commit bot <commit-bot@chromium.org>

Tue, 2 Dec 2014 10:58:19 +0000 (10:58 +0000)
author marja <marja@chromium.org>
Tue, 2 Dec 2014 10:58:11 +0000 (02:58 -0800)
committer Commit bot <commit-bot@chromium.org>
Tue, 2 Dec 2014 10:58:19 +0000 (10:58 +0000)
diff --git a/src/bootstrapper.cc b/src/bootstrapper.cc

index 5fed349..052d9f6 100644 (file)
--- a/src/bootstrapper.cc
+++ b/src/bootstrapper.cc
@@ -1590,6 +1590,7 @@ EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_numeric_literals)
  EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_tostring)
  EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_templates)
  EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_sloppy)
+EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_unicode)
  
  
  void Genesis::InstallNativeFunctions_harmony_proxies() {
@@ -1618,6 +1619,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_tostring)
  EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_proxies)
  EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_templates)
  EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_sloppy)
+EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode)
  
  void Genesis::InitializeGlobal_harmony_regexps() {
    Handle<JSObject> builtins(native_context()->builtins());
@@ -2176,6 +2178,7 @@ bool Genesis::InstallExperimentalNatives() {
    static const char* harmony_templates_natives[] = {
        "native harmony-templates.js", NULL};
    static const char* harmony_sloppy_natives[] = {NULL};
+  static const char* harmony_unicode_natives[] = {NULL};
  
    for (int i = ExperimentalNatives::GetDebuggerCount();
         i < ExperimentalNatives::GetBuiltinsCount(); i++) {
diff --git a/src/flag-definitions.h b/src/flag-definitions.h

index d1e74b6..55b4bde 100644 (file)
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -162,17 +162,18 @@ DEFINE_IMPLICATION(harmony, es_staging)
  DEFINE_IMPLICATION(es_staging, harmony)
  
  // Features that are still work in progress (behind individual flags).
-#define HARMONY_INPROGRESS(V)                                     \
-  V(harmony_modules, "harmony modules (implies block scoping)")   \
-  V(harmony_arrays, "harmony array methods")                      \
-  V(harmony_classes,                                              \
+#define HARMONY_INPROGRESS(V)                                             \
+  V(harmony_modules, "harmony modules (implies block scoping)")           \
+  V(harmony_arrays, "harmony array methods")                              \
+  V(harmony_classes,                                                      \
      "harmony classes (implies block scoping & object literal extension)") \
-  V(harmony_object_literals, "harmony object literal extensions") \
-  V(harmony_regexps, "harmony regular expression extensions")     \
-  V(harmony_arrow_functions, "harmony arrow functions")           \
-  V(harmony_proxies, "harmony proxies")                           \
-  V(harmony_templates, "harmony template literals")               \
-  V(harmony_sloppy, "harmony features in sloppy mode")
+  V(harmony_object_literals, "harmony object literal extensions")         \
+  V(harmony_regexps, "harmony regular expression extensions")             \
+  V(harmony_arrow_functions, "harmony arrow functions")                   \
+  V(harmony_proxies, "harmony proxies")                                   \
+  V(harmony_templates, "harmony template literals")                       \
+  V(harmony_sloppy, "harmony features in sloppy mode")                    \
+  V(harmony_unicode, "harmony unicode escapes")
  
  // Features that are complete (but still behind --harmony/es-staging flag).
  #define HARMONY_STAGED(V)                 \
diff --git a/src/parser.cc b/src/parser.cc

index 83fb594..6fcca20 100644 (file)
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -805,6 +805,7 @@ Parser::Parser(CompilationInfo* info, ParseInfo* parse_info)
    set_allow_harmony_object_literals(FLAG_harmony_object_literals);
    set_allow_harmony_templates(FLAG_harmony_templates);
    set_allow_harmony_sloppy(FLAG_harmony_sloppy);
+  set_allow_harmony_unicode(FLAG_harmony_unicode);
    for (int feature = 0; feature < v8::Isolate::kUseCounterFeatureCount;
         ++feature) {
      use_counts_[feature] = 0;
@@ -3974,6 +3975,7 @@ PreParser::PreParseResult Parser::ParseLazyFunctionBodyWithPreParser(
          allow_harmony_object_literals());
      reusable_preparser_->set_allow_harmony_templates(allow_harmony_templates());
      reusable_preparser_->set_allow_harmony_sloppy(allow_harmony_sloppy());
+    reusable_preparser_->set_allow_harmony_unicode(allow_harmony_unicode());
    }
    PreParser::PreParseResult result =
        reusable_preparser_->PreParseLazyFunction(strict_mode(),
diff --git a/src/preparser.h b/src/preparser.h

index b50019c..cef5b94 100644 (file)
--- a/src/preparser.h
+++ b/src/preparser.h
@@ -107,6 +107,7 @@ class ParserBase : public Traits {
    }
    bool allow_harmony_templates() const { return scanner()->HarmonyTemplates(); }
    bool allow_harmony_sloppy() const { return allow_harmony_sloppy_; }
+  bool allow_harmony_unicode() const { return scanner()->HarmonyUnicode(); }
  
    // Setters that determine whether certain syntactical constructs are
    // allowed to be parsed by this instance of the parser.
@@ -136,6 +137,9 @@ class ParserBase : public Traits {
    void set_allow_harmony_sloppy(bool allow) {
      allow_harmony_sloppy_ = allow;
    }
+  void set_allow_harmony_unicode(bool allow) {
+    scanner()->SetHarmonyUnicode(allow);
+  }
  
   protected:
    enum AllowEvalOrArgumentsAsIdentifier {
diff --git a/src/scanner.cc b/src/scanner.cc

index 3214c6f..6ce222c 100644 (file)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -39,7 +39,8 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
        harmony_modules_(false),
        harmony_numeric_literals_(false),
        harmony_classes_(false),
-      harmony_templates_(false) {}
+      harmony_templates_(false),
+      harmony_unicode_(false) {}
  
  
  void Scanner::Initialize(Utf16CharacterStream* source) {
@@ -72,6 +73,22 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
  }
  
  
+uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
+  uc32 x = 0;
+  int d = HexValue(c0_);
+  if (d < 0) {  // No leading hex digit (HexValue gave < 0): fail.
+    return -1;
+  }
+  while (d >= 0) {  // Fold in digits until HexValue rejects c0_.
+    x = x * 16 + d;
+    if (x > max_value) return -1;  // Too big; this digit is not consumed.
+    Advance();
+    d = HexValue(c0_);
+  }
+  return x;  // c0_ is now the first character after the digits.
+}
+
+
  // Ensure that tokens can be stored in a byte.
  STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
  
@@ -700,7 +717,7 @@ bool Scanner::ScanEscape() {
      case 'r' : c = '\r'; break;
      case 't' : c = '\t'; break;
      case 'u' : {
-      c = ScanHexNumber(4);
+      c = ScanUnicodeEscape();
        if (c < 0) return false;
        break;
      }
@@ -964,6 +981,26 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
    Advance();
    if (c0_ != 'u') return -1;
    Advance();
+  return ScanUnicodeEscape();
+}
+
+
+uc32 Scanner::ScanUnicodeEscape() {
+  // Accept both \uxxxx and \u{xxxxxx} (the latter only if harmony unicode
+  // escapes are allowed); any number of hex digits may appear between { }.
+  // \ and u have already been read. Returns -1 for a malformed \u{...}.
+  if (c0_ == '{' && HarmonyUnicode()) {
+    Advance();  // Skip '{'.
+    uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff);
+    if (cp < 0) {  // No digits at all, or a value above 0x10ffff.
+      return -1;
+    }
+    if (c0_ != '}') {  // The digits must be closed by '}'.
+      return -1;
+    }
+    Advance();  // Skip '}'.
+    return cp;
+  }
    return ScanHexNumber(4);
  }
  
diff --git a/src/scanner.h b/src/scanner.h

index 46e6d32..446355f 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -460,6 +460,8 @@ class Scanner {
    }
    bool HarmonyTemplates() const { return harmony_templates_; }
    void SetHarmonyTemplates(bool templates) { harmony_templates_ = templates; }
+  bool HarmonyUnicode() const { return harmony_unicode_; }
+  void SetHarmonyUnicode(bool unicode) { harmony_unicode_ = unicode; }
  
    // Returns true if there was a line terminator before the peek'ed token,
    // possibly inside a multi-line comment.
@@ -616,6 +618,10 @@ class Scanner {
    }
  
    uc32 ScanHexNumber(int expected_length);
+  // Scan a hex number with any number of digits whose value is at most
+  // max_value; e.g. 000000001 is long in characters but small in value.
+  // Returns -1 if there is no hex digit or the value exceeds max_value.
+  uc32 ScanUnlimitedLengthHexNumber(int max_value);
  
    // Scans a single JavaScript token.
    void Scan();
@@ -642,6 +648,8 @@ class Scanner {
    // Decodes a Unicode escape-sequence which is part of an identifier.
    // If the escape sequence cannot be decoded the result is kBadChar.
    uc32 ScanIdentifierUnicodeEscape();
+  // Helper for ScanEscape and ScanIdentifierUnicodeEscape (\ and u consumed).
+  uc32 ScanUnicodeEscape();
  
    // Return the current source position.
    int source_pos() {
@@ -688,6 +696,8 @@ class Scanner {
    bool harmony_classes_;
    // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL
    bool harmony_templates_;
+  // Whether we allow \u{xxxxx}.
+  bool harmony_unicode_;
  };
  
  } }  // namespace v8::internal
diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc

index 4e2f828..7da5dac 100644 (file)
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@@ -1357,7 +1357,8 @@ enum ParserFlag {
    kAllowHarmonyClasses,
    kAllowHarmonyObjectLiterals,
    kAllowHarmonyTemplates,
-  kAllowHarmonySloppy
+  kAllowHarmonySloppy,
+  kAllowHarmonyUnicode
  };
  
  
@@ -1383,6 +1384,7 @@ void SetParserFlags(i::ParserBase<Traits>* parser,
    parser->set_allow_harmony_classes(flags.Contains(kAllowHarmonyClasses));
    parser->set_allow_harmony_templates(flags.Contains(kAllowHarmonyTemplates));
    parser->set_allow_harmony_sloppy(flags.Contains(kAllowHarmonySloppy));
+  parser->set_allow_harmony_unicode(flags.Contains(kAllowHarmonyUnicode));
  }
  
  
@@ -1693,6 +1695,7 @@ void RunParserSyncTest(const char* context_data[][2],
      kAllowHarmonyModules,
      kAllowHarmonyTemplates,
      kAllowHarmonySloppy,
+    kAllowHarmonyUnicode,
      kAllowLazy,
      kAllowNatives,
    };
@@ -4374,8 +4377,52 @@ TEST(InvalidUnicodeEscapes) {
      // No escapes allowed in regexp flags
      "/regex/\\u0069g",
      "/regex/\\u006g",
+    // Braces gone wrong
+    "var foob\\u{c481r = 0;",
+    "var foob\\uc481}r = 0;",
+    "var \\u{0052oo = 0;",
+    "var \\u0052}oo = 0;",
+    "\"foob\\u{c481r\"",
+    "var foob\\u{}ar = 0;",
+    // Too high value for the unicode escape
+    "\"\\u{110000}\"",
+    // Not a unicode escape
+    "var foob\\v1234r = 0;",
+    "var foob\\U1234r = 0;",
+    "var foob\\v{1234}r = 0;",
+    "var foob\\U{1234}r = 0;",
      NULL};
-  RunParserSyncTest(context_data, data, kError);
+  static const ParserFlag always_flags[] = {kAllowHarmonyUnicode};
+  RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags,
+                    arraysize(always_flags));
+}
+
+
+TEST(UnicodeEscapes) {
+  const char* context_data[][2] = {{"", ""},
+                                   {"'use strict';", ""},
+                                   {NULL, NULL}};
+  const char* data[] = {
+    // Identifier starting with escape
+    "var \\u0052oo = 0;",
+    "var \\u{0052}oo = 0;",
+    "var \\u{52}oo = 0;",
+    "var \\u{00000000052}oo = 0;",
+    // Identifier with an escape but not starting with an escape
+    "var foob\\uc481r = 0;",
+    "var foob\\u{c481}r = 0;",
+    // String with an escape
+    "\"foob\\uc481r\"",
+    "\"foob\\{uc481}r\"",
+    // This character is a valid unicode character, representable as a surrogate
+    // pair, not representable as 4 hex digits.
+    "\"foo\\u{10e6d}\"",
+    // Max value for the unicode escape
+    "\"\\u{10ffff}\"",
+    NULL};
+  static const ParserFlag always_flags[] = {kAllowHarmonyUnicode};
+  RunParserSyncTest(context_data, data, kSuccess, NULL, 0, always_flags,
+                    arraysize(always_flags));
  }
  
  
diff --git a/test/mjsunit/harmony/unicode-escapes.js b/test/mjsunit/harmony/unicode-escapes.js

new file mode 100644 (file)

index 0000000..b39ee1a
--- /dev/null
+++ b/test/mjsunit/harmony/unicode-escapes.js
@@ -0,0 +1,46 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
+
+// Flags: --harmony-unicode
+
+// Unicode escapes in variable names.
+
+(function TestVariableNames1() {
+  var foobar = 1;
+  assertEquals(foob\u0061r, 1);
+  assertEquals(foob\u{0061}r, 1);
+  assertEquals(foob\u{61}r, 1);
+  assertEquals(foob\u{0000000061}r, 1);
+})();
+
+(function TestVariableNames2() {
+  var foobar = 1;
+  assertEquals(\u0066oobar, 1);
+  assertEquals(\u{0066}oobar, 1);
+  assertEquals(\u{66}oobar, 1);
+  assertEquals(\u{0000000066}oobar, 1);
+})();
+
+// Unicode escapes in strings.
+
+(function TestStrings() {
+  var s1 = "foob\u0061r";
+  assertEquals(s1, "foobar");
+  var s2 = "foob\u{0061}r";
+  assertEquals(s2, "foobar");
+  var s3 = "foob\u{61}r";
+  assertEquals(s3, "foobar");
+  var s4 = "foob\u{0000000061}r";
+  assertEquals(s4, "foobar");
+})();
+
+
+(function TestSurrogates() {
+  // U+10E6D is above U+FFFF; its surrogate pair is [U+D803, U+DE6D].
+  var s1 = "foo\u{10e6d}";
+  var s2 = "foo\u{d803}\u{de6d}";
+  assertEquals(s1, s2);  // One braced escape equals its two surrogates.
+})();
author	marja <marja@chromium.org>
	Tue, 2 Dec 2014 10:58:11 +0000 (02:58 -0800)
committer	Commit bot <commit-bot@chromium.org>
	Tue, 2 Dec 2014 10:58:19 +0000 (10:58 +0000)
src/bootstrapper.cc		patch \| blob \| history
src/flag-definitions.h		patch \| blob \| history
src/parser.cc		patch \| blob \| history
src/preparser.h		patch \| blob \| history
src/scanner.cc		patch \| blob \| history
src/scanner.h		patch \| blob \| history
test/cctest/test-parsing.cc		patch \| blob \| history
test/mjsunit/harmony/unicode-escapes.js	[new file with mode: 0644]	patch \| blob